├── mypy.ini ├── CHANGELOG.md ├── pytest.ini ├── boda ├── lib │ ├── __init__.py │ ├── torchinfo │ │ ├── __init__.py │ │ ├── formatting.py │ │ ├── layer_info.py │ │ └── model_statistics.py │ └── torchsummary │ │ ├── __init__.py │ │ ├── formatting.py │ │ ├── layer_info.py │ │ ├── model_statistics.py │ │ └── torchsummary.py ├── ops │ ├── __init__.py │ └── anchor_generators.py ├── models │ ├── centermask │ │ └── __init__.py │ ├── feature_extractor │ │ ├── __init__.py │ │ ├── vggnet.py │ │ ├── pafpn.py │ │ ├── fpn.py │ │ ├── resnet.py │ │ ├── mobilenetv2.py │ │ └── efficientnet.py │ ├── __init__.py │ ├── yolox │ │ ├── __init__.py │ │ ├── configuration_yolox.py │ │ ├── loss_yolox.py │ │ └── utils.py │ ├── ssd │ │ ├── __init__.py │ │ ├── configuration_ssd.py │ │ ├── README.md │ │ ├── inference_ssd.py │ │ └── loss_ssd.py │ ├── solov2 │ │ ├── __init__.py │ │ ├── configuration_solov1.py │ │ ├── architecture_decoupled_solov1.py │ │ ├── inference_solov1.py │ │ └── README.md │ └── yolact │ │ ├── __init__.py │ │ ├── configuration_yolact.py │ │ ├── README.md │ │ └── inference_yolact.py ├── __init__.py ├── setup.py ├── README.md ├── file_utils.py ├── custom_activation.py ├── custom_modules.py ├── postprocessing.py └── base_configuration.py ├── benchmarks └── benchmark_yolact.py ├── .flake8 ├── boda.png ├── docs ├── requirements.txt ├── source │ ├── index.rst │ └── conf.py ├── Makefile └── make.bat ├── environment.yml ├── run_test_ssd.py ├── CONTRIBUTING.md ├── .gitignore ├── calc_flops.py ├── setup.py ├── README.md └── run_test.py /mypy.ini: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boda/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boda/ops/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarks/benchmark_yolact.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boda/models/centermask/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 100 3 | -------------------------------------------------------------------------------- /boda.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/unerue/boda/HEAD/boda.png -------------------------------------------------------------------------------- /boda/models/feature_extractor/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import FeaturePyramidNetworks 2 | from .vggnet import * 3 | from .resnet import * 4 | 
-------------------------------------------------------------------------------- /boda/models/__init__.py: -------------------------------------------------------------------------------- 1 | # from .feature_extractor import * 2 | from .ssd import * 3 | from .yolact import * 4 | 5 | # from .yolox import * 6 | -------------------------------------------------------------------------------- /boda/lib/torchinfo/__init__.py: -------------------------------------------------------------------------------- 1 | """ torchinfo """ 2 | from .model_statistics import ModelStatistics 3 | from .torchinfo import summary 4 | 5 | __all__ = ("ModelStatistics", "summary") 6 | -------------------------------------------------------------------------------- /boda/lib/torchsummary/__init__.py: -------------------------------------------------------------------------------- 1 | """ torchsummary """ 2 | from .model_statistics import ModelStatistics 3 | from .torchsummary import summary 4 | 5 | __all__ = ("ModelStatistics", "summary") 6 | -------------------------------------------------------------------------------- /boda/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import * 2 | 3 | 4 | # __all__ = [ 5 | # 'SsdConfig', 'SsdModel', 'SsdLoss', 6 | # 'YolactConfig', 'YolactModel', 'YolactLoss', 7 | # 'Solov1Config', 'Solov1Model', 8 | # ] 9 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | numpy 3 | sphinx-copybutton>=0.3.1 4 | sphinx-gallery>=0.9.0 5 | sphinx==3.5.4 6 | tabulate 7 | Jinja2<3.1.* 8 | -e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme -------------------------------------------------------------------------------- /boda/models/yolox/__init__.py: -------------------------------------------------------------------------------- 1 | # from .configuration_yolox import YoloXConfig 2 | # from .architecture_yolox import YoloXModel 3 | # # from .loss_yolox import Yo 4 | 5 | 6 | # __all__ = [ 7 | # 'YoloXConfig', 'YoloXModel', 8 | # ] 9 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: boda 2 | channels: 3 | - conda-forge 4 | - pytorch 5 | - anaconda 6 | dependencies: 7 | - python=3.7 8 | - pytorch=1.7 9 | - torchvision=0.8 10 | - cudatoolkit=10.2 11 | - numpy 12 | - cython 13 | # - pip: 14 | # - pycocotools 15 | # - opencv-python -------------------------------------------------------------------------------- /boda/models/ssd/__init__.py: -------------------------------------------------------------------------------- 1 | from .architecture_ssd import SsdPredictNeck, SsdPredictHead, SsdModel 2 | from .configuration_ssd import SsdConfig 3 | 4 | # from .loss_ssd import SsdLoss 5 | 6 | 7 | __all__ = [ 8 | "SsdConfig", 9 | "SsdPredictNeck", 10 | "SsdPredictHead", 11 | "SsdModel", 12 | ] 13 | -------------------------------------------------------------------------------- /run_test_ssd.py: -------------------------------------------------------------------------------- 1 | from boda.models import SsdModel, SsdConfig 2 | 3 | 4 | from boda.lib.torchsummary import summary 5 | import torch 6 | 7 | config = SsdConfig(num_classes=80) 8 | model = SsdModel(config).to('cuda') 9 | model.eval() 10 | print(model) 11 | # 
print(summary(model, input_size=(16, 3, 550, 550), verbose=0)) 12 | print(summary(model, input_data=(3, 550, 550), verbose=0)) -------------------------------------------------------------------------------- /boda/models/solov2/__init__.py: -------------------------------------------------------------------------------- 1 | # from .configuration_solov1 import Solov1Config 2 | # from .architecture_solov1 import Solov1PredictNeck, Solov1PredictHead, Solov1Model 3 | # # from .architecture_decoupled_solov1 import DecoupledSolov1Model 4 | # from .loss_solov1 import Solov1Loss 5 | 6 | 7 | # __all__ = [ 8 | # 'Solov1Loss', 'Solov1Config', 'Solov1PredictNeck', 9 | # 'Solov1PredictHead', 'Solov1Model', 'Solov1Loss' 10 | # ] 11 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to boda 2 | --- 3 | 4 | ## Code formatting and typing 5 | 6 | ### Formatting 7 | 8 | To format your code, install `ufmt` 9 | 10 | ```bash 11 | pip install ufmt==1.3.2 black==21.9b0 usort==0.6.4 12 | ``` 13 | 14 | ```bash 15 | ufmt format boda 16 | ``` 17 | 18 | ### Type annotations 19 | 20 | ```bash 21 | mypy --config-file mypy.ini 22 | ``` 23 | 24 | ## Unit tests 25 | 26 | ```bash 27 | pytest test -vvv 28 | ``` 29 | 30 | ## Documentation 31 | ```bash 32 | cd docs 33 | make html-noplot 34 | ``` -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. boda documentation master file, created by 2 | sphinx-quickstart on Mon Jul 11 19:55:45 2022. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to boda's documentation! 7 | ================================ 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | 14 | 15 | Indices and tables 16 | ================== 17 | 18 | * :ref:`genindex` 19 | * :ref:`modindex` 20 | * :ref:`search` 21 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /boda/setup.py: -------------------------------------------------------------------------------- 1 | # from Cython.Build import cythonize 2 | # from numpy.distutils.misc_util import Configuration 3 | 4 | 5 | # def cythonize_extensions(top_path, config): 6 | # config.ext_modules = cythonize( 7 | # config.ext_modules, 8 | # compiler_directives={'language_level': '3'}) 9 | 10 | 11 | # def configuration(parent_package='', top_path=None): 12 | # config = Configuration('boda', parent_package, top_path) 13 | # config.add_subpackage('models') 14 | # config.add_subpackage('utils') 15 | # config.add_subpackage('lib') 16 | # cythonize_extensions(top_path, config) 17 | 18 | # return config 19 | 20 | 21 | # if __name__ == '__main__': 22 | # from numpy.distutils.core import setup 23 | 24 | # setup(**configuration(top_path='').todict()) 25 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | .vscode 3 | .DS_Store 4 | __pycache__ 5 | boda.egg-info 6 | build 7 | dist 8 | tests 9 | misc 10 | doc 11 | boda/models/cascade_mask_rcnn/ 12 | boda/models/efficientdet/ 13 | boda/models/faster_rcnn/ 14 | boda/models/fcos/ 15 | boda/models/keypoint_rcnn/ 16 | boda/models/mask_rcnn/ 17 | boda/models/polarmask/ 18 | # boda/models/solov1/ 19 | boda/models/yolact_edge/ 20 | boda/models/yolov4/ 21 | 22 | # boda/ops/ 23 | 24 | benchmarks/data/ 25 | benchmarks/*pth 26 | benchmarks/samples/ 27 | benchmarks/dataset/ 28 | benchmarks/benchmark_yolov1.py 29 | benchmarks/benchmark_backbone.py 30 | 31 | boda/dev/ 32 | dev/ 33 | old/ 34 | cache/ 35 | 36 | run.py 37 | test_ssd.py 38 | test_yolov1.py 39 | test_yolact.py 40 | test_solov1.py 41 | test_fcos.py 42 | test_backbone.py 43 | test_centermask.py 44 | eval_yolact.py 45 | test_faster_rcnn.py 46 | test_mask_rcnn.py 47 | test_keypoint_rcnn.py 48 | 49 | logo.pptx 50 | *.pth 51 | *.zip 52 | *.jpg -------------------------------------------------------------------------------- /boda/models/yolox/configuration_yolox.py: -------------------------------------------------------------------------------- 1 | # from typing import List 2 | 3 | # from ...base_configuration import BaseConfig 
4 | 5 | 6 | # class YoloXConfig(BaseConfig): 7 | # model_name = 'yolox' 8 | 9 | # def __init__( 10 | # self, 11 | # num_classes: int = 80, 12 | # image_size: int = 640, 13 | # depth: float = 1.0, 14 | # width: float = 1.0, 15 | # act: str = 'silu', 16 | # selected_backbone_layers: List[int] = [2, 3, 4], 17 | # depthwise: bool = False, 18 | # test_conf: float = 0.01, 19 | # nmsthre: float = 0.65, 20 | # ): 21 | # super().__init__( 22 | # num_classes=num_classes, 23 | # max_size=image_size, 24 | # ) 25 | # self.depth = depth 26 | # self.width = width 27 | # self.act = act 28 | 29 | # self.selected_backbone_layers = selected_backbone_layers 30 | 31 | # self.depthwise = depthwise 32 | 33 | # self.test_conf = test_conf 34 | # self.nmsthre = nmsthre 35 | -------------------------------------------------------------------------------- /boda/README.md: -------------------------------------------------------------------------------- 1 | # Models 2 | 3 | ## Library Structure 4 | ```{bash} 5 | . 6 | +-- models 7 | | +-- model 8 | | | +-- configuration_model.py 9 | | | +-- architecture_model.py 10 | | | +-- loss_model.py 11 | | | +-- inference_model.py 12 | | | +-- README.md 13 | | +-- backbone.py 14 | | +-- neck.py 15 | +-- utils 16 | | +-- box.py 17 | | +-- mask.py 18 | | +-- nms.py 19 | +-- lib 20 | | +-- torchsummary 21 | +-- base_architecture.py 22 | +-- base_configuration.py 23 | +-- modules.py 24 | +-- activation.py 25 | +-- setup.py 26 | ``` 27 | 28 | ## Abstract Structure 29 | 30 | ```{python} 31 | class Backbone(nn.Module): 32 | def __init__(self): 33 | super().__init__() 34 | 35 | def forward(self): 36 | return 37 | 38 | 39 | class Neck(nn.Module): 40 | def __init__(self): 41 | super().__init__() 42 | 43 | def _make_layer(self): 44 | 45 | def forward(self): 46 | return 47 | 48 | 49 | class Head(nn.Module): 50 | 51 | 52 | class Pretrained: 53 | 54 | 55 | class Model() 56 | ``` -------------------------------------------------------------------------------- /calc_flops.py: -------------------------------------------------------------------------------- 1 | from boda.models import YolactConfig, YolactModel 2 | # from boda.lib.torchsummary import summary 3 | from torchinfo import summary 4 | from boda.models.backbone_mobilenetv3 import mobilenet_v3_large, mobilenet_v3_small 5 | from boda.models.backbone_resnet import resnet101, resnet18, resnet34, resnet50 6 | # from torchvision.models import resnet50, mobilenet_v3_large, resnet101 7 | 8 | 9 | config = YolactConfig(num_classes=90) 10 | # model = YolactModel(config, backbone=mobilenet_v3_small(), selected_backbone_layers=[3, 8, 11]).to('cuda:0') 11 | # model = YolactModel(config, backbone=mobilenet_v3_large(), selected_backbone_layers=[6, 12, 15]).to('cuda:0') 12 | # model = YolactModel(config, backbone=resnet50(), selected_backbone_layers=[1, 2, 3]).to('cuda:0') 13 | # print(summary(model, (1, 3, 550, 550), verbose=0)) 14 | 15 | # from boda.resnet import resnet101 16 | model = mobilenet_v3_small().to('cuda') 17 | # model = mobilenet_v3_large().to('cuda') 18 | # print(summary(model, input_data=(3, 550, 550), depth=2, verbose=0)) 19 | 20 | # model = resnet101().to('cuda') 21 | # model = resnet50().to('cuda') 22 | print(summary(model, (1, 3, 224, 224), depth=3, verbose=0)) 23 | -------------------------------------------------------------------------------- /boda/models/ssd/configuration_ssd.py: -------------------------------------------------------------------------------- 1 | from ...base_configuration import BaseConfig 2 | 3 | 4 | 
SSD_PRETRAINED_CONFIG = { 5 | "ssd300": None, 6 | "ssd512": None, 7 | } 8 | 9 | 10 | class SsdConfig(BaseConfig): 11 | """Configuration for SSD 12 | 13 | Arguments: 14 | max_size (): 15 | 16 | """ 17 | 18 | def __init__( 19 | self, 20 | num_classes: int = 20, 21 | max_size: int = 300, 22 | preserve_aspect_ratio: bool = False, 23 | selected_layers: int = -1, 24 | num_grids: int = 7, 25 | **kwargs 26 | ) -> None: 27 | super().__init__(max_size=max_size, **kwargs) 28 | self.selected_layers = [3, 4] 29 | self.boxes = [4, 6, 6, 6, 4, 4] 30 | self.num_classes = num_classes 31 | self.backbone_name = "vgg16" 32 | self.aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]] 33 | self.variance = [0.1, 0.2] 34 | self.min_sizes = [30, 60, 111, 162, 213, 264] 35 | self.max_sizes = [60, 111, 162, 213, 264, 315] 36 | self.steps = [8, 16, 32, 64, 100, 300] 37 | self.clip = True 38 | # self.grid_sizes = [38, 19, 10, 5, 3, 1] 39 | -------------------------------------------------------------------------------- /boda/models/yolox/loss_yolox.py: -------------------------------------------------------------------------------- 1 | # import torch 2 | # from torch import nn 3 | 4 | 5 | # class IOUloss(nn.Module): 6 | # def __init__(self, reduction="none", loss_type="iou"): 7 | # super(IOUloss, self).__init__() 8 | # self.reduction = reduction 9 | # self.loss_type = loss_type 10 | 11 | # def forward(self, pred, target): 12 | # assert pred.shape[0] == target.shape[0] 13 | 14 | # pred = pred.view(-1, 4) 15 | # target = target.view(-1, 4) 16 | # tl = torch.max( 17 | # (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2) 18 | # ) 19 | # br = torch.min( 20 | # (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2) 21 | # ) 22 | 23 | # area_p = torch.prod(pred[:, 2:], 1) 24 | # area_g = torch.prod(target[:, 2:], 1) 25 | 26 | # en = (tl < br).type(tl.type()).prod(dim=1) 27 | # area_i = torch.prod(br - tl, 1) * en 28 | # area_u = area_p + area_g - area_i 29 | # iou = (area_i) / (area_u + 1e-16) 30 | 31 | # if self.loss_type == "iou": 32 | # loss = 1 - iou ** 2 33 | # elif self.loss_type == "giou": 34 | # c_tl = torch.min( 35 | # (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2) 36 | # ) 37 | # c_br = torch.max( 38 | # (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2) 39 | # ) 40 | # area_c = torch.prod(c_br - c_tl, 1) 41 | # giou = iou - (area_c - area_u) / area_c.clamp(1e-16) 42 | # loss = 1 - giou.clamp(min=-1.0, max=1.0) 43 | 44 | # if self.reduction == "mean": 45 | # loss = loss.mean() 46 | # elif self.reduction == "sum": 47 | # loss = loss.sum() 48 | 49 | # return loss 50 | -------------------------------------------------------------------------------- /boda/lib/torchinfo/formatting.py: -------------------------------------------------------------------------------- 1 | """ formatting.py """ 2 | import math 3 | from enum import Enum, unique 4 | from typing import Dict, Iterable, List 5 | 6 | from .layer_info import LayerInfo 7 | 8 | 9 | @unique 10 | class Verbosity(Enum): 11 | """Contains verbosity levels.""" 12 | 13 | QUIET, DEFAULT, VERBOSE = 0, 1, 2 14 | 15 | 16 | class FormattingOptions: 17 | """Class that holds information about formatting the table output.""" 18 | 19 | def __init__( 20 | self, 21 | max_depth: int, 22 | verbose: int, 23 | col_names: Iterable[str], 24 | col_width: int, 25 | ): 26 | self.max_depth = max_depth 27 | self.verbose = verbose 28 | self.col_names = col_names 29 | self.col_width = col_width 30 | self.layer_name_width = 
40 31 | 32 | def set_layer_name_width( 33 | self, summary_list: List[LayerInfo], align_val: int = 5 34 | ) -> None: 35 | """ 36 | Set layer name width by taking the longest line length and rounding up to 37 | the nearest multiple of align_val. 38 | """ 39 | max_length = 0 40 | for info in summary_list: 41 | depth_indent = info.depth * align_val + 1 42 | max_length = max(max_length, len(str(info)) + depth_indent) 43 | if max_length >= self.layer_name_width: 44 | self.layer_name_width = math.ceil(max_length / align_val) * align_val 45 | 46 | def get_total_width(self) -> int: 47 | """Calculate the total width of all lines in the table.""" 48 | return len(tuple(self.col_names)) * self.col_width + self.layer_name_width 49 | 50 | def format_row(self, layer_name: str, row_values: Dict[str, str]) -> str: 51 | """Get the string representation of a single layer of the model.""" 52 | info_to_use = [row_values.get(row_type, "") for row_type in self.col_names] 53 | new_line = f"{layer_name:<{self.layer_name_width}} " 54 | for info in info_to_use: 55 | new_line += f"{info:<{self.col_width}} " 56 | return new_line.rstrip() + "\n" 57 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'boda' 21 | copyright = '2022, Kyung-Su Kang' 22 | author = 'Kyung-Su Kang' 23 | 24 | # The full version, including alpha/beta/rc tags 25 | release = '0.01a' 26 | 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | ] 35 | 36 | # Add any paths that contain templates here, relative to this directory. 37 | templates_path = ['_templates'] 38 | 39 | # List of patterns, relative to source directory, that match files and 40 | # directories to ignore when looking for source files. 41 | # This pattern also affects html_static_path and html_extra_path. 42 | exclude_patterns = [] 43 | 44 | 45 | # -- Options for HTML output ------------------------------------------------- 46 | 47 | # The theme to use for HTML and HTML Help pages. See the documentation for 48 | # a list of builtin themes. 49 | # 50 | html_theme = 'alabaster' 51 | 52 | # Add any paths that contain custom static files (such as style sheets) here, 53 | # relative to this directory. They are copied after the builtin static files, 54 | # so a file named "default.css" will overwrite the builtin "default.css". 
55 | html_static_path = ['_static'] -------------------------------------------------------------------------------- /boda/lib/torchsummary/formatting.py: -------------------------------------------------------------------------------- 1 | """ formatting.py """ 2 | import math 3 | from enum import Enum, unique 4 | from typing import Dict, Iterable, List 5 | 6 | from .layer_info import LayerInfo 7 | 8 | 9 | @unique 10 | class Verbosity(Enum): 11 | """Contains verbosity levels.""" 12 | 13 | QUIET, DEFAULT, VERBOSE = 0, 1, 2 14 | 15 | 16 | class FormattingOptions: 17 | """Class that holds information about formatting the table output.""" 18 | 19 | def __init__( 20 | self, 21 | use_branching: bool, 22 | max_depth: int, 23 | verbose: int, 24 | col_names: Iterable[str], 25 | col_width: int, 26 | ): 27 | self.use_branching = use_branching 28 | self.max_depth = max_depth 29 | self.verbose = verbose 30 | self.col_names = col_names 31 | self.col_width = col_width 32 | self.layer_name_width = 40 33 | 34 | def set_layer_name_width( 35 | self, summary_list: List[LayerInfo], align_val: int = 5 36 | ) -> None: 37 | """ 38 | Set layer name width by taking the longest line length and rounding up to 39 | the nearest multiple of align_val. 40 | """ 41 | max_length = 0 42 | for info in summary_list: 43 | depth_indent = info.depth * align_val + 1 44 | max_length = max(max_length, len(str(info)) + depth_indent) 45 | if max_length >= self.layer_name_width: 46 | self.layer_name_width = math.ceil(max_length / align_val) * align_val 47 | 48 | def get_total_width(self) -> int: 49 | """Calculate the total width of all lines in the table.""" 50 | return len(tuple(self.col_names)) * self.col_width + self.layer_name_width 51 | 52 | def format_row(self, layer_name: str, row_values: Dict[str, str]) -> str: 53 | """Get the string representation of a single layer of the model.""" 54 | info_to_use = [row_values.get(row_type, "") for row_type in self.col_names] 55 | new_line = "{:<{}} ".format(layer_name, self.layer_name_width) 56 | for info in info_to_use: 57 | new_line += "{:<{}} ".format(info, self.col_width) 58 | return new_line.rstrip() + "\n" 59 | -------------------------------------------------------------------------------- /boda/file_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | from urllib.request import urlretrieve 5 | 6 | 7 | class DataEncoder(json.JSONEncoder): 8 | def default(self, obj): 9 | if isinstance(obj, list): 10 | return json.JSONEncoder().encode(obj) 11 | 12 | return json.JSONEncoder.default(self, obj) 13 | 14 | 15 | def progressbar(cur, total=100): 16 | percent = "{:.2%}".format(cur / total) 17 | sys.stdout.write("\r") 18 | # sys.stdout.write("[%-50s] %s" % ('=' * int(math.floor(cur * 50 / total)),percent)) 19 | sys.stdout.write("[%-100s] %s" % ("=" * int(cur), percent)) 20 | sys.stdout.flush() 21 | 22 | 23 | def schedule(blocknum, blocksize, totalsize): 24 | """ 25 | blocknum: currently downloaded block 26 | blocksize: block size for each transfer 27 | totalsize: total size of web page files 28 | """ 29 | if totalsize == 0: 30 | percent = 0 31 | else: 32 | percent = blocknum * blocksize / totalsize 33 | if percent > 1.0: 34 | percent = 1.0 35 | 36 | percent = percent * 100 37 | print("download : %.2f%%" % (percent)) 38 | progressbar(percent) 39 | 40 | 41 | def reporthook(count, block_size, total_size): 42 | """ 43 | https://blog.shichao.io/2012/10/04/progress_speed_indicator_for_urlretrieve_in_python.html 44 
| """ 45 | # global start_time 46 | # if count == 0: 47 | # start_time = time.time() 48 | # return 49 | # duration = time.time() - start_time 50 | progress_size = int(count * block_size) 51 | # speed = int(progress_size / (1024 * duration)) 52 | percent = int(count * block_size * 100 / total_size) 53 | # min(int(count*blockSize*100/totalSize),100) 54 | sys.stdout.write( 55 | f"\rDownload file for pretrained model: {percent:>3}% {progress_size / (1024*1204):>4.1f} MB" 56 | ) 57 | 58 | # sys.stdout.write("\rDownload pretrained model: %d%%, %d MB, %d KB/s, %d seconds passed" % 59 | # (percent, progress_size / (1024 * 1024), speed, duration)) 60 | sys.stdout.flush() 61 | 62 | 63 | def get_file_from_url( 64 | file_name: str, 65 | ): 66 | """ 67 | file_name (): model_name/file_name.json or pth 68 | """ 69 | url = "https://unerue.synology.me/boda/models/" 70 | urlretrieve(f"{url}{file_name}", config_file, reporthook) 71 | print() 72 | -------------------------------------------------------------------------------- /boda/custom_activation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | 6 | class Swish(nn.Module): 7 | """Swish https://arxiv.org/pdf/1905.02244.pdf""" 8 | 9 | @staticmethod 10 | def forward(x): 11 | return x * torch.sigmoid(x) 12 | 13 | 14 | class Hardswish(nn.Module): 15 | """export-friendly version of nn.Hardswish() 16 | 17 | Return: 18 | x * F.hardsigmoid(x) for torchscript and CoreML 19 | """ 20 | 21 | @staticmethod 22 | def forward(x): 23 | # for torchscript, CoreML and ONNX 24 | return x * F.hardtanh(x + 3, 0.0, 6.0) / 6.0 25 | 26 | 27 | class MemoryEfficientSwish(nn.Module): 28 | class F(torch.autograd.Function): 29 | @staticmethod 30 | def forward(ctx, x): 31 | ctx.save_for_backward(x) 32 | return x * torch.sigmoid(x) 33 | 34 | @staticmethod 35 | def backward(ctx, grad_output): 36 | x = ctx.saved_tensors[0] 37 | sx = torch.sigmoid(x) 38 | return grad_output * (sx * (1 + x * (1 - sx))) 39 | 40 | def forward(self, x): 41 | return self.F.apply(x) 42 | 43 | 44 | class Mish(nn.Module): 45 | """# Mish https://github.com/digantamisra98/Mish""" 46 | 47 | @staticmethod 48 | def forward(x): 49 | return x * F.softplus(x).tanh() 50 | 51 | 52 | class MemoryEfficientMish(nn.Module): 53 | class F(torch.autograd.Function): 54 | @staticmethod 55 | def forward(ctx, x): 56 | ctx.save_for_backward(x) 57 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 58 | 59 | @staticmethod 60 | def backward(ctx, grad_output): 61 | x = ctx.saved_tensors[0] 62 | sx = torch.sigmoid(x) 63 | fx = F.softplus(x).tanh() 64 | return grad_output * (fx + x * sx * (1 - fx * fx)) 65 | 66 | def forward(self, x): 67 | return self.F.apply(x) 68 | 69 | 70 | class FReLU(nn.Module): 71 | """FReLU https://arxiv.org/abs/2007.11824""" 72 | 73 | def __init__(self, in_channels, kernel_size=3): 74 | super().__init__() 75 | self.conv = nn.Conv2d( 76 | in_channels, 77 | in_channels, 78 | kernel_size, 79 | stride=1, 80 | padding=1, 81 | groups=in_channels, 82 | bias=False, 83 | ) 84 | self.bn = nn.BatchNorm2d(in_channels) 85 | 86 | def forward(self, x): 87 | return torch.max(x, self.bn(self.conv(x))) 88 | -------------------------------------------------------------------------------- /boda/models/yolact/__init__.py: -------------------------------------------------------------------------------- 1 | from .architecture_yolact import YolactPredictHead, YolactModel 2 | from .configuration_yolact 
import YolactConfig 3 | from .inference_yolact import PostprocessYolact 4 | 5 | # from .loss_yolact import YolactLoss 6 | 7 | 8 | __all__ = ["YolactConfig", "PostprocessYolact", "YolactPredictHead", "YolactModel"] 9 | 10 | # _import_structure = { 11 | # 'configuration_yolact': ['YolactConfig'], 12 | # 'architecture_yolact': ['YolactPredictNeck', 'YolactPredictHead', 'YolactModel'], 13 | # 'loss_yolact': ['YolactLoss'] 14 | # } 15 | # import importlib 16 | # import os 17 | # import sys 18 | 19 | 20 | # class _BaseLazyModule(ModuleType): 21 | # """ 22 | # Module class that surfaces all objects but only performs associated imports when the objects are requested. 23 | # """ 24 | 25 | # # Very heavily inspired by optuna.integration._IntegrationModule 26 | # # https://github.com/optuna/optuna/blob/master/optuna/integration/__init__.py 27 | # def __init__(self, name, import_structure): 28 | # super().__init__(name) 29 | # self._modules = set(import_structure.keys()) 30 | # self._class_to_module = {} 31 | # for key, values in import_structure.items(): 32 | # for value in values: 33 | # self._class_to_module[value] = key 34 | # # Needed for autocompletion in an IDE 35 | # self.__all__ = list(import_structure.keys()) + sum(import_structure.values(), []) 36 | 37 | # # Needed for autocompletion in an IDE 38 | # def __dir__(self): 39 | # return super().__dir__() + self.__all__ 40 | 41 | # def __getattr__(self, name: str) -> Any: 42 | # if name in self._modules: 43 | # value = self._get_module(name) 44 | # elif name in self._class_to_module.keys(): 45 | # module = self._get_module(self._class_to_module[name]) 46 | # value = getattr(module, name) 47 | # else: 48 | # raise AttributeError(f"module {self.__name__} has no attribute {name}") 49 | 50 | # setattr(self, name, value) 51 | # return value 52 | 53 | # def _get_module(self, module_name: str) -> ModuleType: 54 | # raise NotImplementedError 55 | 56 | 57 | # class _LazyModule(_BaseLazyModule): 58 | # """ 59 | # Module class that surfaces all objects but only performs associated imports when the objects are requested. 60 | # """ 61 | 62 | # __file__ = globals()["__file__"] 63 | # __path__ = [os.path.dirname(__file__)] 64 | 65 | # def _get_module(self, module_name: str): 66 | # return importlib.import_module("." 
+ module_name, self.__name__) 67 | -------------------------------------------------------------------------------- /boda/models/solov2/configuration_solov1.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Tuple, Sequence, Union, Any 3 | 4 | from ...base_configuration import BaseConfig 5 | 6 | 7 | solov1_pretrained_models = {"solov1-base": ""} 8 | 9 | 10 | class Solov1Config(BaseConfig): 11 | """Configuration for SOLOv1 12 | 13 | Arguments: 14 | max_size (): 15 | padding (): 16 | proto_net_structure (List): 17 | """ 18 | 19 | config_name = "solov1" 20 | 21 | def __init__( 22 | self, 23 | num_classes: int = 80, 24 | min_size: int = 800, 25 | max_size: int = 1333, 26 | preserve_aspect_ratio: bool = True, 27 | selected_layers: Sequence[int] = [0, 1, 2, 3], 28 | fpn_channels: int = 256, 29 | num_extra_fpn_layers: int = 1, 30 | scales: Sequence[int] = [[8, 32], [16, 64], [32, 128], [64, 256], [128, 512]], 31 | grids: Sequence[int] = [40, 36, 24, 16, 12], 32 | strides: Sequence[int] = [4, 8, 16, 32, 64], 33 | base_edges: Sequence[int] = [16, 32, 64, 128, 256], 34 | **kwargs 35 | ) -> None: 36 | super().__init__(max_size=max_size, **kwargs) 37 | self.num_classes = num_classes 38 | self.selected_layers = selected_layers 39 | self.fpn_channels = fpn_channels 40 | self.num_extra_fpn_layers = num_extra_fpn_layers 41 | self.scales = scales 42 | self.grids = grids 43 | self.strides = strides 44 | self.base_edges = base_edges 45 | 46 | self.cate_down_pos = 0 47 | 48 | 49 | class DecoupledSolov1Config(BaseConfig): 50 | """Configuration for SOLOv1 51 | 52 | Arguments: 53 | max_size (): 54 | padding (): 55 | proto_net_structure (List): 56 | """ 57 | 58 | config_name = "solov1" 59 | 60 | def __init__( 61 | self, 62 | num_classes: int = 80, 63 | max_size: Tuple[int] = (1333, 800), 64 | selected_layers: Sequence[int] = [0, 1, 2, 3], 65 | fpn_channels: int = 256, 66 | num_extra_fpn_layers: int = 1, 67 | scales: Sequence[int] = [[8, 32], [16, 64], [32, 128], [64, 256], [128, 512]], 68 | grids: Sequence[int] = [40, 36, 24, 16, 12], 69 | strides: Sequence[int] = [4, 8, 16, 32, 64], 70 | base_edges: Sequence[int] = [16, 32, 64, 128, 256], 71 | **kwargs 72 | ) -> None: 73 | super().__init__(max_size=max_size, **kwargs) 74 | self.num_classes = num_classes 75 | self.selected_layers = selected_layers 76 | self.fpn_channels = fpn_channels 77 | self.num_extra_fpn_layers = num_extra_fpn_layers 78 | self.scales = scales 79 | self.grids = grids 80 | self.strides = strides 81 | self.base_edges = base_edges 82 | 83 | self.cate_down_pos = 0 84 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages 2 | from numpy.distutils.core import setup 3 | from numpy.distutils.misc_util import Configuration 4 | from distutils.command.clean import clean as Clean 5 | from distutils.command.sdist import sdist 6 | import os 7 | import shutil 8 | 9 | 10 | def configuration(parent_package='', top_path=None): 11 | config = Configuration(None, parent_package, top_path) 12 | 13 | config.set_options( 14 | ignore_setup_xxx_py=True, 15 | assume_default_configuration=True, 16 | delegate_options_to_subpackages=True, 17 | quiet=True) 18 | config.add_subpackage('boda') 19 | 20 | return config 21 | 22 | 23 | class CleanCommand(Clean): 24 | description = 'Remove build artifacts from the source tree' 25 | 26 | def run(self): 27 
| Clean.run(self) 28 | # Remove c files if we are not within a sdist package 29 | cwd = os.path.abspath(os.path.dirname(__file__)) 30 | remove_c_files = not os.path.exists(os.path.join(cwd, 'PKG-INFO')) 31 | if remove_c_files: 32 | print('Will remove generated .c files') 33 | if os.path.exists('build'): 34 | shutil.rmtree('build') 35 | for dirpath, dirnames, filenames in os.walk('sklearn'): 36 | for filename in filenames: 37 | if any(filename.endswith(suffix) for suffix in 38 | (".so", ".pyd", ".dll", ".pyc")): 39 | os.unlink(os.path.join(dirpath, filename)) 40 | continue 41 | extension = os.path.splitext(filename)[1] 42 | if remove_c_files and extension in ['.c', '.cpp']: 43 | pyx_file = str.replace(filename, extension, '.pyx') 44 | if os.path.exists(os.path.join(dirpath, pyx_file)): 45 | os.unlink(os.path.join(dirpath, filename)) 46 | for dirname in dirnames: 47 | if dirname == '__pycache__': 48 | shutil.rmtree(os.path.join(dirpath, dirname)) 49 | 50 | 51 | cmdclass = {'clean': CleanCommand, 'sdist': sdist} 52 | 53 | 54 | def setup_packages(): 55 | metadata = dict( 56 | name='boda', 57 | version='0.0.1', 58 | install_requires=['torch', 'numpy', 'cython'], 59 | author='Kang, Kyung-Su', 60 | author_email='unerue@me.com', 61 | maintainer='Kang, Kyung-Su', 62 | maintainer_email='unerue@me.com', 63 | description='boda is a library for instance segmentation.', 64 | packages=find_packages(), 65 | # include_package_data=True, 66 | classifiers=[ 67 | 'Programming Language :: C', 68 | 'Programming Language :: Python', 69 | 'Programming Language :: Python :: 3.6', 70 | 'Programming Language :: Python :: 3.7', 71 | 'Programming Language :: Python :: 3.8'], 72 | cmdclass=cmdclass, 73 | configuration=configuration, 74 | python_requires='>=3.6') 75 | 76 | setup(**metadata) 77 | 78 | 79 | if __name__ == '__main__': 80 | setup_packages() 81 | -------------------------------------------------------------------------------- /boda/ops/anchor_generators.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import itertools 3 | import math 4 | from collections import defaultdict 5 | from typing import List, Tuple 6 | 7 | import torch 8 | from torch import Tensor 9 | 10 | 11 | def default_box_cache(func): 12 | cache = defaultdict() 13 | 14 | @functools.wraps(func) 15 | def wrapper(*args): 16 | k, v = func(*args) 17 | if k not in cache: 18 | cache[k] = v 19 | return k, cache[k] 20 | 21 | return wrapper 22 | 23 | 24 | class DefaultBoxGenerator: 25 | """ 26 | Args: 27 | aspect_ratios (:obj:`List[int]`): 28 | scales (:obj:): 29 | max_size (): 30 | use_preapply_sqrt (): 31 | use_pixel_scales (): 32 | use_square_anchors (:obj:`bool`): default `True` 33 | """ 34 | 35 | def __init__( 36 | self, 37 | aspect_ratios: List[int], 38 | scales: List[float], 39 | max_size: Tuple[int] = (550, 550), 40 | use_preapply_sqrt: bool = True, 41 | use_pixel_scales: bool = True, 42 | use_square_anchors: bool = True, 43 | ) -> None: 44 | self.aspect_ratios = aspect_ratios 45 | self.scales = scales 46 | self.clip = False 47 | self.max_size = max_size 48 | self.use_preapply_sqrt = use_preapply_sqrt 49 | self.use_pixel_scales = use_pixel_scales 50 | self.use_square_anchors = use_square_anchors 51 | 52 | @default_box_cache 53 | def generate( 54 | self, h: int, w: int, device: str = "cuda:0" 55 | ) -> Tuple[Tuple[int], Tensor]: 56 | """DefaultBoxGenerator is 57 | 58 | Args: 59 | h (:obj:`int`): feature map size from backbone 60 | w (:obj:`int`): feature map size from backbone 61 | 
device (:obj:`str`): default `cuda` 62 | 63 | Returns 64 | size (:obj:`Tuple[int]`): feature map size 65 | prior_boxes (:obj:`FloatTensor[N, 4]`): 66 | """ 67 | size = (h, w) 68 | default_boxes = [] 69 | for j, i in itertools.product(range(h), range(w)): 70 | cx = (i + 0.5) / w 71 | cy = (j + 0.5) / h 72 | for ratios in self.aspect_ratios: 73 | for scale in self.scales: 74 | for ratio in ratios: 75 | if not self.use_preapply_sqrt: 76 | ratio = math.sqrt(ratio) 77 | 78 | if self.use_pixel_scales: 79 | _h = scale / ratio / self.max_size[0] 80 | _w = scale * ratio / self.max_size[1] 81 | else: 82 | _h = scale / ratio / h 83 | _w = scale * ratio / w 84 | 85 | if self.use_square_anchors: 86 | _h = _w 87 | 88 | default_boxes += [cx, cy, _w, _h] 89 | 90 | default_boxes = torch.tensor( 91 | default_boxes, dtype=torch.float32, device=device, requires_grad=False 92 | ).view(-1, 4) 93 | if self.clip: 94 | default_boxes.clamp_(min=0, max=1) 95 | # prior_boxes.requires_grad = False 96 | 97 | return size, default_boxes 98 | -------------------------------------------------------------------------------- /boda/models/yolact/configuration_yolact.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional, Tuple, List, Union, Any 3 | 4 | from ...base_configuration import BaseConfig 5 | 6 | 7 | YOLACT_PRETRAINED_CONFIG_ARCHIVE_MAP = { 8 | "yolact-base": "https://unerue.synology.me/boda/models/yolact/yolact-base.json", 9 | "yolact-550-r50": "", 10 | "yolact-300-r101": "", 11 | "yolact-700-r101": "", 12 | } 13 | 14 | 15 | class YolactConfig(BaseConfig): 16 | """Configuration for YOLACT 17 | 18 | Args: 19 | num_classes (:obj:`int`): 20 | max_size (:obj:`Union[int, Tuple[int]]`): 21 | num_grids (:obj:`int`): 22 | num_grid_sizes (:obj:`int`): 23 | num_mask_dim (:obj:`int`): 24 | fpn_channels (:obj:`int`): 25 | extra_fpn_layers (:obj:`bool`): 26 | num_extra_fpn_layers (:obj:`int`): 27 | mask_dim (:obj:`int`): 28 | num_grid_sizes (:obj:`int`): 29 | num_mask_dim (:obj:`int`): 30 | """ 31 | 32 | model_name = "yolact" 33 | 34 | def __init__( 35 | self, 36 | num_classes: int = 80, 37 | max_size: Tuple[int] = (550, 550), 38 | preserve_aspect_ratio: bool = False, 39 | selected_backbone_layers: List[int] = [1, 2, 3], 40 | fpn_channels: int = 256, 41 | extra_fpn_layers: bool = True, 42 | num_extra_fpn_layers: int = 2, 43 | aspect_ratios: List = [1, 1 / 2, 2], 44 | scales: List = [24, 48, 96, 192, 384], 45 | num_extra_box_layers: int = 0, 46 | num_extra_mask_layers: int = 0, 47 | num_extra_score_layers: int = 0, 48 | use_preapply_sqrt: bool = False, 49 | use_pixel_scales: bool = True, 50 | use_square_anchors: bool = True, 51 | num_grids: int = 0, 52 | mask_size: int = 16, 53 | mask_dim: int = 0, 54 | box_weight: float = 1.0, 55 | mask_weight: float = 6.125, 56 | score_weight: float = 1.0, 57 | semantic_weight: float = 1.0, 58 | **kwargs 59 | ) -> None: 60 | super().__init__(max_size=max_size, **kwargs) 61 | self.num_classes = num_classes + 1 62 | self.preserve_aspect_ratio = preserve_aspect_ratio 63 | self.fpn_channels = fpn_channels 64 | self.extra_fpn_layers = extra_fpn_layers 65 | self.num_extra_fpn_layers = num_extra_fpn_layers 66 | self.selected_backbone_layers = selected_backbone_layers 67 | self.aspect_ratios = aspect_ratios 68 | self.scales = scales 69 | self.num_grids = num_grids 70 | self.mask_size = mask_size 71 | self.use_preapply_sqrt = use_preapply_sqrt 72 | self.use_pixel_scales = use_pixel_scales 73 | self.use_square_anchors = 
use_square_anchors 74 | 75 | self.num_extra_box_layers = num_extra_box_layers 76 | self.num_extra_mask_layers = num_extra_mask_layers 77 | self.num_extra_score_layers = num_extra_score_layers 78 | self.num_grids = num_grids 79 | self.mask_size = mask_size 80 | self.mask_dim = mask_dim 81 | 82 | self.box_weight = box_weight 83 | self.mask_weight = mask_weight 84 | self.score_weight = score_weight 85 | self.semantic_weight = semantic_weight 86 | 87 | self.label_map = kwargs.get("label_map", None) 88 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |


13 | 14 | ## Deep learning-based Computer Vision Models for PyTorch 15 | 16 | Boda (보다) means to see in Korean. This library was inspired by 🤗 Transformers. 17 | 18 | ## Get started 19 | 20 | ```bash 21 | git clone https://github.com/unerue/boda.git && cd boda 22 | conda env create -f environment.yml 23 | conda activate boda 24 | python setup.py install 25 | ``` 26 | 27 | ```python 28 | from boda.models import YolactConfig, YolactModel, YolactLoss 29 | 30 | config = YolactConfig(num_classes=80) 31 | model = YolactModel(config) 32 | criterion = YolactLoss() 33 | 34 | outputs = model(images) 35 | losses = criterion(outputs, targets) 36 | print(losses) 37 | ``` 38 | 39 | ## Comparison 40 | 41 | |Model|State|Training|Inference|Original|Ours| 42 | |:----|:---:|:------:|:-------:|-------:|---:| 43 | |Mask R-CNN|😡|❌|❌||| 44 | |[YOLACT](boda/models/yolact/)|😆|✔️|✔️||| 45 | |SOLOv2|🙂|❌|✔️||| 46 | |[CenterMask]()|😡|❌|❌||| 47 | |YOLACT EDGE|😡|❌|❌||| 48 | || 49 | 50 | ### Misc 51 | 52 | |Model|State|Training|Inference|Original|Ours| 53 | |:----|:---:|:------:|:-------:|-------:|---:| 54 | |[SSD](boda/models/ssd/)|🙂|❌|✔️||| 55 | |Faster R-CNN|🙂|❌|✔️||| 56 | |[FCOS](boda/models/fcos/)|🙂|❌|✔️||| 57 | |Keypoint R-CNN|🙂|❌|✔️||| 58 | |YOLOv4|😡|❌|❌||| 59 | || 60 | 61 | ## Pretrained Model Configurations 62 | 63 | |Model|Config name|Status|Original|Ours| 64 | |:----|:----|:------:|-------:|---:| 65 | |[SSD](boda/models/ssd/)|`ssd-base`|🙂||| 66 | | |`ssd-512`|😡||| 67 | |[Faster R-CNN]()|`faster-rcnn-base`|🙂||| 68 | | |`faster-rcnn-r101`|😡||| 69 | |Mask R-CNN|`mask-rcnn-base`|😡||| 70 | | |`mask-rcnn-r50`|😡||| 71 | |Keypoint R-CNN|`keypoint-rcnn-base`|🙂||| 72 | | |`keypoint-rcnn-mobile`|😡||| 73 | |[FCOS](boda/models/fcos/)|`fcos-base`|🙂||| 74 | |PolarMask|`polarmask-base`|😡||| 75 | |YOLOv4|`yolov4-base`|😡||| 76 | |[YOLACT](boda/models/yolact/)|`yolact-base`|😆||| 77 | | |`yolact-r101`|😡||| 78 | | |`yolact-r101-300`|😡||| 79 | | |`yolact-r101-700`|😡||| 80 | |[SOLOv1](boda/models/solov1/)|`solov1-base`|🙂||| 81 | | |`solov1-r101`|😡||| 82 | |SOLOv2|`solov2-base`|😡|||| 83 | |[CenterMask]()|`centermask-base`|😡||| 84 | |YOLACT EDGE|`yolact-edge-base`|😡||| 85 | || -------------------------------------------------------------------------------- /boda/models/feature_extractor/vggnet.py: -------------------------------------------------------------------------------- 1 | import math 2 | from collections import OrderedDict 3 | from typing import Tuple, List, Dict, Optional 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | from torch import nn, Tensor 8 | from torch.nn.modules.batchnorm import BatchNorm2d 9 | 10 | # from ..base_architecture import Backbone 11 | 12 | 13 | class VGG(nn.Module): 14 | """ 15 | This function is derived from torchvision VGG make_layers() 16 | https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py 17 | https://github.com/dbolya/yolact/blob/master/backbone.py 18 | """ 19 | 20 | def __init__( 21 | self, 22 | structure, 23 | bn: bool = False, 24 | norm_layer: Optional[nn.Module] = nn.BatchNorm2d, 25 | num_classes: int = 1000, 26 | ) -> None: 27 | super().__init__() 28 | self.bn = bn 29 | self.in_channels = 3 30 | self.channels = [] 31 | self.layers = nn.ModuleList() 32 | 33 | for layer in structure: 34 | self._make_layer(layer) 35 | 36 | def forward(self, inputs): 37 | outputs = [] 38 | for layer in self.layers: 39 | inputs = layer(inputs) 40 | outputs.append(inputs) 41 | 42 | return outputs 43 | 44 | def _make_layer(self, config): 45 | # _layers = [] 46 | _layers = 
OrderedDict() 47 | i = 0 48 | for v in config: 49 | kwargs = None 50 | if isinstance(v, tuple): 51 | kwargs = v[1] 52 | v = v[0] 53 | 54 | if v == "M": 55 | if kwargs is None: 56 | kwargs = {"kernel_size": 2, "stride": 2} 57 | 58 | # _layers.append(nn.MaxPool2d(**kwargs)) 59 | # _layers.update({'maxpool': nn.MaxPool2d(**kwargs)}) 60 | _layers.update({f"maxpool{i}": nn.MaxPool2d(**kwargs)}) 61 | else: 62 | if kwargs is None: 63 | kwargs = {"kernel_size": 3, "padding": 1} 64 | 65 | conv2d = nn.Conv2d( 66 | in_channels=self.in_channels, out_channels=v, **kwargs 67 | ) 68 | 69 | if self.bn: 70 | # _layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU()] 71 | # _layers.update({'conv': conv2d, 'bn': nn.BatchNorm2d(v), 'relu': nn.ReLU()}) 72 | _layers.update( 73 | { 74 | f"{i}": conv2d, 75 | f"bn{i}": nn.BatchNorm2d(v), 76 | f"relu{i}": nn.ReLU(), 77 | } 78 | ) 79 | else: 80 | # _layers += [conv2d, nn.ReLU()] 81 | # _layers.update({'conv': conv2d, 'relu': nn.ReLU()}) 82 | _layers.update({f"{i}": conv2d, f"relu{i}": nn.ReLU()}) 83 | 84 | self.in_channels = v 85 | i += 1 86 | 87 | self.channels.append(self.in_channels) 88 | self.layers.append(nn.Sequential(_layers)) 89 | 90 | 91 | structures = { 92 | "vgg16": [ 93 | [64, 64], 94 | ["M", 128, 128], 95 | ["M", 256, 256, 256], 96 | [("M", {"kernel_size": 2, "stride": 2, "ceil_mode": True}), 512, 512, 512], 97 | ["M", 512, 512, 512], 98 | ] 99 | } 100 | 101 | 102 | def vgg16(config: Dict = None): 103 | model = VGG(structures["vgg16"]) 104 | 105 | return model 106 | -------------------------------------------------------------------------------- /boda/models/ssd/README.md: -------------------------------------------------------------------------------- 1 | # SSD (Single Shot MultiBox Object Detector) 2 | 3 | ``` 4 | ██████╗ ██████╗ ███████╗ 5 | ██╔════╝ ██╔════╝ ██╔═══██╗ 6 | ╚██████╗ ╚██████╗ ██║ ██║ 7 | ╚════██╗ ╚════██╗██║ ██║ 8 | ██████╔╝ ██████╔╝███████╔╝ 9 | ╚═════╝ ╚═════╝ ╚══════╝ 10 | ``` 11 | 12 | ## SSD Architecture 13 | 14 | ```{bash} 15 | ========================================================================================== 16 | Layer (type:depth-idx) Output Shape Param # 17 | ========================================================================================== 18 | ├─VGG: 1-1 [-1, 64, 300, 300] -- 19 | | └─ModuleList: 2 [] -- 20 | | | └─Sequential: 3-1 [-1, 64, 300, 300] 38,720 21 | | | └─Sequential: 3-2 [-1, 128, 150, 150] 221,440 22 | | | └─Sequential: 3-3 [-1, 256, 75, 75] 1,475,328 23 | | | └─Sequential: 3-4 [-1, 512, 38, 38] 5,899,776 24 | | | └─Sequential: 3-5 [-1, 512, 19, 19] 7,079,424 25 | ├─SsdPredictNeck: 1-2 [-1, 512, 38, 38] -- 26 | | └─L2Norm: 2-1 [-1, 512, 38, 38] 512 27 | | └─ModuleList: 2 [] -- 28 | | | └─Sequential: 3-6 [-1, 1024, 19, 19] 5,769,216 29 | | | └─Sequential: 3-7 [-1, 512, 10, 10] 1,442,560 30 | | | └─Sequential: 3-8 [-1, 256, 5, 5] 360,832 31 | | | └─Sequential: 3-9 [-1, 256, 3, 3] 328,064 32 | | | └─Sequential: 3-10 [-1, 256, 1, 1] 328,064 33 | ├─ModuleList: 1 [] -- 34 | | └─SsdPredictHead: 2-2 [[-1, 4]] -- 35 | | | └─Sequential: 3-11 [-1, 16, 38, 38] 73,744 36 | | | └─Sequential: 3-12 [-1, 84, 38, 38] 387,156 37 | | └─SsdPredictHead: 2-3 [[-1, 4]] -- 38 | | | └─Sequential: 3-13 [-1, 24, 19, 19] 221,208 39 | | | └─Sequential: 3-14 [-1, 126, 19, 19] 1,161,342 40 | | └─SsdPredictHead: 2-4 [[-1, 4]] -- 41 | | | └─Sequential: 3-15 [-1, 24, 10, 10] 110,616 42 | | | └─Sequential: 3-16 [-1, 126, 10, 10] 580,734 43 | | └─SsdPredictHead: 2-5 [[-1, 4]] -- 44 | | | └─Sequential: 3-17 [-1, 24, 5, 5] 55,320 45 | | | 
└─Sequential: 3-18 [-1, 126, 5, 5] 290,430 46 | | └─SsdPredictHead: 2-6 [[-1, 4]] -- 47 | | | └─Sequential: 3-19 [-1, 16, 3, 3] 36,880 48 | | | └─Sequential: 3-20 [-1, 84, 3, 3] 193,620 49 | | └─SsdPredictHead: 2-7 [[-1, 4]] -- 50 | | | └─Sequential: 3-21 [-1, 16, 1, 1] 36,880 51 | | | └─Sequential: 3-22 [-1, 84, 1, 1] 193,620 52 | ========================================================================================== 53 | Total params: 26,285,486 54 | Trainable params: 26,285,486 55 | Non-trainable params: 0 56 | Total mult-adds (G): 31.43 57 | ========================================================================================== 58 | Input size (MB): 1.03 59 | Forward/backward pass size (MB): 200.19 60 | Params size (MB): 100.27 61 | Estimated Total Size (MB): 301.49 62 | ``` 63 | 64 | weight https://s3.amazonaws.com/amdegroot-models/ssd300_mAP_77.43_v2.pth 65 | 66 | 67 | ## References 68 | 69 | [](https://github.com/amdegroot/ssd.pytorch) 70 | [](https://github.com/open-mmlab/mmdetection) 71 | []() -------------------------------------------------------------------------------- /boda/models/feature_extractor/pafpn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from .backbone_darknet import BaseConv, CSPLayer, DWConv 5 | 6 | 7 | class YOLOPAFPN(nn.Module): 8 | """ 9 | YOLOv3 model. Darknet 53 is the default backbone of this model. 10 | """ 11 | 12 | def __init__( 13 | self, 14 | in_channels, 15 | depth=1.0, 16 | width=1.0, 17 | depthwise=False, 18 | act="silu", 19 | ): 20 | super().__init__() 21 | self.in_channels = in_channels 22 | print(self.in_channels) 23 | Conv = DWConv if depthwise else BaseConv 24 | 25 | self.upsample = nn.Upsample(scale_factor=2, mode="nearest") 26 | self.lateral_conv0 = BaseConv( 27 | int(self.in_channels[2] * width), 28 | int(self.in_channels[1] * width), 29 | 1, 30 | 1, 31 | act=act, 32 | ) 33 | self.C3_p4 = CSPLayer( 34 | int(2 * self.in_channels[1] * width), 35 | int(self.in_channels[1] * width), 36 | round(3 * depth), 37 | False, 38 | depthwise=depthwise, 39 | act=act, 40 | ) # cat 41 | 42 | self.reduce_conv1 = BaseConv( 43 | int(self.in_channels[1] * width), 44 | int(self.in_channels[0] * width), 45 | 1, 46 | 1, 47 | act=act, 48 | ) 49 | self.C3_p3 = CSPLayer( 50 | int(2 * self.in_channels[0] * width), 51 | int(self.in_channels[0] * width), 52 | round(3 * depth), 53 | False, 54 | depthwise=depthwise, 55 | act=act, 56 | ) 57 | 58 | # bottom-up conv 59 | self.bu_conv2 = Conv( 60 | int(self.in_channels[0] * width), 61 | int(self.in_channels[0] * width), 62 | 3, 63 | 2, 64 | act=act, 65 | ) 66 | self.C3_n3 = CSPLayer( 67 | int(2 * self.in_channels[0] * width), 68 | int(self.in_channels[1] * width), 69 | round(3 * depth), 70 | False, 71 | depthwise=depthwise, 72 | act=act, 73 | ) 74 | 75 | # bottom-up conv 76 | self.bu_conv1 = Conv( 77 | int(self.in_channels[1] * width), 78 | int(self.in_channels[1] * width), 79 | 3, 80 | 2, 81 | act=act, 82 | ) 83 | self.C3_n4 = CSPLayer( 84 | int(2 * self.in_channels[1] * width), 85 | int(self.in_channels[2] * width), 86 | round(3 * depth), 87 | False, 88 | depthwise=depthwise, 89 | act=act, 90 | ) 91 | 92 | def forward(self, inputs): 93 | """ 94 | Args: 95 | inputs: input images. 96 | 97 | Returns: 98 | Tuple[Tensor]: FPN feature. 
99 | """ 100 | 101 | # backbone 102 | [x2, x1, x0] = inputs 103 | 104 | fpn_out0 = self.lateral_conv0(x0) # 1024->512/32 105 | f_out0 = self.upsample(fpn_out0) # 512/16 106 | f_out0 = torch.cat([f_out0, x1], 1) # 512->1024/16 107 | f_out0 = self.C3_p4(f_out0) # 1024->512/16 108 | 109 | fpn_out1 = self.reduce_conv1(f_out0) # 512->256/16 110 | f_out1 = self.upsample(fpn_out1) # 256/8 111 | f_out1 = torch.cat([f_out1, x2], 1) # 256->512/8 112 | pan_out2 = self.C3_p3(f_out1) # 512->256/8 113 | 114 | p_out1 = self.bu_conv2(pan_out2) # 256->256/16 115 | p_out1 = torch.cat([p_out1, fpn_out1], 1) # 256->512/16 116 | pan_out1 = self.C3_n3(p_out1) # 512->512/16 117 | 118 | p_out0 = self.bu_conv1(pan_out1) # 512->512/32 119 | p_out0 = torch.cat([p_out0, fpn_out0], 1) # 512->1024/32 120 | pan_out0 = self.C3_n4(p_out0) # 1024->1024/32 121 | 122 | outputs = (pan_out2, pan_out1, pan_out0) 123 | return outputs 124 | -------------------------------------------------------------------------------- /boda/models/yolox/utils.py: -------------------------------------------------------------------------------- 1 | # import torch 2 | # import torch.nn.functional as F 3 | # import torchvision 4 | 5 | 6 | # def preproc(img, input_size): 7 | # padded_img = torch.ones(3, input_size[0], input_size[1]) * 0.48 8 | # r = min(input_size[0] / img.shape[1], input_size[1] / img.shape[2]) 9 | # resized_img = F.interpolate( 10 | # img[None], 11 | # size=(int(img.shape[1] * r), int(img.shape[2] * r)), 12 | # mode='bilinear', 13 | # align_corners=False 14 | # )[0] 15 | # print(resized_img.shape) 16 | 17 | # padded_img[:, :int(img.shape[1] * r), :int(img.shape[2] * r)] = resized_img 18 | # padded_img = padded_img.contiguous().type(torch.float32) 19 | 20 | # return padded_img, r 21 | 22 | 23 | # def bboxes_iou(bboxes_a, bboxes_b, xyxy=True): 24 | # if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4: 25 | # raise IndexError 26 | 27 | # if xyxy: 28 | # tl = torch.max(bboxes_a[:, None, :2], bboxes_b[:, :2]) 29 | # br = torch.min(bboxes_a[:, None, 2:], bboxes_b[:, 2:]) 30 | # area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1) 31 | # area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1) 32 | # else: 33 | # tl = torch.max( 34 | # (bboxes_a[:, None, :2] - bboxes_a[:, None, 2:] / 2), 35 | # (bboxes_b[:, :2] - bboxes_b[:, 2:] / 2), 36 | # ) 37 | # br = torch.min( 38 | # (bboxes_a[:, None, :2] + bboxes_a[:, None, 2:] / 2), 39 | # (bboxes_b[:, :2] + bboxes_b[:, 2:] / 2), 40 | # ) 41 | 42 | # area_a = torch.prod(bboxes_a[:, 2:], 1) 43 | # area_b = torch.prod(bboxes_b[:, 2:], 1) 44 | # en = (tl < br).type(tl.type()).prod(dim=2) 45 | # area_i = torch.prod(br - tl, 2) * en # * ((tl < br).all()) 46 | # return area_i / (area_a[:, None] + area_b - area_i) 47 | 48 | 49 | # def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agnostic=False): 50 | # box_corner = prediction.new(prediction.shape) 51 | # box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 52 | # box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 53 | # box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 54 | # box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 55 | # prediction[:, :, :4] = box_corner[:, :, :4] 56 | 57 | # output = [None for _ in range(len(prediction))] 58 | # for i, image_pred in enumerate(prediction): 59 | 60 | # # If none are remaining => process next image 61 | # if not image_pred.size(0): 62 | # continue 63 | # # Get score and class with highest confidence 64 | # 
class_conf, class_pred = torch.max(image_pred[:, 5: 5 + num_classes], 1, keepdim=True) 65 | 66 | # conf_mask = (image_pred[:, 4] * class_conf.squeeze() >= conf_thre).squeeze() 67 | # # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred) 68 | # detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1) 69 | # detections = detections[conf_mask] 70 | # if not detections.size(0): 71 | # continue 72 | 73 | # if class_agnostic: 74 | # nms_out_index = torchvision.ops.nms( 75 | # detections[:, :4], 76 | # detections[:, 4] * detections[:, 5], 77 | # nms_thre, 78 | # ) 79 | # else: 80 | # nms_out_index = torchvision.ops.batched_nms( 81 | # detections[:, :4], 82 | # detections[:, 4] * detections[:, 5], 83 | # detections[:, 6], 84 | # nms_thre, 85 | # ) 86 | 87 | # detections = detections[nms_out_index] 88 | # if output[i] is None: 89 | # output[i] = detections 90 | # else: 91 | # output[i] = torch.cat((output[i], detections)) 92 | 93 | # output = [{ 94 | # 'boxes': o[:, :4], 95 | # 'labels': o[:, 6], 96 | # 'scores': o[:, 4] * o[:, 5], 97 | # } for o in output] 98 | 99 | # return output 100 | -------------------------------------------------------------------------------- /boda/models/feature_extractor/fpn.py: -------------------------------------------------------------------------------- 1 | from typing import List, Sequence 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | from torch import nn, Tensor 6 | 7 | 8 | class FeaturePyramidNetworks(nn.Module): 9 | """Pyramid Feature Networks 10 | 11 | Example:: 12 | >>> backbone = resnet101() 13 | >>> neck = FeaturePyramidNetworks(backbone.channels, [1, 2, 3]) 14 | >>> print(neck.channels, neck.selected_layers) 15 | """ 16 | 17 | def __init__( 18 | self, 19 | in_channels: Sequence[int] = [256, 512, 1024, 2048], 20 | selected_layers: Sequence[int] = [1, 2, 3], 21 | out_channels: int = 256, 22 | extra_layers: bool = False, 23 | num_extra_predict_layers: int = 2, 24 | **kwargs 25 | ) -> None: 26 | """ 27 | Args: 28 | channels (:obj:`List[int]`): out channels from backbone 29 | selected_layers (:obj:`List[int]`): to use selected backbone layers 30 | out_channels (:obj:`int`): 31 | num_extra_predict_layers (:obj:`int`): make extra predict layers for training 32 | num_downsamples: (:obj:`int`): use predict layers does not training 33 | """ 34 | super().__init__() 35 | self.in_channels = [in_channels[i] for i in selected_layers] 36 | self.selected_layers = selected_layers 37 | self.selected_backbones = selected_layers 38 | 39 | self.extra_layers = extra_layers 40 | self.num_extra_layers = 0 41 | self.num_extra_predict_layers = num_extra_predict_layers 42 | 43 | self.selected_layers = list( 44 | range(len(self.selected_layers) + self.num_extra_predict_layers) 45 | ) 46 | 47 | self.lateral_layers = nn.ModuleList() 48 | for _in_channels in reversed(self.in_channels): 49 | self.lateral_layers.append( 50 | nn.Conv2d( 51 | _in_channels, 52 | out_channels, 53 | kernel_size=kwargs.get("lateral_kernel_size", 1), 54 | stride=kwargs.get("lateral_stride", 1), 55 | padding=kwargs.get("lateral_padding", 0), 56 | ) 57 | ) 58 | 59 | self.predict_layers = nn.ModuleList() 60 | for _ in self.in_channels: 61 | self.predict_layers.append( 62 | nn.Conv2d( 63 | out_channels, 64 | out_channels, 65 | kernel_size=kwargs.get("", 3), 66 | stride=kwargs.get("", 1), 67 | padding=kwargs.get("", 1), 68 | ) 69 | ) 70 | 71 | if self.num_extra_predict_layers > 0: 72 | self.extra_layers = nn.ModuleList( 73 | [ 74 | nn.Conv2d( 75 | out_channels, 
out_channels, kernel_size=3, stride=2, padding=1 76 | ) 77 | for _ in range(self.num_extra_predict_layers) 78 | ] 79 | ) 80 | # self.channels.append(self.out_channels) 81 | 82 | self.channels = [out_channels] * len(self.selected_layers) 83 | 84 | def forward(self, inputs: List[Tensor]) -> List[Tensor]: 85 | """ 86 | Args: 87 | inputs (:obj:`FloatTensor[B, C, H, W]`) 88 | 89 | Returns: 90 | outputs (:obj:`List[FloatTensor[B, C, H, W]]`) 91 | """ 92 | device = inputs[0].device 93 | inputs = [inputs[i] for i in self.selected_backbones] 94 | 95 | x = torch.zeros(1, device=device) 96 | outputs = [x for _ in range(len(inputs))] 97 | 98 | i = len(inputs) 99 | for lateral_layer in self.lateral_layers: 100 | i -= 1 101 | if i < len(inputs) - 1: 102 | _, _, h, w = inputs[i].size() 103 | x = F.interpolate(x, size=(h, w), mode="bilinear", align_corners=False) 104 | 105 | x = x + lateral_layer(inputs[i]) 106 | outputs[i] = x 107 | 108 | i = len(inputs) 109 | for predict_layer in self.predict_layers: 110 | i -= 1 111 | outputs[i] = F.relu(predict_layer(outputs[i])) 112 | 113 | if self.extra_layers: 114 | for extra_layer in self.extra_layers: 115 | outputs.append(extra_layer(outputs[-1])) 116 | 117 | elif self.num_extra_predict_layers > 0: 118 | for _ in range(self.num_extra_predict_layers): 119 | outputs.append(self.predict_layers[-1](outputs[-1])) 120 | 121 | return outputs 122 | -------------------------------------------------------------------------------- /run_test.py: -------------------------------------------------------------------------------- 1 | from boda.models import YolactConfig, YolactModel 2 | from boda.models.feature_extractor import resnet50, resnet101 3 | # from boda.lib.torchinfo import summary 4 | from boda.lib.torchsummary import summary 5 | import torch 6 | 7 | config = YolactConfig(num_classes=80) 8 | model = YolactModel(config, backbone=resnet101()).to('cuda') 9 | model.train() 10 | print(model) 11 | # print(summary(model, input_size=(16, 3, 550, 550), verbose=0)) 12 | print(summary(model, input_data=(3, 550, 550), verbose=0)) 13 | 14 | # model.load_weights('cache/yolact-base.pth') 15 | 16 | 17 | from boda.models import PostprocessYolact 18 | from PIL import Image 19 | from torchvision import transforms 20 | 21 | image = Image.open('test6.jpg') 22 | model = YolactModel.from_pretrained('yolact-base').cuda() 23 | model.eval() 24 | 25 | aug = transforms.Compose([ 26 | transforms.ToTensor(), 27 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 28 | # transforms.Normalize([0.406, 0.456, 0.485], [0.225, 0.224, 0.229]) 29 | ]) 30 | 31 | outputs = model([aug(image).cuda()]) 32 | 33 | print(outputs.keys()) 34 | post = PostprocessYolact() 35 | outputs = post(outputs, outputs['image_sizes']) 36 | print(outputs[0]['boxes']) 37 | import cv2 38 | import numpy as np 39 | import matplotlib.pyplot as plt 40 | import matplotlib.patches as patches 41 | from skimage.measure import find_contours 42 | import adjustText 43 | 44 | np_image = np.array(image) 45 | np_image = cv2.cvtColor(np_image, cv2.COLOR_RGB2BGR) 46 | # for box in outputs[0]['boxes']: 47 | # # box = list(map(int, boxes[j, :])) 48 | # x1, y1, x2, y2 = box.detach().cpu().numpy() 49 | # # score = scores[j] 50 | # # label = labels[j] 51 | # cv2.rectangle(np_image, (x1, y1), (x2, y2), (0, 0, 255), thickness=1) 52 | 53 | plt.imshow(image) 54 | ax = plt.gca() 55 | threshold = 0 56 | COCO_CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 57 | 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 
58 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 59 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 60 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 61 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 62 | 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 63 | 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 64 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 65 | 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 66 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 67 | 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 68 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 69 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush') 70 | 71 | COLORS = { 72 | 1: 'deepskyblue', 73 | 2: 'orangered', 74 | 3: 'yellowgreen', 75 | 4: 'darkorange', 76 | 5: 'chocolate', 77 | 6: 'slategrey', 78 | 7: 'darkgoldenrod', 79 | 8: 'purple', 80 | 9: 'saddlebrown', 81 | 10: 'olive', 82 | } 83 | 84 | for output in outputs: 85 | boxes = output['boxes'] 86 | scores = output['scores'] 87 | labels = output['labels'] 88 | masks = output['masks'] 89 | print(scores) 90 | 91 | for i, box in enumerate(boxes): 92 | x1, y1, x2, y2 = box.detach().cpu().numpy() 93 | score = scores[i].detach().cpu().numpy() 94 | label = labels[i].detach().cpu().numpy() 95 | mask = masks[i].detach().cpu().numpy().astype(np.int64) 96 | 97 | color = COLORS[(label+1) % 11] 98 | contours = find_contours(mask, 0.5) 99 | 100 | if score >= threshold: 101 | cx = x2 - x1 102 | cy = y2 - y1 103 | ax.text(x1, y1, f"{COCO_CLASSES[label]}", c='black', size=8, va='bottom', ha='left', alpha=0.5) 104 | 105 | rect = patches.Rectangle( 106 | (x1, y1), 107 | cx, cy, 108 | linewidth=1, 109 | edgecolor=color, 110 | facecolor='none' 111 | ) 112 | ax.add_patch(rect) 113 | 114 | ## contours 115 | for contour in contours: 116 | shapes = [] 117 | for point in contour: 118 | shapes.append([int(point[1]), int(point[0])]) 119 | 120 | polygon_edge = patches.Polygon( 121 | (shapes), 122 | edgecolor=color, 123 | facecolor='none', 124 | linewidth=1, 125 | fill=False, 126 | ) 127 | 128 | polygon_fill = patches.Polygon( 129 | (shapes), 130 | alpha=0.5, 131 | edgecolor='none', 132 | facecolor=color, 133 | fill=True 134 | ) 135 | 136 | ax.add_patch(polygon_edge) 137 | ax.add_patch(polygon_fill) 138 | 139 | 140 | plt.axis('off') 141 | plt.savefig('test.jpg' ,dpi=100, bbox_inches='tight', pad_inches=0) 142 | -------------------------------------------------------------------------------- /boda/lib/torchsummary/layer_info.py: -------------------------------------------------------------------------------- 1 | """ layer_info.py """ 2 | from typing import Any, Dict, List, Optional, Sequence, Union 3 | 4 | import numpy as np 5 | import torch 6 | import torch.nn as nn 7 | 8 | DETECTED_INPUT_OUTPUT_TYPES = Union[ 9 | Sequence[Any], Dict[Any, torch.Tensor], torch.Tensor 10 | ] 11 | 12 | 13 | class LayerInfo: 14 | """Class that holds information about a layer module.""" 15 | 16 | def __init__( 17 | self, 18 | module: nn.Module, 19 | depth: int, 20 | depth_index: Optional[int] = None, 21 | parent_info: Optional["LayerInfo"] = None, 22 | ): 23 | # Identifying information 24 | self.layer_id = id(module) 25 | self.module = module 26 | self.class_name = str(module.__class__).split(".")[-1].split("'")[0] 27 | self.inner_layers: Dict[str, List[int]] = {} 28 | self.depth = depth 29 | self.depth_index = depth_index 30 | self.executed = 
False 31 | self.parent_info = parent_info 32 | 33 | # Statistics 34 | self.trainable = True 35 | self.is_recursive = False 36 | self.input_size: List[int] = [] 37 | self.output_size: List[int] = [] 38 | self.kernel_size: List[int] = [] 39 | self.num_params = 0 40 | self.macs = 0 41 | self.calculate_num_params() 42 | 43 | def __repr__(self) -> str: 44 | if self.depth_index is None: 45 | return f"{self.class_name}: {self.depth}" 46 | return f"{self.class_name}: {self.depth}-{self.depth_index}" 47 | 48 | @staticmethod 49 | def calculate_size( 50 | inputs: DETECTED_INPUT_OUTPUT_TYPES, batch_dim: Optional[int] 51 | ) -> List[int]: 52 | """Set input_size or output_size using the model's inputs.""" 53 | 54 | def nested_list_size(inputs: Sequence[Any]) -> List[int]: 55 | """Flattens nested list size.""" 56 | if hasattr(inputs[0], "size") and callable(inputs[0].size): 57 | return list(inputs[0].size()) 58 | if isinstance(inputs, (list, tuple)): 59 | return nested_list_size(inputs[0]) 60 | return [] 61 | 62 | # pack_padded_seq and pad_packed_seq store feature into data attribute 63 | if isinstance(inputs, (list, tuple)) and len(inputs) == 0: 64 | size = [] 65 | elif isinstance(inputs, (list, tuple)) and hasattr(inputs[0], "data"): 66 | size = list(inputs[0].data.size()) 67 | if batch_dim is not None: 68 | size = size[:batch_dim] + [-1] + size[batch_dim + 1 :] 69 | 70 | elif isinstance(inputs, dict): 71 | # TODO avoid overwriting the previous size every time? 72 | for _, output in inputs.items(): 73 | size = list(output.size()) 74 | if batch_dim is not None: 75 | size = [size[:batch_dim] + [-1] + size[batch_dim + 1 :]] 76 | 77 | elif isinstance(inputs, torch.Tensor): 78 | size = list(inputs.size()) 79 | if batch_dim is not None: 80 | size[batch_dim] = -1 81 | 82 | elif isinstance(inputs, (list, tuple)): 83 | size = nested_list_size(inputs) 84 | 85 | else: 86 | raise TypeError( 87 | "Model contains a layer with an unsupported " 88 | "input or output type: {}".format(inputs) 89 | ) 90 | 91 | return size 92 | 93 | def calculate_num_params(self) -> None: 94 | """ 95 | Set num_params, trainable, inner_layers, and kernel_size 96 | using the module's parameters. 97 | """ 98 | for name, param in self.module.named_parameters(): 99 | self.num_params += param.nelement() 100 | self.trainable &= param.requires_grad 101 | 102 | if name == "weight": 103 | ksize = list(param.size()) 104 | # to make [in_shape, out_shape, ksize, ksize] 105 | if len(ksize) > 1: 106 | ksize[0], ksize[1] = ksize[1], ksize[0] 107 | self.kernel_size = ksize 108 | 109 | # RNN modules have inner weights such as weight_ih_l0 110 | elif "weight" in name: 111 | self.inner_layers[name] = list(param.size()) 112 | 113 | def calculate_macs(self) -> None: 114 | """ 115 | Set MACs using the module's parameters and layer's output size, which is 116 | used for computing number of operations for Conv layers. 117 | """ 118 | for name, param in self.module.named_parameters(): 119 | if name == "weight": 120 | # ignore N, C when calculate Mult-Adds in ConvNd 121 | if "Conv" in self.class_name: 122 | self.macs += int(param.nelement() * np.prod(self.output_size[2:])) 123 | else: 124 | self.macs += param.nelement() 125 | # RNN modules have inner weights such as weight_ih_l0 126 | elif "weight" in name: 127 | self.macs += param.nelement() 128 | 129 | def check_recursive(self, summary_list: List["LayerInfo"]) -> None: 130 | """ 131 | If the current module is already-used, mark as (recursive). 132 | Must check before adding line to the summary. 
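        (For example, a prediction head that is shared across several feature maps
        keeps the same module id, so its parameters are counted once and later
        occurrences are printed as "(recursive)" in the summary table.)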
133 | """ 134 | if list(self.module.named_parameters()): 135 | for other_layer in summary_list: 136 | if self.layer_id == other_layer.layer_id: 137 | self.is_recursive = True 138 | 139 | def macs_to_str(self, reached_max_depth: bool) -> str: 140 | """Convert MACs to string.""" 141 | if self.num_params > 0 and ( 142 | reached_max_depth or not any(self.module.children()) 143 | ): 144 | return f"{self.macs:,}" 145 | return "--" 146 | 147 | def num_params_to_str(self, reached_max_depth: bool = False) -> str: 148 | """Convert num_params to string.""" 149 | if self.is_recursive: 150 | return "(recursive)" 151 | if self.num_params > 0: 152 | param_count_str = f"{self.num_params:,}" 153 | if reached_max_depth or not any(self.module.children()): 154 | if not self.trainable: 155 | return f"({param_count_str})" 156 | return param_count_str 157 | return "--" 158 | -------------------------------------------------------------------------------- /boda/lib/torchinfo/layer_info.py: -------------------------------------------------------------------------------- 1 | """ layer_info.py """ 2 | from typing import Any, Dict, Iterable, List, Optional, Sequence, Union 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | DETECTED_INPUT_OUTPUT_TYPES = Union[ 8 | Sequence[Any], Dict[Any, torch.Tensor], torch.Tensor 9 | ] 10 | 11 | 12 | class LayerInfo: 13 | """Class that holds information about a layer module.""" 14 | 15 | def __init__( 16 | self, 17 | module: nn.Module, 18 | depth: int, 19 | depth_index: Optional[int] = None, 20 | parent_info: Optional["LayerInfo"] = None, 21 | ): 22 | # Identifying information 23 | self.layer_id = id(module) 24 | self.module = module 25 | self.class_name = str(module.__class__).split(".")[-1].split("'")[0] 26 | self.inner_layers: Dict[str, List[int]] = {} 27 | self.depth = depth 28 | self.depth_index = depth_index 29 | self.executed = False 30 | self.parent_info = parent_info 31 | 32 | # Statistics 33 | self.trainable = True 34 | self.is_recursive = False 35 | self.input_size: List[int] = [] 36 | self.output_size: List[int] = [] 37 | self.kernel_size: List[int] = [] 38 | self.num_params = 0 39 | self.macs = 0 40 | self.calculate_num_params() 41 | 42 | def __repr__(self) -> str: 43 | layer_name = f"{self.class_name}: {self.depth}" 44 | if self.depth_index is None: 45 | return layer_name 46 | return f"{layer_name}-{self.depth_index}" 47 | 48 | @staticmethod 49 | def calculate_size( 50 | inputs: DETECTED_INPUT_OUTPUT_TYPES, batch_dim: Optional[int] 51 | ) -> List[int]: 52 | """Set input_size or output_size using the model's inputs.""" 53 | 54 | def nested_list_size(inputs: Sequence[Any]) -> List[int]: 55 | """Flattens nested list size.""" 56 | if hasattr(inputs[0], "size") and callable(inputs[0].size): 57 | return list(inputs[0].size()) 58 | if isinstance(inputs, (list, tuple)): 59 | return nested_list_size(inputs[0]) 60 | return [] 61 | 62 | size = [] 63 | # pack_padded_seq and pad_packed_seq store feature into data attribute 64 | if isinstance(inputs, (list, tuple)) and inputs and hasattr(inputs[0], "data"): 65 | size = list(inputs[0].data.size()) 66 | if batch_dim is not None: 67 | size = size[:batch_dim] + [-1] + size[batch_dim + 1 :] 68 | 69 | elif isinstance(inputs, dict): 70 | # TODO avoid overwriting the previous size every time? 
71 | for _, output in inputs.items(): 72 | size = list(output.size()) 73 | if batch_dim is not None: 74 | size = [size[:batch_dim] + [-1] + size[batch_dim + 1 :]] 75 | 76 | elif isinstance(inputs, torch.Tensor): 77 | size = list(inputs.size()) 78 | if batch_dim is not None: 79 | size[batch_dim] = -1 80 | 81 | elif isinstance(inputs, (list, tuple)): 82 | size = nested_list_size(inputs) 83 | 84 | else: 85 | raise TypeError( 86 | "Model contains a layer with an unsupported " 87 | f"input or output type: {inputs}" 88 | ) 89 | 90 | return size 91 | 92 | def calculate_num_params(self) -> None: 93 | """ 94 | Set num_params, trainable, inner_layers, and kernel_size 95 | using the module's parameters. 96 | """ 97 | for name, param in self.module.named_parameters(): 98 | self.num_params += param.nelement() 99 | self.trainable &= param.requires_grad 100 | 101 | if name == "weight": 102 | ksize = list(param.size()) 103 | # to make [in_shape, out_shape, ksize, ksize] 104 | if len(ksize) > 1: 105 | ksize[0], ksize[1] = ksize[1], ksize[0] 106 | self.kernel_size = ksize 107 | 108 | # RNN modules have inner weights such as weight_ih_l0 109 | elif "weight" in name: 110 | self.inner_layers[name] = list(param.size()) 111 | 112 | def calculate_macs(self) -> None: 113 | """ 114 | Set MACs using the module's parameters and layer's output size, which is 115 | used for computing number of operations for Conv layers. 116 | """ 117 | for name, param in self.module.named_parameters(): 118 | if name == "weight": 119 | # ignore N, C when calculate Mult-Adds in ConvNd 120 | if "Conv" in self.class_name: 121 | self.macs += int(param.nelement() * prod(self.output_size[2:])) 122 | else: 123 | self.macs += param.nelement() 124 | # RNN modules have inner weights such as weight_ih_l0 125 | elif "weight" in name: 126 | self.macs += param.nelement() 127 | 128 | def check_recursive(self, summary_list: List["LayerInfo"]) -> None: 129 | """ 130 | If the current module is already-used, mark as (recursive). 131 | Must check before adding line to the summary. 
132 | """ 133 | if list(self.module.named_parameters()): 134 | for other_layer in summary_list: 135 | if self.layer_id == other_layer.layer_id: 136 | self.is_recursive = True 137 | 138 | def macs_to_str(self, reached_max_depth: bool) -> str: 139 | """Convert MACs to string.""" 140 | if self.num_params > 0 and ( 141 | reached_max_depth or not any(self.module.children()) 142 | ): 143 | return f"{self.macs:,}" 144 | return "--" 145 | 146 | def num_params_to_str(self, reached_max_depth: bool = False) -> str: 147 | """Convert num_params to string.""" 148 | if self.is_recursive: 149 | return "(recursive)" 150 | if self.num_params > 0: 151 | param_count_str = f"{self.num_params:,}" 152 | if reached_max_depth or not any(self.module.children()): 153 | if not self.trainable: 154 | return f"({param_count_str})" 155 | return param_count_str 156 | return "--" 157 | 158 | 159 | def prod(num_list: Union[Iterable[Any], torch.Size]) -> int: 160 | result = 1 161 | for num in num_list: 162 | result *= num 163 | return abs(result) 164 | -------------------------------------------------------------------------------- /boda/models/solov2/architecture_decoupled_solov1.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import itertools 3 | import math 4 | import os 5 | from collections import defaultdict, OrderedDict 6 | from typing import Tuple, List, Dict, Any, Callable, TypeVar, Union, Sequence 7 | 8 | import torch 9 | import torch.nn.functional as F 10 | from torch import nn, Tensor 11 | 12 | from ...base_architecture import Neck, Head, Model 13 | from ...utils.mask import points_nms 14 | from ..backbone_resnet import resnet101, resnet50 15 | from ..neck_fpn import FeaturePyramidNetworks 16 | from .architecture_solov1 import ( 17 | InstanceLayer, 18 | CategoryLayer, 19 | Solov1PredictNeck, 20 | Solov1PredictHead, 21 | Solov1Model, 22 | ) 23 | from .configuration_solov1 import Solov1Config 24 | 25 | 26 | class DecoupledSolov1PredictHead(Solov1PredictHead): 27 | def __init__( 28 | self, 29 | config: Solov1Config, 30 | in_channels: int = 256, 31 | fpn_channels: int = 256, 32 | num_head_layers: int = 7, 33 | grids: List = [40, 36, 24, 16, 12], 34 | strides: List = [4, 8, 16, 32, 64], 35 | base_edges: List = [16, 32, 64, 128, 256], 36 | scales: List = [[8, 32], [16, 64], [32, 128], [64, 256], [128, 512]], 37 | num_classes: int = 80, 38 | ) -> None: 39 | super().__init__() 40 | self.config = config 41 | self.in_channels = in_channels 42 | self.fpn_channels = fpn_channels 43 | self.num_head_layers = num_head_layers 44 | self.grids = grids 45 | self.strides = strides 46 | self.base_edges = base_edges 47 | self.scales = scales 48 | self.num_classes = num_classes 49 | 50 | self.cate_down_pos = 0 51 | 52 | delattr(self, "instance_layers") 53 | 54 | self.x_instance_layers = nn.ModuleList() 55 | self.y_instance_layers = nn.ModuleList() 56 | self.category_layers = nn.ModuleList() 57 | for i in range(self.num_head_layers): 58 | if i == 0: 59 | in_channels = self.in_channels + 1 60 | else: 61 | in_channels = self.fpn_channels 62 | 63 | self.x_instance_layers.append( 64 | InstanceLayer( 65 | in_channels, 66 | self.fpn_channels, 67 | kernel_size=3, 68 | stride=1, 69 | padding=1, 70 | bias=True, 71 | num_groups=32, 72 | ) 73 | ) 74 | 75 | self.y_instance_layers.append( 76 | InstanceLayer( 77 | in_channels, 78 | self.fpn_channels, 79 | kernel_size=3, 80 | stride=1, 81 | padding=1, 82 | bias=True, 83 | num_groups=32, 84 | ) 85 | ) 86 | 87 | if i == 0: 88 | in_channels = 
self.in_channels 89 | else: 90 | in_channels = self.fpn_channels 91 | 92 | self.category_layers.append( 93 | CategoryLayer( 94 | in_channels, 95 | self.fpn_channels, 96 | kernel_size=3, 97 | stride=1, 98 | padding=1, 99 | bias=True, 100 | num_groups=32, 101 | ) 102 | ) 103 | 104 | self.x_decoupled_instance_layers = nn.ModuleList() 105 | self.y_decoupled_instance_layers = nn.ModuleList() 106 | self.pred_instance_layers = nn.ModuleList() 107 | for grid in self.grids: 108 | self.x_decoupled_instance_layers.append( 109 | nn.Conv2d(self.fpn_channels, grid, kernel_size=3, padding=1) 110 | ) 111 | self.y_decoupled_instance_layers.append( 112 | nn.Conv2d(self.fpn_channels, grid, kernel_size=3, padding=1) 113 | ) 114 | 115 | self.pred_category_layer = nn.Conv2d( 116 | self.fpn_channels, self.num_classes - 1, kernel_size=3, padding=1 117 | ) 118 | 119 | def forward(self, inputs: List[Tensor]): 120 | inputs = self.split_feature_maps(inputs) 121 | feature_map_sizes = [feature_map.size()[-2:] for feature_map in inputs] 122 | upsampled_size = (feature_map_sizes[0][0] * 2, feature_map_sizes[0][1] * 2) 123 | 124 | pred_masks, pred_labels = self.multi_apply( 125 | self.forward_single, 126 | inputs, 127 | list(range(len(self.grids))), 128 | upsampled_size=upsampled_size, 129 | ) 130 | 131 | return pred_masks, pred_labels 132 | 133 | def split_feature_maps(self, inputs: List[Tensor]) -> Tuple[Tensor]: 134 | """ 135 | Returns: 136 | """ 137 | return ( 138 | F.interpolate( 139 | inputs[0], 140 | scale_factor=0.5, 141 | mode="bilinear", 142 | align_corners=False, 143 | recompute_scale_factor=True, 144 | ), 145 | inputs[1], 146 | inputs[2], 147 | inputs[3], 148 | F.interpolate( 149 | inputs[4], 150 | size=inputs[3].shape[-2:], 151 | mode="bilinear", 152 | align_corners=False, 153 | ), 154 | ) 155 | 156 | def forward_single(self, inputs, idx, upsampled_size: Tuple = None): 157 | instances = inputs 158 | categories = inputs 159 | 160 | x_range = torch.linspace(-1, 1, instances.shape[-1], device=instances.device) 161 | y_range = torch.linspace(-1, 1, instances.shape[-2], device=categories.device) 162 | y, x = torch.meshgrid(y_range, x_range) 163 | y = y.expand([instances.shape[0], 1, -1, -1]) 164 | x = x.expand([instances.shape[0], 1, -1, -1]) 165 | coords = torch.cat([x, y], 1) 166 | instances = torch.cat([instances, coords], 1) 167 | 168 | for i, ins_layer in enumerate(self.instance_layers): 169 | instances = ins_layer(instances) 170 | 171 | instances = F.interpolate( 172 | instances, scale_factor=2.0, mode="bilinear", align_corners=False 173 | ) 174 | pred_masks = self.pred_instance_layers[idx](instances) 175 | 176 | for i, cate_layer in enumerate(self.category_layers): 177 | if i == self.cate_down_pos: 178 | seg_num_grid = self.grids[idx] 179 | categories = F.interpolate( 180 | categories, size=seg_num_grid, mode="bilinear", align_corners=False 181 | ) 182 | categories = cate_layer(categories) 183 | 184 | pred_labels = self.pred_category_layer(categories) 185 | 186 | return pred_masks, pred_labels 187 | -------------------------------------------------------------------------------- /boda/lib/torchinfo/model_statistics.py: -------------------------------------------------------------------------------- 1 | """ model_statistics.py """ 2 | from typing import Any, Dict, Iterable, List, Tuple, Union 3 | 4 | import torch 5 | 6 | from .formatting import FormattingOptions, Verbosity 7 | from .layer_info import LayerInfo, prod 8 | 9 | HEADER_TITLES = { 10 | "kernel_size": "Kernel Shape", 11 | "input_size": "Input 
Shape", 12 | "output_size": "Output Shape", 13 | "num_params": "Param #", 14 | "mult_adds": "Mult-Adds", 15 | } 16 | CORRECTED_INPUT_SIZE_TYPE = List[Union[Iterable[Any], torch.Size]] 17 | 18 | 19 | class ModelStatistics: 20 | """Class for storing results of the summary.""" 21 | 22 | def __init__( 23 | self, 24 | summary_list: List[LayerInfo], 25 | input_size: CORRECTED_INPUT_SIZE_TYPE, 26 | formatting: FormattingOptions, 27 | ): 28 | self.summary_list = summary_list 29 | self.input_size = input_size 30 | self.total_input = sum(prod(sz) for sz in input_size) if input_size else 0 31 | self.formatting = formatting 32 | self.total_params, self.trainable_params = 0, 0 33 | self.total_output, self.total_mult_adds = 0, 0 34 | for layer_info in summary_list: 35 | self.total_mult_adds += layer_info.macs 36 | if not layer_info.is_recursive: 37 | if layer_info.depth == formatting.max_depth or ( 38 | not any(layer_info.module.children()) 39 | and layer_info.depth < formatting.max_depth 40 | ): 41 | self.total_params += layer_info.num_params 42 | if layer_info.trainable: 43 | self.trainable_params += layer_info.num_params 44 | if layer_info.num_params > 0 and not any(layer_info.module.children()): 45 | # x2 for gradients 46 | self.total_output += 2 * prod(layer_info.output_size) 47 | 48 | def __repr__(self) -> str: 49 | """Print results of the summary.""" 50 | header_row = self.formatting.format_row("Layer (type:depth-idx)", HEADER_TITLES) 51 | layer_rows = self.layers_to_str() 52 | divider = "=" * self.formatting.get_total_width() 53 | summary_str = ( 54 | "{0}\n{1}{0}\n{2}{0}" 55 | "\nTotal params: {3:,}\n" 56 | "Trainable params: {4:,}\n" 57 | "Non-trainable params: {5:,}\n".format( 58 | divider, 59 | header_row, 60 | layer_rows, 61 | self.total_params, 62 | self.trainable_params, 63 | self.total_params - self.trainable_params, 64 | ) 65 | ) 66 | if self.input_size: 67 | summary_str += ( 68 | "Total mult-adds ({}): {:0.2f}\n" 69 | "{}\n" 70 | "Input size (MB): {:0.2f}\n" 71 | "Forward/backward pass size (MB): {:0.2f}\n" 72 | "Params size (MB): {:0.2f}\n" 73 | "Estimated Total Size (MB): {:0.2f}\n".format( 74 | *self.to_readable(self.total_mult_adds), 75 | divider, 76 | self.to_bytes(self.total_input), 77 | self.to_bytes(self.total_output), 78 | self.to_bytes(self.total_params), 79 | self.to_bytes( 80 | self.total_input + self.total_output + self.total_params 81 | ), 82 | ) 83 | ) 84 | summary_str += divider 85 | return summary_str 86 | 87 | @staticmethod 88 | def to_bytes(num: int) -> float: 89 | """Converts a number (assume floats, 4 bytes each) to megabytes.""" 90 | return num * 4 / 1e6 91 | 92 | @staticmethod 93 | def to_readable(num: int) -> Tuple[str, float]: 94 | """Converts a number to millions, billions, or trillions.""" 95 | if num >= 1e12: 96 | return "T", num / 1e12 97 | if num >= 1e9: 98 | return "G", num / 1e9 99 | return "M", num / 1e6 100 | 101 | def layer_info_to_row( 102 | self, layer_info: LayerInfo, reached_max_depth: bool = False 103 | ) -> str: 104 | """Convert layer_info to string representation of a row.""" 105 | 106 | def get_start_str(depth: int) -> str: 107 | return "├─" if depth == 1 else "| " * (depth - 1) + "└─" 108 | 109 | row_values = { 110 | "kernel_size": str(layer_info.kernel_size) 111 | if layer_info.kernel_size 112 | else "--", 113 | "input_size": str(layer_info.input_size), 114 | "output_size": str(layer_info.output_size), 115 | "num_params": layer_info.num_params_to_str(reached_max_depth), 116 | "mult_adds": layer_info.macs_to_str(reached_max_depth), 117 | } 
118 | depth = layer_info.depth 119 | name = get_start_str(depth) + str(layer_info) 120 | new_line = self.formatting.format_row(name, row_values) 121 | if self.formatting.verbose == Verbosity.VERBOSE.value: 122 | for inner_name, inner_shape in layer_info.inner_layers.items(): 123 | prefix = get_start_str(depth + 1) 124 | extra_row_values = {"kernel_size": str(inner_shape)} 125 | new_line += self.formatting.format_row( 126 | prefix + inner_name, extra_row_values 127 | ) 128 | return new_line 129 | 130 | def layers_to_str(self) -> str: 131 | """Print each layer of the model using a fancy branching diagram.""" 132 | new_str = "" 133 | current_hierarchy: Dict[int, LayerInfo] = {} 134 | 135 | for layer_info in self.summary_list: 136 | if layer_info.depth > self.formatting.max_depth: 137 | continue 138 | 139 | # create full hierarchy of current layer 140 | hierarchy = {} 141 | parent = layer_info.parent_info 142 | while parent is not None and parent.depth > 0: 143 | hierarchy[parent.depth] = parent 144 | parent = parent.parent_info 145 | 146 | # show hierarchy if it is not there already 147 | for d in range(1, layer_info.depth): 148 | if ( 149 | d not in current_hierarchy 150 | or current_hierarchy[d].module is not hierarchy[d].module 151 | ): 152 | new_str += self.layer_info_to_row(hierarchy[d]) 153 | current_hierarchy[d] = hierarchy[d] 154 | 155 | reached_max_depth = layer_info.depth == self.formatting.max_depth 156 | new_str += self.layer_info_to_row(layer_info, reached_max_depth) 157 | current_hierarchy[layer_info.depth] = layer_info 158 | 159 | # remove deeper hierarchy 160 | d = layer_info.depth + 1 161 | while d in current_hierarchy: 162 | current_hierarchy.pop(d) 163 | d += 1 164 | 165 | return new_str 166 | -------------------------------------------------------------------------------- /boda/models/ssd/inference_ssd.py: -------------------------------------------------------------------------------- 1 | # import torch 2 | # from torch.autograd import Function 3 | # from ..box_utils import decode, nms 4 | # from data import voc as cfg 5 | # from torchvision.ops import nms 6 | 7 | 8 | # # Adapted from https://github.com/Hakuyume/chainer-ssd 9 | # def decode(loc, priors, variances): 10 | # """Decode locations from predictions using priors to undo 11 | # the encoding we did for offset regression at train time. 12 | # Args: 13 | # loc (tensor): location predictions for loc layers, 14 | # Shape: [num_priors,4] 15 | # priors (tensor): Prior boxes in center-offset form. 16 | # Shape: [num_priors,4]. 17 | # variances: (list[float]) Variances of priorboxes 18 | # Return: 19 | # decoded bounding box predictions 20 | # """ 21 | 22 | # boxes = torch.cat(( 23 | # priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], 24 | # priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) 25 | # boxes[:, :2] -= boxes[:, 2:] / 2 26 | # boxes[:, 2:] += boxes[:, :2] 27 | # return boxes 28 | 29 | 30 | # def nms(boxes, scores, overlap=0.5, top_k=200): 31 | # """Apply non-maximum suppression at test time to avoid detecting too many 32 | # overlapping bounding boxes for a given object. 33 | # Args: 34 | # boxes: (tensor) The location preds for the img, Shape: [num_priors,4]. 35 | # scores: (tensor) The class predscores for the img, Shape:[num_priors]. 36 | # overlap: (float) The overlap thresh for suppressing unnecessary boxes. 37 | # top_k: (int) The Maximum number of box preds to consider. 38 | # Return: 39 | # The indices of the kept boxes with respect to num_priors. 
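#     Example (a rough usage sketch for this commented-out helper; `decoded_boxes`
#     and `class_scores` are assumed [num_priors, 4] / [num_priors] tensors):
#         keep, count = nms(decoded_boxes, class_scores, overlap=0.5, top_k=200)
#         kept_boxes = decoded_boxes[keep[:count]]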
40 | # """ 41 | 42 | # keep = scores.new(scores.size(0)).zero_().long() 43 | # if boxes.numel() == 0: 44 | # return keep 45 | # x1 = boxes[:, 0] 46 | # y1 = boxes[:, 1] 47 | # x2 = boxes[:, 2] 48 | # y2 = boxes[:, 3] 49 | # area = torch.mul(x2 - x1, y2 - y1) 50 | # v, idx = scores.sort(0) # sort in ascending order 51 | # # I = I[v >= 0.01] 52 | # idx = idx[-top_k:] # indices of the top-k largest vals 53 | # xx1 = boxes.new() 54 | # yy1 = boxes.new() 55 | # xx2 = boxes.new() 56 | # yy2 = boxes.new() 57 | # w = boxes.new() 58 | # h = boxes.new() 59 | 60 | # # keep = torch.Tensor() 61 | # count = 0 62 | # while idx.numel() > 0: 63 | # i = idx[-1] # index of current largest val 64 | # # keep.append(i) 65 | # keep[count] = i 66 | # count += 1 67 | # if idx.size(0) == 1: 68 | # break 69 | # idx = idx[:-1] # remove kept element from view 70 | # # load bboxes of next highest vals 71 | # torch.index_select(x1, 0, idx, out=xx1) 72 | # torch.index_select(y1, 0, idx, out=yy1) 73 | # torch.index_select(x2, 0, idx, out=xx2) 74 | # torch.index_select(y2, 0, idx, out=yy2) 75 | # # store element-wise max with next highest score 76 | # xx1 = torch.clamp(xx1, min=x1[i]) 77 | # yy1 = torch.clamp(yy1, min=y1[i]) 78 | # xx2 = torch.clamp(xx2, max=x2[i]) 79 | # yy2 = torch.clamp(yy2, max=y2[i]) 80 | # w.resize_as_(xx2) 81 | # h.resize_as_(yy2) 82 | # w = xx2 - xx1 83 | # h = yy2 - yy1 84 | # # check sizes of xx1 and xx2.. after each iteration 85 | # w = torch.clamp(w, min=0.0) 86 | # h = torch.clamp(h, min=0.0) 87 | # inter = w*h 88 | # # IoU = i / (area(a) + area(b) - i) 89 | # rem_areas = torch.index_select(area, 0, idx) # load remaining areas) 90 | # union = (rem_areas - inter) + area[i] 91 | # IoU = inter/union # store result in iou 92 | # # keep only elements with an IoU <= overlap 93 | # idx = idx[IoU.le(overlap)] 94 | # return keep, count 95 | 96 | 97 | # class Detect(Function): 98 | # """At test time, Detect is the final layer of SSD. Decode location preds, 99 | # apply non-maximum suppression to location predictions based on conf 100 | # scores and threshold to a top_k number of output predictions for both 101 | # confidence score and locations. 102 | # """ 103 | # def __init__(self, num_classes, bkg_label, top_k, conf_thresh, nms_thresh): 104 | # self.num_classes = num_classes 105 | # self.background_label = bkg_label 106 | # self.top_k = top_k 107 | # # Parameters used in nms. 108 | # self.nms_thresh = nms_thresh 109 | # if nms_thresh <= 0: 110 | # raise ValueError('nms_threshold must be non negative.') 111 | # self.conf_thresh = conf_thresh 112 | # self.variance = cfg['variance'] 113 | 114 | # def forward(self, boxes, scores, prior_boxes): 115 | # """ 116 | # Args: 117 | # boxes (:obj:`Tensor`): [B, N, 4] 118 | # scores (:obj:`Tensor`): [N, C] 119 | # prior_boxes (:obj:`Tensor`): [N, 4] 120 | 121 | # loc_data: (tensor) Loc preds from loc layers 122 | # Shape: [batch, num_priors*4] 123 | # conf_data: (tensor) Shape: Conf preds from conf layers 124 | # Shape: [batch*num_priors,num_classes] 125 | # prior_data: (tensor) Prior boxes and variances from priorbox layers 126 | # Shape: [1,num_priors,4] 127 | # """ 128 | # num = boxes.size(0) # batch size 129 | # num_priors = prior_boxes.size(0) 130 | # output = torch.zeros(num, self.num_classes, self.top_k, 5) 131 | # conf_preds = scores.view(num, num_priors, self.num_classes).transpose(2, 1) 132 | 133 | # # Decode predictions into bboxes. 
134 | # for i in range(num): 135 | # decoded_boxes = decode(boxes[i], prior_data, self.variance) 136 | # # For each class, perform nms 137 | # conf_scores = conf_preds[i].clone() 138 | 139 | # for cl in range(1, self.num_classes): 140 | # c_mask = conf_scores[cl].gt(self.conf_thresh) 141 | # scores = conf_scores[cl][c_mask] 142 | 143 | # score_mask = scores[i].gt(0.05) 144 | 145 | # score = scores[score_mask] 146 | # index = index[score_mask] 147 | 148 | # if scores.size(0) == 0: 149 | # continue 150 | 151 | # l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes) 152 | # boxes = decoded_boxes[l_mask].view(-1, 4) 153 | # # idx of highest scoring and non-overlapping boxes per class 154 | # # ids, count = nms(boxes, scores, self.nms_thresh, self.top_k) 155 | # # boxes (Tensor[N, 4])) – boxes to perform NMS on. They are expected to be in (x1, y1, x2, y2) format 156 | # # scores (Tensor[N]) – scores for each one of the boxes 157 | # # iou_threshold (float) – discards all overlapping boxes with IoU > iou_threshold 158 | # keep = nms(boxes, scores, self.nms_thresh) 159 | # output[i, cl, :count] = \ 160 | # torch.cat((scores[ids[:count]].unsqueeze(1), 161 | # boxes[ids[:count]]), 1) 162 | 163 | # flt = output.contiguous().view(num, -1, 5) 164 | # _, idx = flt[:, :, 0].sort(1, descending=True) 165 | # _, rank = idx.sort(1) 166 | # flt[(rank < self.top_k).unsqueeze(-1).expand_as(flt)].fill_(0) 167 | # return output 168 | -------------------------------------------------------------------------------- /boda/models/ssd/loss_ssd.py: -------------------------------------------------------------------------------- 1 | # from typing import Tuple, List, Dict 2 | 3 | # import torch 4 | # from torch import nn, Tensor 5 | # import torch.nn.functional as F 6 | 7 | # from ...base_architecture import LossFunction 8 | # from ...ops.box import jaccard, cxywh_to_xyxy 9 | # from ...ops.loss import log_sum_exp 10 | 11 | 12 | # class Matcher: 13 | # """Matcher for SSD 14 | 15 | # Arguments: 16 | # threshold (float): 17 | # variances (List[float]): 18 | # """ 19 | # def __init__( 20 | # self, 21 | # threshold: float = 0.5, 22 | # variances: List[float] = [0.1, 0.2] 23 | # ) -> None: 24 | # self.threshold = threshold 25 | # self.variances = variances 26 | 27 | # def __call__( 28 | # self, 29 | # pred_boxes, 30 | # pred_scores, 31 | # pred_priors, 32 | # true_boxes, 33 | # ) -> Tuple[Tensor]: 34 | # """ 35 | # Arguments: 36 | # pred_boxes (Tensor): Size([N, ]) 37 | # pred_priors (Tensor): default boxes Size([N, 4]) 38 | # true_boxes (Tensor): ground truth of bounding boxes Size([N, 4]) 39 | 40 | # Returns: 41 | # matched_boxes (Tensor): Size([num_priors, 4]) 42 | # matched_scores (Tensor): Size([num_priors]) 43 | # """ 44 | # overlaps = jaccard( 45 | # true_boxes, cxcywh_to_xyxy(pred_priors)) 46 | 47 | # # Best prior for each ground truth 48 | # best_prior_overlaps, best_prior_indexes = overlaps.max(1, keepdim=True) 49 | # best_prior_indexes.squeeze_(1) 50 | # best_prior_overlaps.squeeze_(1) 51 | 52 | # # Best ground truth for each prior boxes (default boxes) 53 | # best_truth_overlaps, best_truth_indexes = overlaps.max(0, keepdim=True) 54 | # best_truth_indexes.squeeze_(0) 55 | # best_truth_overlaps.squeeze_(0) 56 | # best_truth_overlaps.index_fill_(0, best_prior_indexes, 2) 57 | 58 | # # TODO refactor: index best_prior_idx with long tensor 59 | # # Ensure every gt matches with its prior of max overlap 60 | # for j in range(best_prior_indexes.size(0)): 61 | # best_truth_indexes[best_prior_indexes[j]] = j 62 | 63 | # 
matched_boxes = true_boxes[best_truth_indexes] # Size([N, 4]) 64 | # matched_scores = pred_scores[best_truth_indexes] + 1 # Size([N]) 65 | # matched_scores[best_truth_overlaps < self.threshold] = 0 # Size([]) 66 | # matched_boxes = self.encode(matched_boxes, pred_priors) 67 | 68 | # return matched_boxes, matched_scores 69 | 70 | # def encode(self, matched_boxes, pred_priors): 71 | # """ 72 | # Return: 73 | # (Tensor): Size([num_priors, 4]) 74 | # """ 75 | # gcxcy = (matched_boxes[:, :2] + matched_boxes[:, 2:])/2 - pred_priors[:, :2] 76 | # gcxcy /= (self.variances[0] * pred_priors[:, 2:]) 77 | # gwh = (matched_boxes[:, 2:] - matched_boxes[:, :2]) / pred_priors[:, 2:] 78 | # gwh = torch.log(gwh) / self.variances[1] 79 | # return torch.cat([gcxcy, gwh], dim=1) 80 | 81 | # def decode(self, pred_boxes, pred_priors): 82 | # boxes = torch.cat(( 83 | # pred_priors[:, :2] + pred_boxes[:, :2] * self.variances[0] * pred_priors[:, 2:], 84 | # pred_priors[:, 2:] * torch.exp(pred_boxes[:, 2:] * self.variances[1])), dim=1) 85 | # boxes[:, :2] -= boxes[:, 2:] / 2 86 | # boxes[:, 2:] += boxes[:, :2] 87 | # return boxes 88 | 89 | 90 | # class SsdLoss(LossFunction): 91 | # def __init__( 92 | # self, 93 | # size, 94 | # overlap_thresh, 95 | # prior_for_matching, 96 | # bkg_label, 97 | # neg_mining, 98 | # neg_pos, 99 | # neg_overlap, 100 | # encode_target, 101 | # variances: List[float] = [0.1, 0.2] 102 | # ) -> None: 103 | # super().__init__() 104 | # self.num_classes = config.num_classes + 1 105 | # self.variances = variances 106 | # self.threshold = overlap_thresh 107 | # self.background_label = bkg_label 108 | # self.encode_target = encode_target 109 | # self.use_prior_for_matching = prior_for_matching 110 | # self.do_neg_mining = neg_mining 111 | # self.negpos_ratio = neg_pos 112 | # self.neg_overlap = neg_overlap 113 | 114 | # def forward(self, inputs, targets): 115 | # """ 116 | # """ 117 | # self.check_targets(targets) 118 | # targets = self.copy_targets(targets) 119 | 120 | # pred_boxes = inputs['boxes'] 121 | # num_boxes = pred_boxes.size(0) 122 | # pred_scores = inputs['scores'] 123 | # pred_priors = inputs['priors'] 124 | # pred_priors = pred_priors[:pred_boxes.size(1), :] 125 | 126 | # batch_size = len(targets) 127 | # num_priors = pred_priors.size(0) 128 | 129 | # # match priors (default boxes) and ground truth boxes 130 | # matched_true_boxes = pred_boxes.new_tensor(batch_size, num_priors, 4) 131 | # matched_true_scores = pred_boxes.new_tensor(batch_size, num_priors, dtype=torch.int64) 132 | 133 | # for i, target in enumerate(targets): 134 | # true_boxes = target['boxes'] 135 | # true_labels = target['labels'] 136 | # matched_boxes, matched_scores = Matcher(self.threshold)( 137 | # pred_boxes, pred_priors, true_boxes, true_labels) 138 | 139 | # matched_true_boxes[i] = matched_boxes 140 | # matched_true_scores[i] = matched_scores 141 | 142 | # matched_true_boxes.requires_grad = False 143 | # matched_true_scores.requires_grad = False 144 | 145 | # # TODO: positive_scores or pos_scores 146 | # pos = matched_true_scores > 0 147 | # num_pred_scores = pos.sum(dim=1, keepdim=True) 148 | 149 | # pos_indexes = pos.unsqueeze(pos.dim()).expand_as(pred_boxes) 150 | # matched_pred_boxes = pred_boxes[pos_indexes].view(-1, 4) 151 | # matched_true_boxes = matched_true_boxes[pos_indexes].view(-1, 4) 152 | 153 | # loss_box = F.smooth_l1_loss( 154 | # matched_pred_boxes, matched_true_boxes, size_average=False) 155 | 156 | # # Compute hard negative mining 157 | # pred_scores = pred_scores.view(-1, 
self.num_classes) 158 | # loss_score = log_sum_exp(pred_scores) - pred_scores.gather(1, matched_true_scores.view(-1, 1)) 159 | 160 | # # Hard negative mining 161 | # loss_score[pos] = 0 162 | # loss_score = loss_score.view(num_boxes, -1) 163 | 164 | # _, loss_index = loss_score.sort(1, descending=True) 165 | # _, rank_index = loss_index.sort(1) 166 | 167 | # num_pos = pos.long().sum(1, keepdim=True) 168 | # num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1) 169 | # neg = rank_index < num_neg.expand_as(rank_index) 170 | 171 | # # Confidence loss including positive and negative samples 172 | # pos_index = pos.unsqueeze(2).expand_as(pred_scores) 173 | # neg_index = neg.unsqueeze(2).expand_as(pred_scores) 174 | 175 | # pred_scores = pred_boxes[(pos_index + neg_index).gt(0)].view(-1, self.num_classes) 176 | # weighted_targets = matched_true_scores[(pos+neg).gt(0)] 177 | # loss_score = F.cross_entropy(pred_scores, weighted_targets, size_average=False) 178 | 179 | # losses = { 180 | # 'loss_bbox': None, 181 | # 'loss_conf': None, 182 | # } 183 | 184 | # return losses 185 | -------------------------------------------------------------------------------- /boda/models/yolact/README.md: -------------------------------------------------------------------------------- 1 | # YOLACT (You Only Look At CoefficienTs) 2 | 3 | ``` 4 | ██╗ ██╗ ██████╗ ██╗ █████╗ ██████╗████████╗ 5 | ╚██╗ ██╔╝██╔═══██╗██║ ██╔══██╗██╔════╝╚══██╔══╝ 6 | ╚████╔╝ ██║ ██║██║ ███████║██║ ██║ 7 | ╚██╔╝ ██║ ██║██║ ██╔══██║██║ ██║ 8 | ██║ ╚██████╔╝███████╗██║ ██║╚██████╗ ██║ 9 | ╚═╝ ╚═════╝ ╚══════╝╚═╝ ╚═╝ ╚═════╝ ╚═╝ 10 | ``` 11 | 12 | ## YOLACT Architecture 13 | 14 | ```{bash} 15 | ============================================================================== 16 | Layer (type:depth-idx) Output Shape Param # 17 | ============================================================================== 18 | ├─ResNet: 1-1 [-1, 256, 138, 138] -- 19 | | └─Conv2d: 2-1 [-1, 64, 275, 275] 9,408 20 | | └─BatchNorm2d: 2-2 [-1, 64, 275, 275] 128 21 | | └─ReLU: 2-3 [-1, 64, 275, 275] -- 22 | | └─MaxPool2d: 2-4 [-1, 64, 138, 138] -- 23 | | └─ModuleList: 2 -- -- 24 | | | └─Sequential: 3-1 [-1, 256, 138, 138] 215,808 25 | | | └─Sequential: 3-2 [-1, 512, 69, 69] 1,219,584 26 | | | └─Sequential: 3-3 [-1, 1024, 35, 35] 26,090,496 27 | | | └─Sequential: 3-4 [-1, 2048, 18, 18] 14,964,736 28 | ├─YolactPredictNeck: 1-2 [-1, 256, 69, 69] -- 29 | | └─ModuleList: 2 -- -- 30 | | | └─Conv2d: 3-5 [-1, 256, 18, 18] 524,544 31 | | | └─Conv2d: 3-6 [-1, 256, 35, 35] 262,400 32 | | | └─Conv2d: 3-7 [-1, 256, 69, 69] 131,328 33 | | └─ModuleList: 2 -- -- 34 | | | └─Conv2d: 3-8 [-1, 256, 18, 18] 590,080 35 | | | └─Conv2d: 3-9 [-1, 256, 35, 35] 590,080 36 | | | └─Conv2d: 3-10 [-1, 256, 69, 69] 590,080 37 | | └─ModuleList: 2 -- -- 38 | | | └─Conv2d: 3-11 [-1, 256, 9, 9] 590,080 39 | | | └─Conv2d: 3-12 [-1, 256, 5, 5] 590,080 40 | ├─YolactPredictHead: 1 -- -- 41 | | └─HeadBranch: 2-5 [[-1, 4]] -- 42 | | | └─Conv2d: 3-13 [-1, 256, 69, 69] 590,080 43 | | | └─Sequential: 3-14 [-1, 12, 69, 69] 27,660 44 | | | └─Sequential: 3-15 [-1, 96, 69, 69] 221,280 45 | | | └─Sequential: 3-16 [-1, 243, 69, 69] 560,115 46 | | └─HeadBranch: 2-6 [[-1, 4]] -- 47 | | └─HeadBranch: 2 -- -- 48 | | | └─Conv2d: 3-17 [-1, 256, 35, 35] (recursive) 49 | | | └─Sequential: 3-18 [-1, 12, 35, 35] (recursive) 50 | | | └─Sequential: 3-19 [-1, 96, 35, 35] (recursive) 51 | | | └─Sequential: 3-20 [-1, 243, 35, 35] (recursive) 52 | | └─HeadBranch: 2-7 [[-1, 4]] -- 53 | | └─HeadBranch: 2 -- -- 54 | | | 
└─Conv2d: 3-21 [-1, 256, 18, 18] (recursive) 55 | | | └─Sequential: 3-22 [-1, 12, 18, 18] (recursive) 56 | | | └─Sequential: 3-23 [-1, 96, 18, 18] (recursive) 57 | | | └─Sequential: 3-24 [-1, 243, 18, 18] (recursive) 58 | ├─ProtoNet: 1-3 [-1, 32, 138, 138] -- 59 | | └─Conv2d: 2-8 [-1, 256, 69, 69] 590,080 60 | | └─Conv2d: 2-9 [-1, 256, 69, 69] 590,080 61 | | └─Conv2d: 2-10 [-1, 256, 69, 69] 590,080 62 | | └─Upsample: 2-11 [-1, 256, 138, 138] -- 63 | | └─Conv2d: 2-12 [-1, 256, 138, 138] 590,080 64 | | └─Conv2d: 2-13 [-1, 32, 138, 138] 8,224 65 | ├─SemanticSegmentation: 1-4 [-1, 80, 69, 69] -- 66 | | └─Conv2d: 2-14 [-1, 80, 69, 69] 20,560 67 | ============================================================================== 68 | Total params: 50,157,071 69 | Trainable params: 50,157,071 70 | Non-trainable params: 0 71 | Total mult-adds (G): 34.48 72 | ============================================================================== 73 | Input size (MB): 3.46 74 | Forward/backward pass size (MB): 193.40 75 | Params size (MB): 191.33 76 | Estimated Total Size (MB): 388.20 77 | ============================================================================== 78 | ``` 79 | 80 | ```{python} 81 | class CocoDataset(Dataset): 82 | def __getitem__(self, index: int) -> Tuple[Tensor, Dict]: 83 | """ 84 | Returns: 85 | image (Tensor[C, H, W]): Original size 86 | targets (Dict[str, Any]): 87 | """ 88 | return image, { 89 | 'boxes': FloatTensor[N, 4]: [x1, y1, x2, y2], 90 | 'labels': LongTensor[N], 91 | 'masks': ByteTensor[N, H, W], 92 | 'keypoints' FloatTensor[N, K, 3]: [x, y, visibility], 93 | 'area': float, 94 | 'iscrowd': 0 or 1, 95 | 'width': int, # width of an original image 96 | 'height': int, # height of an original image 97 | } 98 | ``` 99 | 100 | ```{python} 101 | from boda.models import YolactConfig, YolactModel, YolactLoss 102 | 103 | config = YolactConfig(num_classes=80) 104 | model = YolactModel(config).to('cuda') 105 | criterion = YolactLoss() 106 | 107 | for epoch in range(num_epochs): 108 | for images, targets in train_loader: 109 | outputs = model(images) 110 | losses = criterion(outputs, targets) 111 | loss = sum(loss for loss in losses.values()) 112 | ``` 113 | 114 | ```{python} 115 | class YolacModel: 116 | def forward(self, images): 117 | if self.training: 118 | # 전처리가 끝난 outputs? 119 | return { 120 | 'boxes': FloatTensor, 121 | 'masks: Tensor 122 | 'scores': FloatTensor, 123 | 'prior_boxes': 'anchors' ??? 124 | 'proposals'?? 125 | 'proto_masks':?? 126 | 'semantic_masks':?? 
127 | } 128 | else: 129 | # 전처리가 끝난 outputs 130 | return { 131 | 'boxes': Tensor, 132 | 'masks': 133 | 'scores': Tensor, 134 | 'labels': Tensor, 135 | 'keypoints': Tensor, 136 | } 137 | ``` 138 | 139 | 140 | ```{python} 141 | outputs = model(images) 142 | outputs 143 | 144 | # SSD 145 | {'boxes', 'scores', 'prior_boxes'} 146 | 147 | # Faster R-CNN 148 | {'boxes', 'proposals', 'scores', 'anchors'} 149 | 150 | # Keypoint R-CNN 151 | {'boxes', 'proposals', 'scores', 'keypoints'} 152 | 153 | # YOLACT 154 | {'boxes', 'masks', 'scores', 'prior_boxes', 'proto_masks', 'semantic_masks'} 155 | 156 | # SOLO 157 | {'category', 'masks'} 158 | 159 | # CenterMask 160 | ``` 161 | 162 | -------------------------------------------------------------------------------- /boda/lib/torchsummary/model_statistics.py: -------------------------------------------------------------------------------- 1 | """ model_statistics.py """ 2 | from typing import Any, Dict, Iterable, List, Union 3 | 4 | import numpy as np 5 | import torch 6 | 7 | from .formatting import FormattingOptions, Verbosity 8 | from .layer_info import LayerInfo 9 | 10 | HEADER_TITLES = { 11 | "kernel_size": "Kernel Shape", 12 | "input_size": "Input Shape", 13 | "output_size": "Output Shape", 14 | "num_params": "Param #", 15 | "mult_adds": "Mult-Adds", 16 | } 17 | CORRECTED_INPUT_SIZE_TYPE = List[Union[Iterable[Any], torch.Size]] 18 | 19 | 20 | class ModelStatistics: 21 | """Class for storing results of the summary.""" 22 | 23 | def __init__( 24 | self, 25 | summary_list: List[LayerInfo], 26 | input_size: CORRECTED_INPUT_SIZE_TYPE, 27 | formatting: FormattingOptions, 28 | ): 29 | self.summary_list = summary_list 30 | self.input_size = input_size 31 | self.total_input = ( 32 | sum(abs(np.prod(sz)) for sz in input_size) if input_size else 0 33 | ) 34 | self.formatting = formatting 35 | self.total_params, self.trainable_params = 0, 0 36 | self.total_output, self.total_mult_adds = 0, 0 37 | for layer_info in summary_list: 38 | self.total_mult_adds += layer_info.macs 39 | if not layer_info.is_recursive: 40 | if layer_info.depth == formatting.max_depth or ( 41 | not any(layer_info.module.children()) 42 | and layer_info.depth < formatting.max_depth 43 | ): 44 | self.total_params += layer_info.num_params 45 | if layer_info.trainable: 46 | self.trainable_params += layer_info.num_params 47 | if layer_info.num_params > 0 and not any(layer_info.module.children()): 48 | # x2 for gradients 49 | self.total_output += 2.0 * abs(np.prod(layer_info.output_size)) 50 | 51 | def __repr__(self) -> str: 52 | """Print results of the summary.""" 53 | header_row = self.formatting.format_row("Layer (type:depth-idx)", HEADER_TITLES) 54 | layer_rows = self.layers_to_str() 55 | divider = "=" * self.formatting.get_total_width() 56 | summary_str = ( 57 | "{0}\n{1}{0}\n{2}{0}" 58 | "\nTotal params: {3:,}\n" 59 | "Trainable params: {4:,}\n" 60 | "Non-trainable params: {5:,}\n".format( 61 | divider, 62 | header_row, 63 | layer_rows, 64 | self.total_params, 65 | self.trainable_params, 66 | self.total_params - self.trainable_params, 67 | ) 68 | ) 69 | if self.input_size: 70 | summary_str += ( 71 | "Total mult-adds ({}): {:0.2f}\n" 72 | "{}\n" 73 | "Input size (MB): {:0.2f}\n" 74 | "Forward/backward pass size (MB): {:0.2f}\n" 75 | "Params size (MB): {:0.2f}\n" 76 | "Estimated Total Size (MB): {:0.2f}\n".format( 77 | "G" if self.total_mult_adds >= 1e9 else "M", 78 | self.to_readable(self.total_mult_adds), 79 | divider, 80 | self.to_bytes(self.total_input), 81 | 
self.to_bytes(self.total_output), 82 | self.to_bytes(self.total_params), 83 | self.to_bytes( 84 | self.total_input + self.total_output + self.total_params 85 | ), 86 | ) 87 | ) 88 | summary_str += divider 89 | return summary_str 90 | 91 | @staticmethod 92 | def to_bytes(num: int) -> float: 93 | """Converts a number (assume floats, 4 bytes each) to megabytes.""" 94 | return num * 4 / (1024 ** 2) 95 | 96 | @staticmethod 97 | def to_readable(num: int) -> float: 98 | """Converts a number to millions or billions.""" 99 | if num >= 1e9: 100 | return num / 1e9 101 | return num / 1e6 102 | 103 | def layer_info_to_row( 104 | self, layer_info: LayerInfo, reached_max_depth: bool = False 105 | ) -> str: 106 | """Convert layer_info to string representation of a row.""" 107 | 108 | def get_start_str(depth: int) -> str: 109 | return "├─" if depth == 1 else "| " * (depth - 1) + "└─" 110 | 111 | row_values = { 112 | "kernel_size": str(layer_info.kernel_size) 113 | if layer_info.kernel_size 114 | else "--", 115 | "input_size": str(layer_info.input_size), 116 | "output_size": str(layer_info.output_size), 117 | "num_params": layer_info.num_params_to_str(reached_max_depth), 118 | "mult_adds": layer_info.macs_to_str(reached_max_depth), 119 | } 120 | depth = layer_info.depth 121 | name = (get_start_str(depth) if self.formatting.use_branching else "") + str( 122 | layer_info 123 | ) 124 | new_line = self.formatting.format_row(name, row_values) 125 | if self.formatting.verbose == Verbosity.VERBOSE.value: 126 | for inner_name, inner_shape in layer_info.inner_layers.items(): 127 | prefix = ( 128 | get_start_str(depth + 1) if self.formatting.use_branching else " " 129 | ) 130 | extra_row_values = {"kernel_size": str(inner_shape)} 131 | new_line += self.formatting.format_row( 132 | prefix + inner_name, extra_row_values 133 | ) 134 | return new_line 135 | 136 | def layers_to_str(self) -> str: 137 | """Print each layer of the model as tree or as a list.""" 138 | if self.formatting.use_branching: 139 | return self._layer_tree_to_str() 140 | 141 | layer_rows = "" 142 | for layer_info in self.summary_list: 143 | layer_rows += self.layer_info_to_row(layer_info) 144 | return layer_rows 145 | 146 | def _layer_tree_to_str(self) -> str: 147 | """Print each layer of the model using a fancy branching diagram.""" 148 | new_str = "" 149 | current_hierarchy: Dict[int, LayerInfo] = {} 150 | 151 | for layer_info in self.summary_list: 152 | if layer_info.depth > self.formatting.max_depth: 153 | continue 154 | 155 | # create full hierarchy of current layer 156 | hierarchy = {} 157 | parent = layer_info.parent_info 158 | while parent is not None and parent.depth > 0: 159 | hierarchy[parent.depth] = parent 160 | parent = parent.parent_info 161 | 162 | # show hierarchy if it is not there already 163 | for d in range(1, layer_info.depth): 164 | if ( 165 | d not in current_hierarchy 166 | or current_hierarchy[d].module is not hierarchy[d].module 167 | ): 168 | new_str += self.layer_info_to_row(hierarchy[d]) 169 | current_hierarchy[d] = hierarchy[d] 170 | 171 | reached_max_depth = layer_info.depth == self.formatting.max_depth 172 | new_str += self.layer_info_to_row(layer_info, reached_max_depth) 173 | current_hierarchy[layer_info.depth] = layer_info 174 | 175 | # remove deeper hierarchy 176 | d = layer_info.depth + 1 177 | while d in current_hierarchy: 178 | current_hierarchy.pop(d) 179 | d += 1 180 | 181 | return new_str 182 | -------------------------------------------------------------------------------- /boda/custom_modules.py: 
-------------------------------------------------------------------------------- 1 | import collections 2 | import math 3 | import re 4 | from functools import partial 5 | 6 | import torch 7 | from torch import nn 8 | from torch.nn import functional as F 9 | 10 | 11 | class Conv2dDynamicSamePadding(nn.Conv2d): 12 | """ 13 | Adapted from: 14 | https://github.com/lukemelas/EfficientNet-PyTorch 15 | https://github.com/rwightman/pytorch-image-models 16 | 17 | 2D Convolutions like TensorFlow, for a dynamic image size. 18 | The padding is operated in forward function by calculating dynamically. 19 | 20 | Tips for 'SAME' mode padding. 21 | Given the following: 22 | i: width or height 23 | s: stride 24 | k: kernel size 25 | d: dilation 26 | p: padding 27 | Output after Conv2d: 28 | o = floor((i+p-((k-1)*d+1))/s+1) 29 | If o equals i, i = floor((i+p-((k-1)*d+1))/s+1), 30 | => p = (i-1)*s+((k-1)*d+1)-i 31 | """ 32 | 33 | def __init__( 34 | self, 35 | in_channels: int, 36 | out_channels: int, 37 | kernel_size: int, 38 | stride: int = 1, 39 | dilation: int = 1, 40 | groups: int = 1, 41 | bias: bool = True, 42 | ) -> None: 43 | super().__init__( 44 | in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias 45 | ) 46 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 47 | 48 | def forward(self, x): 49 | ih, iw = x.size()[-2:] 50 | kh, kw = self.weight.size()[-2:] 51 | sh, sw = self.stride 52 | oh, ow = math.ceil(ih / sh), math.ceil( 53 | iw / sw 54 | ) # change the output size according to stride 55 | 56 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 57 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 58 | if pad_h > 0 or pad_w > 0: 59 | x = F.pad( 60 | x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2] 61 | ) 62 | 63 | return F.conv2d( 64 | x, 65 | self.weight, 66 | self.bias, 67 | self.stride, 68 | self.padding, 69 | self.dilation, 70 | self.groups, 71 | ) 72 | 73 | 74 | class Conv2dStaticSamePadding(nn.Conv2d): 75 | """ 76 | 2D Convolutions like TensorFlow's 'SAME' mode, with the given input image size. 77 | The padding mudule is calculated in construction function, then used in forward. 
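    Example (illustrative, assumed values): with image_size=224, kernel_size=3,
    stride=2, dilation=1, the constructor computes oh = ceil(224 / 2) = 112 and
    pad_h = max((112 - 1) * 2 + (3 - 1) * 1 + 1 - 224, 0) = 1, so the stored
    padding module is ZeroPad2d((0, 1, 0, 1)) and the convolution output is
    112 x 112, matching TensorFlow's 'SAME' behaviour.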
78 | """ 79 | 80 | def __init__( 81 | self, 82 | in_channels, 83 | out_channels, 84 | kernel_size, 85 | stride=1, 86 | image_size=None, 87 | **kwargs 88 | ) -> None: 89 | super().__init__(in_channels, out_channels, kernel_size, stride, **kwargs) 90 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 91 | assert image_size is not None 92 | 93 | ih, iw = (image_size, image_size) if isinstance(image_size, int) else image_size 94 | kh, kw = self.weight.size()[-2:] 95 | sh, sw = self.stride 96 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 97 | 98 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 99 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 100 | if pad_h > 0 or pad_w > 0: 101 | self.static_padding = nn.ZeroPad2d( 102 | (pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2) 103 | ) 104 | else: 105 | self.static_padding = nn.Identity() 106 | 107 | def forward(self, x): 108 | x = self.static_padding(x) 109 | x = F.conv2d( 110 | x, 111 | self.weight, 112 | self.bias, 113 | self.stride, 114 | self.padding, 115 | self.dilation, 116 | self.groups, 117 | ) 118 | return x 119 | 120 | 121 | class MaxPool2dDynamicSamePadding(nn.MaxPool2d): 122 | """ 123 | 2D MaxPooling like TensorFlow's 'SAME' mode, with a dynamic image size. 124 | The padding is operated in forward function by calculating dynamically. 125 | """ 126 | 127 | def __init__( 128 | self, 129 | kernel_size, 130 | stride, 131 | padding=0, 132 | dilation=1, 133 | return_indices=False, 134 | ceil_mode=False, 135 | ) -> None: 136 | super().__init__( 137 | kernel_size, stride, padding, dilation, return_indices, ceil_mode 138 | ) 139 | self.stride = [self.stride] * 2 if isinstance(self.stride, int) else self.stride 140 | self.kernel_size = ( 141 | [self.kernel_size] * 2 142 | if isinstance(self.kernel_size, int) 143 | else self.kernel_size 144 | ) 145 | self.dilation = ( 146 | [self.dilation] * 2 if isinstance(self.dilation, int) else self.dilation 147 | ) 148 | 149 | def forward(self, x): 150 | ih, iw = x.size()[-2:] 151 | kh, kw = self.kernel_size 152 | sh, sw = self.stride 153 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 154 | 155 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 156 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 157 | if pad_h > 0 or pad_w > 0: 158 | x = F.pad( 159 | x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2] 160 | ) 161 | 162 | return F.max_pool2d( 163 | x, 164 | self.kernel_size, 165 | self.stride, 166 | self.padding, 167 | self.dilation, 168 | self.ceil_mode, 169 | self.return_indices, 170 | ) 171 | 172 | 173 | class MaxPool2dStaticSamePadding(nn.MaxPool2d): 174 | """ 175 | 2D MaxPooling like TensorFlow's 'SAME' mode, with the given input image size. 176 | The padding mudule is calculated in construction function, then used in forward. 
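    Example (illustrative, assumed values): with image_size=224, kernel_size=2,
    stride=2 the computed padding is max((112 - 1) * 2 + (2 - 1) * 1 + 1 - 224, 0) = 0,
    so static_padding is nn.Identity(); with kernel_size=3 the padding is 1 and
    static_padding becomes ZeroPad2d((0, 1, 0, 1)).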
177 | """ 178 | 179 | def __init__(self, kernel_size, stride, image_size=None, **kwargs) -> None: 180 | super().__init__(kernel_size, stride, **kwargs) 181 | self.stride = [self.stride] * 2 if isinstance(self.stride, int) else self.stride 182 | self.kernel_size = ( 183 | [self.kernel_size] * 2 184 | if isinstance(self.kernel_size, int) 185 | else self.kernel_size 186 | ) 187 | self.dilation = ( 188 | [self.dilation] * 2 if isinstance(self.dilation, int) else self.dilation 189 | ) 190 | assert image_size is not None 191 | 192 | ih, iw = (image_size, image_size) if isinstance(image_size, int) else image_size 193 | kh, kw = self.kernel_size 194 | sh, sw = self.stride 195 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 196 | 197 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 198 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 199 | if pad_h > 0 or pad_w > 0: 200 | self.static_padding = nn.ZeroPad2d( 201 | (pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2) 202 | ) 203 | else: 204 | self.static_padding = nn.Identity() 205 | 206 | def forward(self, x): 207 | x = self.static_padding(x) 208 | x = F.max_pool2d( 209 | x, 210 | self.kernel_size, 211 | self.stride, 212 | self.padding, 213 | self.dilation, 214 | self.ceil_mode, 215 | self.return_indices, 216 | ) 217 | return x 218 | -------------------------------------------------------------------------------- /boda/models/feature_extractor/resnet.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, List, Optional, Callable 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | from torch import nn, Tensor 6 | 7 | 8 | # TODO: BACKBONE_ARCHIVE_MAP or _MAPS? or ARCHIVES? 
9 | BACKBONE_ARCHIVE_MAP = { 10 | "resnet18": "https://download.pytorch.org/models/resnet18-5c106cde.pth", 11 | "resnet34": "https://download.pytorch.org/models/resnet34-333f7ec4.pth", 12 | "resnet50": "https://download.pytorch.org/models/resnet50-19c8e357.pth", 13 | "resnet101": "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth", 14 | } 15 | 16 | 17 | class Conv2d1x1(nn.Sequential): 18 | """1x1 convolution""" 19 | 20 | def __init__( 21 | self, 22 | in_planes: int, 23 | out_planes: int, 24 | stride: int = 1, 25 | ) -> None: 26 | super().__init__( 27 | nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 28 | ) 29 | 30 | 31 | class Conv2d3x3(nn.Sequential): 32 | """3x3 convolution with padding""" 33 | 34 | def __init__( 35 | self, 36 | in_planes: int, 37 | out_planes: int, 38 | stride: int = 1, 39 | groups: int = 1, 40 | dilation: int = 1, 41 | ) -> None: 42 | super().__init__( 43 | nn.Conv2d( 44 | in_planes, 45 | out_planes, 46 | kernel_size=3, 47 | stride=stride, 48 | padding=dilation, 49 | groups=groups, 50 | bias=False, 51 | dilation=dilation, 52 | ) 53 | ) 54 | 55 | 56 | class BasicBlock(nn.Module): 57 | expansion: int = 1 58 | 59 | def __init__( 60 | self, 61 | inplanes: int, 62 | planes: int, 63 | stride: int = 1, 64 | downsample: Optional[nn.Module] = None, 65 | groups: int = 1, 66 | base_width: int = 64, 67 | dilation: int = 1, 68 | norm_layer: Optional[Callable[..., nn.Module]] = None, 69 | ) -> None: 70 | super().__init__() 71 | if norm_layer is None: 72 | norm_layer = nn.BatchNorm2d # (track_running_stats=False) 73 | 74 | if groups != 1 or base_width != 64: 75 | raise ValueError("BasicBlock only supports groups=1 and base_width=64") 76 | if dilation > 1: 77 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock") 78 | 79 | self.conv1 = Conv2d3x3(inplanes, planes, stride) 80 | self.bn1 = norm_layer(planes) 81 | self.relu = nn.ReLU(inplace=True) 82 | self.conv2 = Conv2d3x3(planes, planes) 83 | self.bn2 = norm_layer(planes) 84 | self.downsample = downsample 85 | self.stride = stride 86 | 87 | def forward(self, x: Tensor) -> Tensor: 88 | identity = x 89 | 90 | out = self.conv1(x) 91 | out = self.bn1(out) 92 | out = self.relu(out) 93 | 94 | out = self.conv2(out) 95 | out = self.bn2(out) 96 | 97 | if self.downsample is not None: 98 | identity = self.downsample(x) 99 | 100 | out += identity 101 | out = self.relu(out) 102 | 103 | return out 104 | 105 | 106 | class Bottleneck(nn.Module): 107 | expansion = 4 108 | 109 | def __init__( 110 | self, 111 | in_planes: int, 112 | planes: int, 113 | stride: int = 1, 114 | downsample: Optional[nn.Module] = None, 115 | norm_layer: Optional[Callable[..., nn.Module]] = None, 116 | ) -> None: 117 | super().__init__() 118 | if norm_layer is None: 119 | norm_layer = nn.BatchNorm2d 120 | 121 | self.conv1 = Conv2d1x1(in_planes, planes) 122 | self.bn1 = norm_layer(planes) 123 | 124 | self.conv2 = Conv2d3x3(planes, planes, stride=stride) 125 | self.bn2 = norm_layer(planes) 126 | 127 | self.conv3 = Conv2d1x1(planes, planes * self.expansion) 128 | self.bn3 = norm_layer(planes * self.expansion) 129 | 130 | self.downsample = downsample 131 | self.stride = stride 132 | 133 | def forward(self, inputs) -> Tensor: 134 | residual = inputs 135 | 136 | outputs = F.relu(self.bn1(self.conv1(inputs)), inplace=True) 137 | outputs = F.relu(self.bn2(self.conv2(outputs)), inplace=True) 138 | outputs = self.bn3(self.conv3(outputs)) 139 | 140 | if self.downsample is not None: 141 | residual = self.downsample(inputs) 142 | 143 | 
outputs += residual 144 | outputs = F.relu(outputs, inplace=True) 145 | 146 | return outputs 147 | 148 | 149 | class ResNet(nn.Module): 150 | def __init__(self, layers, block=Bottleneck): 151 | super().__init__() 152 | self.num_base_layers = len(layers) 153 | self.layers = nn.ModuleList() 154 | self.channels = [] 155 | 156 | self.inplanes = 64 157 | 158 | # TODO self.stem = nn.Sequential() ?? 159 | self.conv = nn.Conv2d( 160 | 3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False 161 | ) 162 | self.bn = nn.BatchNorm2d(self.inplanes) 163 | self.relu = nn.ReLU(inplace=True) 164 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 165 | 166 | # TODO self._make_stage ??? 167 | self._make_layer(block, 64, layers[0]) 168 | self._make_layer(block, 128, layers[1], stride=2) 169 | self._make_layer(block, 256, layers[2], stride=2) 170 | self._make_layer(block, 512, layers[3], stride=2) 171 | 172 | # self.backbone_modules = [m for m in self.modules() if isinstance(m, nn.Conv2d)] 173 | # self.backbone_modules = [m for m in self.modules()] 174 | 175 | def _make_layer(self, block, planes, blocks, stride=1): 176 | downsample = None 177 | if stride != 1 or self.inplanes != planes * block.expansion: 178 | downsample = nn.Sequential( 179 | Conv2d1x1( 180 | self.inplanes, 181 | planes * block.expansion, 182 | stride=stride, 183 | ), 184 | nn.BatchNorm2d(planes * block.expansion), 185 | ) 186 | 187 | layers = [block(self.inplanes, planes, stride, downsample)] 188 | self.inplanes = planes * block.expansion 189 | 190 | # Add identity block 191 | for _ in range(1, blocks): 192 | layers.append(block(self.inplanes, planes)) 193 | 194 | # layer = nn.Sequential(*layers) 195 | 196 | self.channels.append(planes * block.expansion) 197 | self.layers.append(nn.Sequential(*layers)) 198 | 199 | def forward(self, inputs): 200 | inputs = self.conv(inputs) 201 | inputs = self.bn(inputs) 202 | inputs = self.relu(inputs) 203 | inputs = self.maxpool(inputs) 204 | 205 | outputs = [] 206 | for layer in self.layers: 207 | inputs = layer(inputs) 208 | outputs.append(inputs) 209 | 210 | return outputs 211 | 212 | def add_layer(self, conv_channels=1024, downsample=2, depth=1, block=Bottleneck): 213 | self._make_layer( 214 | block, conv_channels // block.expansion, blocks=depth, stride=downsample 215 | ) 216 | 217 | def from_pretrained(self, path): 218 | state_dict = torch.load(path) 219 | 220 | try: 221 | state_dict.pop("fc.weight") 222 | state_dict.pop("fc.bias") 223 | except KeyError: 224 | pass 225 | 226 | keys = list(state_dict) 227 | for key in keys: 228 | if key.startswith("layer"): 229 | idx = int(key[5]) 230 | new_key = "layers." 
+ str(idx - 1) + key[6:] 231 | state_dict[new_key] = state_dict.pop(key) 232 | 233 | self.load_state_dict(state_dict, strict=False) 234 | 235 | 236 | def resnet18(): 237 | backbone = ResNet([2, 2, 2, 2], BasicBlock) 238 | return backbone 239 | 240 | 241 | def resnet34(): 242 | backbone = ResNet([3, 4, 6, 3], BasicBlock) 243 | print(backbone.channels) 244 | return backbone 245 | 246 | 247 | def resnet50(pretrained: bool = False): 248 | backbone = ResNet([3, 4, 6, 3], Bottleneck) 249 | return backbone 250 | 251 | 252 | def resnet101(pretrained: bool = False): 253 | backbone = ResNet([3, 4, 23, 3], Bottleneck) 254 | return backbone 255 | -------------------------------------------------------------------------------- /boda/models/solov2/inference_solov1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def matrix_nms( 6 | seg_masks, cate_labels, cate_scores, kernel="gaussian", sigma=2.0, sum_masks=None 7 | ): 8 | """Matrix NMS for multi-class masks. 9 | 10 | Args: 11 | seg_masks (Tensor): shape (n, h, w) 12 | cate_labels (Tensor): shape (n), mask labels in descending order 13 | cate_scores (Tensor): shape (n), mask scores in descending order 14 | kernel (str): 'linear' or 'gauss' 15 | sigma (float): std in gaussian method 16 | sum_masks (Tensor): The sum of seg_masks 17 | 18 | Returns: 19 | Tensor: cate_scores_update, tensors of shape (n) 20 | """ 21 | n_samples = len(cate_labels) 22 | if n_samples == 0: 23 | return [] 24 | if sum_masks is None: 25 | sum_masks = seg_masks.sum((1, 2)).float() 26 | seg_masks = seg_masks.reshape(n_samples, -1).float() 27 | # inter. 28 | inter_matrix = torch.mm(seg_masks, seg_masks.transpose(1, 0)) 29 | # union. 30 | sum_masks_x = sum_masks.expand(n_samples, n_samples) 31 | # iou. 32 | iou_matrix = ( 33 | inter_matrix / (sum_masks_x + sum_masks_x.transpose(1, 0) - inter_matrix) 34 | ).triu(diagonal=1) 35 | # label_specific matrix. 36 | cate_labels_x = cate_labels.expand(n_samples, n_samples) 37 | label_matrix = ( 38 | (cate_labels_x == cate_labels_x.transpose(1, 0)).float().triu(diagonal=1) 39 | ) 40 | 41 | # IoU compensation 42 | compensate_iou, _ = (iou_matrix * label_matrix).max(0) 43 | compensate_iou = compensate_iou.expand(n_samples, n_samples).transpose(1, 0) 44 | 45 | # IoU decay 46 | decay_iou = iou_matrix * label_matrix 47 | 48 | # matrix nms 49 | if kernel == "gaussian": 50 | decay_matrix = torch.exp(-1 * sigma * (decay_iou ** 2)) 51 | compensate_matrix = torch.exp(-1 * sigma * (compensate_iou ** 2)) 52 | decay_coefficient, _ = (decay_matrix / compensate_matrix).min(0) 53 | elif kernel == "linear": 54 | decay_matrix = (1 - decay_iou) / (1 - compensate_iou) 55 | decay_coefficient, _ = decay_matrix.min(0) 56 | else: 57 | raise NotImplementedError 58 | 59 | # update the score. 
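# With the gaussian kernel the decay applied to a mask is the minimum over
# higher-scoring, same-class masks i of exp(-sigma * (iou_i**2 - compensate_iou_i**2)).
# Illustrative numbers (assumed): iou=0.8, no IoU compensation, sigma=2.0 gives
# exp(-2 * 0.64) ~= 0.28, so heavily overlapped masks are softly down-weighted
# rather than removed outright.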
60 | cate_scores_update = cate_scores * decay_coefficient 61 | return cate_scores_update 62 | 63 | 64 | def get_seg(seg_preds, cate_preds, img_metas=[1]): 65 | assert len(seg_preds) == len(cate_preds) 66 | 67 | num_levels = len(cate_preds) 68 | featmap_size = seg_preds[0].size()[-2:] 69 | 70 | result_list = [] 71 | for img_id in range(len(img_metas)): 72 | cate_pred_list = [ 73 | cate_preds[i][img_id].view(-1, 80).detach() 74 | for i in range(num_levels) 75 | # cate_preds[i][img_id].view(-1, self.cate_out_channels).detach() for i in range(num_levels) 76 | ] 77 | seg_pred_list = [seg_preds[i][img_id].detach() for i in range(num_levels)] 78 | 79 | # img_shape = img_metas[img_id]['img_shape'] 80 | # scale_factor = img_metas[img_id]['scale_factor'] 81 | # ori_shape = img_metas[img_id]['ori_shape'] 82 | size = (1333, 800, 3) 83 | # size = (800, 1333, 3) 84 | img_shape = size 85 | ori_shape = size 86 | 87 | cate_pred_list = torch.cat(cate_pred_list, dim=0) 88 | seg_pred_list = torch.cat(seg_pred_list, dim=0) 89 | 90 | result = get_seg_single( 91 | cate_pred_list, seg_pred_list, featmap_size, img_shape, ori_shape 92 | ) 93 | 94 | result_list.append(result) 95 | 96 | return result_list 97 | 98 | 99 | def get_seg_single(cate_preds, seg_preds, featmap_size, img_shape, ori_shape): 100 | assert len(cate_preds) == len(seg_preds) 101 | 102 | # test_seg_masks = seg_preds > 0.5 # cfg.mask_thr 103 | # test_masks = test_seg_masks.detach().cpu().numpy()[0] * 255 104 | # print(test_masks.shape) 105 | # import cv2 106 | # cv2.imwrite('solo-test12.jpg', test_masks) 107 | 108 | # overall info. 109 | h, w, _ = img_shape 110 | upsampled_size_out = (featmap_size[0] * 4, featmap_size[1] * 4) 111 | 112 | # process. 113 | inds = cate_preds > 0.1 # cfg.score_thr 114 | # category scores. 115 | cate_scores = cate_preds[inds] 116 | if len(cate_scores) == 0: 117 | return None 118 | # category labels. 119 | # inds = inds.nonzero() 120 | inds = inds.nonzero() 121 | # print(inds.nonzero()) 122 | cate_labels = inds[:, 1] 123 | 124 | # strides. 125 | # size_trans = cate_labels.new_tensor(self.seg_num_grids).pow(2).cumsum(0) 126 | size_trans = cate_labels.new_tensor([40, 36, 24, 16, 12]).pow(2).cumsum(0) 127 | strides = cate_scores.new_ones(size_trans[-1]) 128 | n_stage = len([40, 36, 24, 16, 12]) # len(self.seg_num_grids) 129 | strides[: size_trans[0]] *= (4, 8, 16, 32, 64)[0] # self.strides[0] 130 | for ind_ in range(1, n_stage): 131 | # strides[size_trans[ind_ - 1]:size_trans[ind_]] *= self.strides[ind_] 132 | strides[size_trans[ind_ - 1] : size_trans[ind_]] *= (4, 8, 16, 32, 64)[ind_] 133 | strides = strides[inds[:, 0]] 134 | 135 | # masks. 136 | seg_preds = seg_preds[inds[:, 0]] 137 | seg_masks = seg_preds > 0.5 # cfg.mask_thr 138 | sum_masks = seg_masks.sum((1, 2)).float() 139 | 140 | # filter. 141 | keep = sum_masks > strides 142 | if keep.sum() == 0: 143 | return None 144 | 145 | seg_masks = seg_masks[keep, ...] 146 | seg_preds = seg_preds[keep, ...] 147 | sum_masks = sum_masks[keep] 148 | cate_scores = cate_scores[keep] 149 | cate_labels = cate_labels[keep] 150 | 151 | # print('#'*50) 152 | # print(seg_masks.size()) 153 | test_seg_masks = seg_masks > 0.5 # cfg.mask_thr 154 | test_masks = test_seg_masks.detach().cpu().numpy()[0] * 255 155 | print(test_masks.shape) 156 | # test_masks = test_masks.transpose(1, 2, 0) 157 | import cv2 158 | 159 | cv2.imwrite("solo-test11.jpg", test_masks) 160 | 161 | # maskness. 
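# Maskness = mean soft mask score inside the binarized foreground region
# (sum of seg_preds over the kept pixels divided by their count); multiplying it
# into the category score down-weights fuzzy, low-confidence masks.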
162 | seg_scores = (seg_preds * seg_masks.float()).sum((1, 2)) / sum_masks 163 | cate_scores *= seg_scores 164 | 165 | # sort and keep top nms_pre 166 | sort_inds = torch.argsort(cate_scores, descending=True) 167 | if len(sort_inds) > 500: # cfg.nms_pre 168 | sort_inds = sort_inds[:500] # [:cfg.nms_pre] 169 | seg_masks = seg_masks[sort_inds, :, :] 170 | seg_preds = seg_preds[sort_inds, :, :] 171 | sum_masks = sum_masks[sort_inds] 172 | cate_scores = cate_scores[sort_inds] 173 | cate_labels = cate_labels[sort_inds] 174 | 175 | # Matrix NMS 176 | cate_scores = matrix_nms( 177 | seg_masks, 178 | cate_labels, 179 | cate_scores, 180 | kernel="gaussian", 181 | sigma=2.0, 182 | sum_masks=sum_masks, 183 | ) 184 | 185 | # filter. 186 | keep = cate_scores >= 0.05 # cfg.update_thr 187 | if keep.sum() == 0: 188 | return None 189 | seg_preds = seg_preds[keep, :, :] 190 | cate_scores = cate_scores[keep] 191 | cate_labels = cate_labels[keep] 192 | 193 | # sort and keep top_k 194 | sort_inds = torch.argsort(cate_scores, descending=True) 195 | if len(sort_inds) > 100: # cfg.max_per_img: 196 | sort_inds = sort_inds[:100] # [:cfg.max_per_img] 197 | seg_preds = seg_preds[sort_inds, :, :] 198 | cate_scores = cate_scores[sort_inds] 199 | cate_labels = cate_labels[sort_inds] 200 | 201 | print(seg_preds.size()) 202 | print(upsampled_size_out) 203 | seg_preds = F.interpolate( 204 | seg_preds.unsqueeze(0), size=upsampled_size_out, mode="bilinear" 205 | ) # [:, :, :h, :w] 206 | 207 | # seg_masks = F.interpolate( 208 | # seg_preds, size=ori_shape[:2], mode='bilinear').squeeze(0) 209 | size = (1333, 800) 210 | # size = (800, 1333) 211 | seg_masks = F.interpolate(seg_preds, size=size, mode="bilinear").squeeze(0) 212 | 213 | print("#" * 50) 214 | print(seg_masks.size()) 215 | seg_masks = seg_masks > 0.5 # cfg.mask_thr 216 | 217 | test_masks = seg_masks.detach().cpu().numpy()[0] * 255 218 | print(test_masks.shape) 219 | # test_masks = test_masks.transpose(1, 2, 0) 220 | print(test_masks.shape) 221 | import cv2 222 | 223 | # test_masks = cv2.flip(test_masks, 1) 224 | # test_masks = cv2.rotate(test_masks, cv2.ROTATE_90_COUNTERCLOCKWISE) 225 | # print(test_masks.shape) 226 | # test_masks = cv2.resize(test_masks, (1333, 800), cv2.INTER_AREA) 227 | # print(test_masks.shape) 228 | cv2.imwrite("solo-test1.jpg", test_masks) 229 | 230 | return seg_masks, cate_labels, cate_scores 231 | -------------------------------------------------------------------------------- /boda/postprocessing.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from typing import Tuple, List, Dict 3 | 4 | import torch 5 | import torch.nn.functional as F 6 | from torch import Tensor 7 | from torchvision.ops import batched_nms 8 | 9 | 10 | def decode(boxes: Tensor, prior_boxes: Tensor, variances: List[float] = [0.1, 0.2]): 11 | """Decode locations from predictions using priors to undo 12 | the encoding we did for offset regression at train time. 13 | 14 | https://github.com/Hakuyume/chainer-ssd 15 | 16 | Args: 17 | loc (tensor): location predictions for loc layers, 18 | Shape: [num_priors, 4] 19 | priors (tensor): Prior boxes in center-offset form. 20 | Shape: [num_priors, 4]. 
21 | variances: (`List[float]`) Variances of priorboxes 22 | Return: 23 | decoded bounding box predictions 24 | """ 25 | boxes = torch.cat( 26 | ( 27 | prior_boxes[:, :2] + boxes[:, :2] * variances[0] * prior_boxes[:, 2:], 28 | prior_boxes[:, 2:] * torch.exp(boxes[:, 2:] * variances[1]), 29 | ), 30 | dim=1, 31 | ) 32 | boxes[:, :2] -= boxes[:, 2:] / 2 33 | boxes[:, 2:] += boxes[:, :2] 34 | 35 | return boxes 36 | 37 | 38 | def sanitize_coordinates( 39 | _x1, _x2, img_size: int, padding: int = 0, cast: bool = True 40 | ) -> Tuple[Tensor, Tensor]: 41 | """ 42 | Sanitizes the input coordinates so that x1 < x2, x1 != x2, x1 >= 0, and x2 <= image_size. 43 | Also converts from relative to absolute coordinates and casts the results to long tensors. 44 | If cast is false, the result won't be cast to longs. 45 | Warning: this does things in-place behind the scenes so copy if necessary. 46 | """ 47 | _x1 = _x1 * img_size 48 | _x2 = _x2 * img_size 49 | if cast: 50 | _x1 = _x1.long() 51 | _x2 = _x2.long() 52 | x1 = torch.min(_x1, _x2) 53 | x2 = torch.max(_x1, _x2) 54 | x1 = torch.clamp(x1 - padding, min=0) 55 | x2 = torch.clamp(x2 + padding, max=img_size) 56 | 57 | return x1, x2 58 | 59 | 60 | def crop(masks, boxes, padding: int = 1) -> Tensor: 61 | """ 62 | "Crop" predicted masks by zeroing out everything not in the predicted bbox. 63 | Vectorized by Chong (thanks Chong). 64 | Args: 65 | # TODO: torchvision mask rcnn masks UInt8Tensor[N, H, W] 66 | # TODO: torchvision boxes FloatTensor[N, 4] 67 | - masks should be a size [h, w, n] tensor of masks 68 | - boxes should be a size [n, 4] tensor of bbox coords in relative point form 69 | """ 70 | h, w, n = masks.size() 71 | x1, x2 = sanitize_coordinates(boxes[:, 0], boxes[:, 2], w, padding, cast=False) 72 | y1, y2 = sanitize_coordinates(boxes[:, 1], boxes[:, 3], h, padding, cast=False) 73 | 74 | rows = ( 75 | torch.arange(w, device=masks.device, dtype=x1.dtype) 76 | .view(1, -1, 1) 77 | .expand(h, w, n) 78 | ) 79 | cols = ( 80 | torch.arange(h, device=masks.device, dtype=x1.dtype) 81 | .view(-1, 1, 1) 82 | .expand(h, w, n) 83 | ) 84 | 85 | masks_left = rows >= x1.view(1, 1, -1) 86 | masks_right = rows < x2.view(1, 1, -1) 87 | masks_up = cols >= y1.view(1, 1, -1) 88 | masks_down = cols < y2.view(1, 1, -1) 89 | 90 | crop_mask = masks_left * masks_right * masks_up * masks_down 91 | 92 | return masks * crop_mask.float() 93 | 94 | 95 | class PostprocessOutputs: 96 | def __init__( 97 | self, 98 | num_classes: int = 80, 99 | top_k: int = 10, 100 | nms_threshold: float = 0.3, 101 | score_threshold: float = 0.2, 102 | ) -> None: 103 | """ 104 | Args: 105 | num_classes (int) 106 | top_k 107 | nms_threshold 108 | score_threshold 109 | nms () 110 | """ 111 | self.config = None 112 | self.num_classes = num_classes + 1 113 | self.background_label = 0 114 | self.top_k = top_k 115 | self.nms_threshold = 0.5 116 | self.score_threshold = 0.2 117 | 118 | self.nms = batched_nms 119 | # if self.nms is None: 120 | # self.nms = fast_nms 121 | 122 | def __call__( 123 | self, preds: Dict[str, Tensor], image_sizes: List[Tuple[int]] 124 | ) -> List[Dict[str, Tensor]]: 125 | """ """ 126 | pred_boxes = None 127 | pred_scores = None 128 | default_boxes = None 129 | pred_masks = None 130 | proto_masks = None 131 | if "boxes" in preds: 132 | pred_boxes = preds["boxes"] 133 | if "scores" in preds: 134 | pred_scores = preds["scores"] 135 | if "default_boxes" in preds: 136 | default_boxes = preds["default_boxes"] 137 | if "mask_coefs" in preds: 138 | pred_masks = preds["mask_coefs"] 139 | if 
"proto_masks" in preds: 140 | proto_masks = preds["proto_masks"] 141 | 142 | batch_size = pred_boxes.size(0) 143 | num_prior_boxes = default_boxes.size(0) 144 | pred_scores = ( 145 | preds["scores"] 146 | .view(batch_size, num_prior_boxes, self.num_classes) 147 | .transpose(2, 1) 148 | .contiguous() 149 | ) 150 | 151 | # test_scores, test_index = torch.max(preds['scores'], dim=1) 152 | 153 | return_list = [] 154 | # print(image_sizes) 155 | for i, image_size in enumerate(image_sizes): 156 | print(i, proto_masks.size()) 157 | decoded_boxes = decode(pred_boxes[i], default_boxes) 158 | results = self._filter_overlaps(i, decoded_boxes, pred_masks, pred_scores) 159 | print(proto_masks[i].dtype) 160 | results["proto_masks"] = proto_masks[i] 161 | 162 | return_list.append(_convert_boxes_and_masks(results, image_size)) 163 | # return_list.append(results) 164 | 165 | for result in return_list: 166 | scores = result["scores"].detach().cpu() 167 | sorted_index = range(len(scores))[: self.top_k] 168 | # sorted_index = scores.argsort(0, descending=True)[:5] 169 | 170 | boxes = result["boxes"][sorted_index] 171 | labels = result["labels"][sorted_index] 172 | scores = scores[sorted_index] 173 | masks = result["masks"][sorted_index] 174 | 175 | result["boxes"] = boxes 176 | result["scores"] = scores 177 | result["labels"] = labels 178 | result["masks"] = masks 179 | 180 | return return_list 181 | 182 | def _filter_overlaps( 183 | self, 184 | batch_index, 185 | decoded_boxes, 186 | pred_masks, 187 | pred_scores, 188 | ) -> Dict[str, Tensor]: 189 | scores = pred_scores[batch_index, 1:, :] 190 | max_scores, labels = torch.max(scores, dim=0) 191 | 192 | keep = max_scores > self.score_threshold # 0.05 193 | scores = scores[:, keep] 194 | boxes = decoded_boxes[keep, :] 195 | labels = labels[keep] 196 | masks = pred_masks[batch_index, keep, :] 197 | 198 | if scores.size(1) == 0: 199 | return None 200 | 201 | # print(max_scores[0], max_class[0]) 202 | print(boxes.size(), scores.size(), keep.size(), labels.size()) 203 | # boxes, masks, labels, scores = self.nms(boxes, scores, keep, iou_threshold=0.3) 204 | 205 | return_dict = defaultdict() 206 | for _class in range(scores.size(0)): 207 | _scores = scores[_class, :] 208 | indices = self.nms(boxes, _scores, labels, iou_threshold=0.3) 209 | 210 | return_dict["boxes"] = boxes[indices] 211 | return_dict["scores"] = _scores[indices] 212 | return_dict["mask_coefs"] = masks[indices] 213 | return_dict["labels"] = labels[indices] 214 | 215 | return dict(return_dict) 216 | 217 | 218 | def _convert_boxes_and_masks(preds, size): 219 | """ 220 | Args: 221 | preds 222 | size (): (h, w) 223 | 224 | """ 225 | h, w = size 226 | boxes = preds["boxes"] 227 | mask_coefs = preds["mask_coefs"] 228 | proto_masks = preds["proto_masks"] 229 | 230 | masks = proto_masks @ mask_coefs.t() 231 | masks = torch.sigmoid(masks) 232 | 233 | masks = crop(masks, boxes) 234 | masks = F.interpolate( 235 | masks.unsqueeze(0), (h, w), mode="bilinear", align_corners=False 236 | ).squeeze(0) 237 | masks.gt_(0.5) # Binarize the masks 238 | 239 | boxes[:, 0], boxes[:, 2] = sanitize_coordinates( 240 | boxes[:, 0], boxes[:, 2], w, cast=False 241 | ) 242 | boxes[:, 1], boxes[:, 3] = sanitize_coordinates( 243 | boxes[:, 1], boxes[:, 3], h, cast=False 244 | ) 245 | boxes = boxes.long() 246 | 247 | preds["boxes"] = boxes 248 | preds["masks"] = masks 249 | 250 | del preds["proto_masks"] 251 | del preds["mask_coefs"] 252 | 253 | return preds 254 | 
-------------------------------------------------------------------------------- /boda/base_configuration.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import json 3 | import os 4 | import sys 5 | import time 6 | from typing import Tuple, List, Dict, Any, Union, Sequence 7 | from urllib.parse import urlparse 8 | from urllib.request import urlretrieve 9 | 10 | from .file_utils import DataEncoder 11 | 12 | 13 | class BaseConfig: 14 | """ 15 | Class attributes: 16 | model_type (:obj:`str`): 17 | Args: 18 | name_or_path (:obj:`str`): 19 | """ 20 | 21 | model_name: str = "" 22 | cache_dir = "cache" 23 | 24 | def __init__(self, **kwargs): 25 | self.use_torchscript = kwargs.pop("use_torchscript", False) 26 | # self.use_fp16 = kwargs.pop('use_fp16', False) 27 | self.label_map = kwargs.pop("label_map", {}) 28 | self.num_classes = kwargs.pop("num_classes", 0) 29 | self.min_size = kwargs.pop("min_size", None) 30 | self.max_size = kwargs.pop("max_size", None) 31 | self.preserve_aspect_ratio = kwargs.pop("preserve_aspect_ratio", False) 32 | if not isinstance(self.max_size, Sequence): 33 | if not self.preserve_aspect_ratio: 34 | self.max_size = (self.max_size, self.max_size) 35 | else: 36 | self.max_size = (self.min_size, self.max_size) 37 | 38 | self.num_grids = kwargs.pop("num_grids", 0) 39 | self.top_k = kwargs.pop("top_k", 5) 40 | self.score_thresh = kwargs.pop("score_thresh", 0.15) 41 | 42 | # backbone 43 | self.backbone_name = kwargs.pop("backbone_name", "resnet101") 44 | self.backbone_structure = kwargs.pop("backbone_structure", None) 45 | 46 | # neck 47 | self.neck_name = kwargs.pop("neck_name", "fpn") 48 | self.selected_layers = kwargs.pop("selected_layers", [1, 2, 3]) 49 | self.aspect_ratios = kwargs.pop("aspect_ratios", [1, 1 / 2, 2]) 50 | self.scales = kwargs.pop("scales", [24, 48, 96, 192, 384]) 51 | self.fpn_channels = kwargs.pop("fpn_channels", 256) 52 | 53 | # head 54 | self.anchors = kwargs.pop("anchors", None) 55 | 56 | for k, v in kwargs.items(): 57 | print(k, v) 58 | try: 59 | setattr(self, k, v) 60 | except AttributeError as e: 61 | print(k, v, e) 62 | 63 | def __repr__(self): 64 | return f"{self.__class__.__name__} {self.to_dict()}" 65 | 66 | def to_json(self): 67 | config_dict = self.to_dict() 68 | return json.dumps(config_dict, indent=4, cls=DataEncoder) 69 | 70 | def to_dict(self): 71 | output = copy.deepcopy(self.__dict__) 72 | if hasattr(self.__class__, "model_name"): 73 | output["model_name"] = self.__class__.model_name 74 | return output 75 | 76 | def save_json(self, path: str): 77 | # if os.path.isfile(path): 78 | # raise AssertionError 79 | 80 | # os.makedirs(path, exist_ok=True) 81 | # config_file = os.path.join(path, CONFIG_NAME) 82 | with open(path, "w", encoding="utf-8") as writer: 83 | writer.write(self.to_json()) 84 | 85 | def update(self, config_dict: Dict[str, Any]): 86 | for key, value in config_dict.items(): 87 | setattr(self, key, value) 88 | 89 | @classmethod 90 | def from_pretrained(cls, name_or_path: str, **kwargs): 91 | config_dict = cls._get_config_dict(name_or_path) 92 | return cls(**config_dict) 93 | 94 | @classmethod 95 | def _dict_from_json_file(cls, path): 96 | with open(path, "r", encoding="utf-8") as json_file: 97 | config_dict = json.load(json_file) 98 | return config_dict 99 | 100 | @classmethod 101 | def from_json(cls, json_file: str): 102 | config_dict = cls._dict_from_json_file(json_file) 103 | print(config_dict) 104 | return cls(**config_dict) 105 | 106 | @classmethod 107 | def
_get_config_dict(cls, name_or_path, **kwargs): 108 | if os.path.isdir(name_or_path): 109 | # TODO: Thinking idea!! 110 | config_file = os.path.join(name_or_path, "config.json") 111 | elif os.path.isfile(name_or_path): 112 | config_file = name_or_path 113 | else: 114 | url = "https://unerue.synology.me/boda/models/" 115 | config_dir = os.path.join(cls.cache_dir, cls.model_name) 116 | config_file = os.path.join(config_dir, f"{name_or_path}.json") 117 | if not os.path.isfile(config_file): 118 | from urllib import request 119 | 120 | from .file_utils import reporthook 121 | 122 | if not os.path.isdir(config_dir): 123 | os.mkdir(config_dir) 124 | 125 | # file_name = f'{config_file}.json' 126 | # print(f'Downloading {name_or_path}.{extension}...', end=' ') 127 | request.urlretrieve( 128 | f"{url}{cls.model_name}/{name_or_path}.json", 129 | config_file, 130 | reporthook, 131 | ) 132 | print() 133 | 134 | return cls._dict_from_json_file(config_file) 135 | 136 | # if not os.path.isfile(os.path.join(config_dir, f'{name_or_path}.pth')): 137 | # from urllib import request 138 | # from .models.yolact.configuration_yolact import yolact_pretrained_models 139 | 140 | # dd = urlparse(yolact_pretrained_models[name_or_path]) 141 | # request.urlretrieve( 142 | # yolact_pretrained_models[name_or_path].replace('json', 'pth'), 143 | # 'cache/yolact/yolact-base.pth', reporthook) 144 | 145 | # if os.path.isdir(config_dir): 146 | # config_file = os.path.join(config_dir, f'{name_or_path}.json') 147 | # if os.path.isfile(config_file): 148 | # return cls._dict_from_json_file(config_file) 149 | # else: 150 | # config_file = urlparse() 151 | # else: 152 | # os.mkdir(config_dir) 153 | # return 154 | 155 | # config_dict = cls._dict_from_json_file(config_file) 156 | 157 | # return config_dict, kwargs 158 | 159 | # @classmethod 160 | # def from_json(cls, json_file: str): 161 | # with open(path, 'r') as json_file: 162 | # config_dict = json.loads(json_file) 163 | # config_dict = cls.dict_from_json_fiel(json_file) 164 | # return cls(**config_dict) 165 | 166 | # @classmethod 167 | # def from_pretrained(cls, pretrained_model_or_path: str, **kwargs): 168 | # raise NotImplementedError 169 | 170 | # @classmethod 171 | # def get_config_dict(cls, pretrained_model_name_or_path: str, **kwargs) -> Tuple[Dict[str, Any], Dict[str, Any]]: 172 | # cache_dir = kwargs.pop('cache_dir', None) 173 | # force_download = kwargs.pop('force_download', False) 174 | # resume_download = kwargs.pop('resume_download', False) 175 | # proxies = kwargs.pop("proxies", None) 176 | # local_files_only = kwargs.pop("local_files_only", False) 177 | # revision = kwargs.pop("revision", None) 178 | 179 | # if os.path.isdir(pretrained_model_name_or_path): 180 | # config_file = os.path.join(pretrained_model_name_or_path, CONFIG_NAME) 181 | # elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path): 182 | # config_file = pretrained_model_name_or_path 183 | # else: 184 | # config_file = hf_bucket_url( 185 | # pretrained_model_name_or_path, filename=CONFIG_NAME, revision=revision, mirror=None 186 | # ) 187 | 188 | # try: 189 | # # Load from URL or cache if already cached 190 | # resolved_config_file = cached_path( 191 | # config_file, 192 | # cache_dir=cache_dir, 193 | # force_download=force_download, 194 | # proxies=proxies, 195 | # resume_download=resume_download, 196 | # local_files_only=local_files_only, 197 | # ) 198 | # # Load config dict 199 | # config_dict = cls._dict_from_json_file(resolved_config_file) 200 | 201 | # 
except EnvironmentError as err: 202 | # logger.error(err) 203 | # msg = ( 204 | # f"Can't load config for '{pretrained_model_name_or_path}'. Make sure that:\n\n" 205 | # f"- '{pretrained_model_name_or_path}' is a correct model identifier listed on 'https://huggingface.co/models'\n\n" 206 | # f"- or '{pretrained_model_name_or_path}' is the correct path to a directory containing a {CONFIG_NAME} file\n\n" 207 | # ) 208 | # raise EnvironmentError(msg) 209 | 210 | # except json.JSONDecodeError: 211 | # msg = ( 212 | # "Couldn't reach server at '{}' to download configuration file or " 213 | # "configuration file is not a valid JSON file. " 214 | # "Please check network or file content here: {}.".format(config_file, resolved_config_file) 215 | # ) 216 | # raise EnvironmentError(msg) 217 | 218 | # if resolved_config_file == config_file: 219 | # logger.info("loading configuration file {}".format(config_file)) 220 | # else: 221 | # logger.info("loading configuration file {} from cache at {}".format(config_file, resolved_config_file)) 222 | 223 | # return config_dict, kwargs 224 | -------------------------------------------------------------------------------- /boda/models/yolact/inference_yolact.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from typing import Tuple, List, Dict 3 | 4 | import torch 5 | import torch.nn.functional as F 6 | from torch import Tensor 7 | from torchvision.ops import batched_nms 8 | 9 | 10 | def decode(boxes: Tensor, default_boxes: Tensor, variances: List[float] = [0.1, 0.2]): 11 | """Decode locations from predictions using priors to undo 12 | the encoding we did for offset regression at train time. 13 | 14 | https://github.com/Hakuyume/chainer-ssd 15 | 16 | Args: 17 | loc (FloatTensor[N, 4]): location predictions for loc layers, 18 | Shape: [num_priors, 4] 19 | priors (tensor): Prior boxes in center-offset form. 20 | Shape: [num_priors, 4]. 21 | variances: (`List[float]`) Variances of priorboxes 22 | Return: 23 | decoded bounding box predictions 24 | """ 25 | boxes = torch.cat( 26 | ( 27 | default_boxes[:, :2] + boxes[:, :2] * variances[0] * default_boxes[:, 2:], 28 | default_boxes[:, 2:] * torch.exp(boxes[:, 2:] * variances[1]), 29 | ), 30 | dim=1, 31 | ) 32 | boxes[:, :2] -= boxes[:, 2:] / 2 33 | boxes[:, 2:] += boxes[:, :2] 34 | 35 | return boxes 36 | 37 | 38 | def sanitize_coordinates( 39 | _x1, _x2, img_size: int, padding: int = 0, cast: bool = True 40 | ) -> Tuple[Tensor, Tensor]: 41 | """ 42 | Sanitizes the input coordinates so that x1 < x2, x1 != x2, x1 >= 0, and x2 <= image_size. 43 | Also converts from relative to absolute coordinates and casts the results to long tensors. 44 | If cast is false, the result won't be cast to longs. 45 | Warning: this does things in-place behind the scenes so copy if necessary. 46 | """ 47 | _x1 = _x1 * img_size 48 | _x2 = _x2 * img_size 49 | if cast: 50 | _x1 = _x1.long() 51 | _x2 = _x2.long() 52 | x1 = torch.min(_x1, _x2) 53 | x2 = torch.max(_x1, _x2) 54 | x1 = torch.clamp(x1 - padding, min=0) 55 | x2 = torch.clamp(x2 + padding, max=img_size) 56 | 57 | return x1, x2 58 | 59 | 60 | def crop(masks, boxes, padding: int = 1) -> Tensor: 61 | """ 62 | "Crop" predicted masks by zeroing out everything not in the predicted bbox. 63 | Vectorized by Chong (thanks Chong). 
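    Example (illustrative, assumed shapes): for masks of shape [138, 138, n] and a box
    covering the left half of the image in relative coordinates (0.0, 0.0, 0.5, 1.0),
    sanitize_coordinates gives x2 = 70 (69 plus the default padding of 1), so every
    pixel with width index >= 70 is zeroed for that mask.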
64 | Args: 65 | # TODO: torchvision mask rcnn masks UInt8Tensor[N, H, W] 66 | # TODO: torchvision boxes FloatTensor[N, 4] 67 | - masks should be a size [h, w, n] tensor of masks 68 | - boxes should be a size [n, 4] tensor of bbox coords in relative point form 69 | """ 70 | h, w, n = masks.size() 71 | x1, x2 = sanitize_coordinates(boxes[:, 0], boxes[:, 2], w, padding, cast=False) 72 | y1, y2 = sanitize_coordinates(boxes[:, 1], boxes[:, 3], h, padding, cast=False) 73 | 74 | rows = ( 75 | torch.arange(w, device=masks.device, dtype=x1.dtype) 76 | .view(1, -1, 1) 77 | .expand(h, w, n) 78 | ) 79 | cols = ( 80 | torch.arange(h, device=masks.device, dtype=x1.dtype) 81 | .view(-1, 1, 1) 82 | .expand(h, w, n) 83 | ) 84 | 85 | masks_left = rows >= x1.view(1, 1, -1) 86 | masks_right = rows < x2.view(1, 1, -1) 87 | masks_up = cols >= y1.view(1, 1, -1) 88 | masks_down = cols < y2.view(1, 1, -1) 89 | 90 | crop_mask = masks_left * masks_right * masks_up * masks_down 91 | 92 | return masks * crop_mask.float() 93 | 94 | 95 | class PostprocessYolact: 96 | def __init__( 97 | self, 98 | num_classes: int = 80, 99 | top_k: int = 10, 100 | nms_threshold: float = 0.3, 101 | score_threshold: float = 0.2, 102 | ) -> None: 103 | """ 104 | Args: 105 | num_classes (int) 106 | top_k 107 | nms_threshold 108 | score_threshold 109 | nms () 110 | """ 111 | self.config = None 112 | self.num_classes = num_classes + 1 113 | self.background_label = 0 114 | self.top_k = top_k 115 | self.nms_threshold = 0.5 116 | self.score_threshold = 0.2 117 | 118 | self.nms = batched_nms 119 | # if self.nms is None: 120 | # self.nms = fast_nms 121 | 122 | def __call__( 123 | self, preds: Dict[str, Tensor], image_sizes: List[Tuple[int]] 124 | ) -> List[Dict[str, Tensor]]: 125 | """ 126 | preds (Dict[str, Tensor]): 127 | boxes (FloatTensor[B, N, 4]) 128 | scores (FloatTensor[B, N, 81]) 129 | 130 | mask_coefs (FloatTensor[B, N, 32]) 131 | default_boxes (FloatTensor[N, 4]) 132 | proto_masks (FloatTensor[1, 138, 138, 32]) 133 | """ 134 | pred_boxes = None 135 | pred_scores = None 136 | default_boxes = None 137 | pred_masks = None 138 | proto_masks = None 139 | if "boxes" in preds: 140 | pred_boxes = preds["boxes"] 141 | if "scores" in preds: 142 | pred_scores = preds["scores"] 143 | print("before", pred_scores.size()) 144 | if "default_boxes" in preds: 145 | default_boxes = preds["default_boxes"] 146 | if "mask_coefs" in preds: 147 | pred_masks = preds["mask_coefs"] 148 | if "proto_masks" in preds: 149 | proto_masks = preds["proto_masks"] 150 | 151 | batch_size = pred_boxes.size(0) 152 | num_prior_boxes = default_boxes.size(0) 153 | # pred_scores = preds['scores'].view(batch_size, num_prior_boxes, self.num_classes).transpose(2, 1).contiguous() 154 | 155 | pred_scores = preds["scores"].view( 156 | batch_size, num_prior_boxes, self.num_classes 157 | ) 158 | pred_scores = pred_scores.transpose(2, 1).contiguous() 159 | # test_scores, test_index = torch.max(preds['scores'], dim=1) 160 | 161 | return_list = [] 162 | for i, image_size in enumerate(image_sizes): 163 | decoded_boxes = decode(pred_boxes[i], default_boxes) 164 | results = self._filter_overlaps(i, decoded_boxes, pred_masks, pred_scores) 165 | results["proto_masks"] = proto_masks[i] 166 | 167 | return_list.append(_convert_boxes_and_masks(results, image_size)) 168 | 169 | for result in return_list: 170 | scores = result["scores"].detach() 171 | sorted_index = range(len(scores))[: self.top_k] 172 | # sorted_index = scores.argsort(0, descending=True)[:5] 173 | 174 | boxes = 
result["boxes"][sorted_index] 175 | labels = result["labels"][sorted_index] 176 | scores = scores[sorted_index] 177 | masks = result["masks"][sorted_index] 178 | print(masks[0].sum()) 179 | 180 | result["boxes"] = boxes 181 | result["scores"] = scores 182 | result["labels"] = labels 183 | result["masks"] = masks 184 | 185 | return return_list 186 | 187 | def _filter_overlaps( 188 | self, batch_index, decoded_boxes, pred_masks, pred_scores 189 | ) -> Dict[str, Tensor]: 190 | """ 191 | batch_index (int) 192 | decoded_boxes () 193 | pred_masks (FloatTensor[B, N, 32]) 194 | pred_scores () 195 | """ 196 | scores = pred_scores[batch_index, 1:, :] 197 | max_scores, labels = torch.max(scores, dim=0) 198 | 199 | keep = max_scores > self.score_threshold # 0.05 200 | scores = scores[:, keep] 201 | boxes = decoded_boxes[keep, :] 202 | labels = labels[keep] 203 | masks = pred_masks[batch_index, keep, :] 204 | 205 | if scores.size(1) == 0: 206 | return None 207 | 208 | return_dict = defaultdict() 209 | for _class in range(scores.size(0)): 210 | _scores = scores[_class, :] 211 | indices = self.nms(boxes, _scores, labels, iou_threshold=0.3) 212 | 213 | return_dict["boxes"] = boxes[indices] 214 | return_dict["scores"] = scores[indices] 215 | return_dict["mask_coefs"] = masks[indices] 216 | return_dict["labels"] = labels[indices] 217 | 218 | return dict(return_dict) 219 | 220 | 221 | def _convert_boxes_and_masks(preds, size): 222 | """ 223 | Args: 224 | preds 225 | boxes (FloatTensor[N, 4]) 226 | mask_coefs (FloatTensor[N, 32]) 227 | proto_masks (FloatTensor[138, 138, 32]) 228 | size (): (h, w) 229 | 230 | """ 231 | h, w = size 232 | boxes = preds["boxes"] 233 | mask_coefs = preds["mask_coefs"] 234 | proto_masks = preds["proto_masks"] 235 | print(boxes.size(), mask_coefs.size(), proto_masks.size()) 236 | 237 | # masks = proto_masks @ mask_coefs.t() 238 | masks = torch.matmul(proto_masks, mask_coefs.t()) 239 | print(mask_coefs) 240 | masks = torch.sigmoid(masks) 241 | print(masks.size()) 242 | print(masks[0].sum().long()) 243 | 244 | masks = crop(masks, boxes) 245 | 246 | masks = masks.permute(2, 0, 1).contiguous() 247 | print(masks.size()) 248 | 249 | masks = F.interpolate( 250 | masks.unsqueeze(0), (h, w), mode="bilinear", align_corners=False 251 | ).squeeze(0) 252 | masks.gt_(0.5) # Binarize the masks 253 | print(masks[0].sum()) 254 | boxes[:, 0], boxes[:, 2] = sanitize_coordinates( 255 | boxes[:, 0], boxes[:, 2], w, cast=False 256 | ) 257 | boxes[:, 1], boxes[:, 3] = sanitize_coordinates( 258 | boxes[:, 1], boxes[:, 3], h, cast=False 259 | ) 260 | boxes = boxes.long() 261 | 262 | preds["boxes"] = boxes 263 | preds["masks"] = masks 264 | 265 | del preds["proto_masks"] 266 | del preds["mask_coefs"] 267 | 268 | return preds 269 | -------------------------------------------------------------------------------- /boda/models/feature_extractor/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Any, Optional, List 2 | 3 | import torch 4 | from torch import Tensor 5 | from torch import nn 6 | from torch.hub import load_state_dict_from_url 7 | 8 | 9 | # __all__ = ['MobileNetV2', 'mobilenet_v2'] 10 | 11 | 12 | model_urls = { 13 | "mobilenet_v2": "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth", 14 | } 15 | 16 | 17 | def _make_divisible(v: float, divisor: int, min_value: Optional[int] = None) -> int: 18 | """ 19 | This function is taken from the original tf repo. 
20 | It ensures that all layers have a channel number that is divisible by 8 21 | It can be seen here: 22 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 23 | """ 24 | if min_value is None: 25 | min_value = divisor 26 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 27 | # Make sure that round down does not go down by more than 10%. 28 | if new_v < 0.9 * v: 29 | new_v += divisor 30 | return new_v 31 | 32 | 33 | class ConvBNActivation(nn.Sequential): 34 | def __init__( 35 | self, 36 | in_planes: int, 37 | out_planes: int, 38 | kernel_size: int = 3, 39 | stride: int = 1, 40 | groups: int = 1, 41 | norm_layer: Optional[Callable[..., nn.Module]] = None, 42 | activation_layer: Optional[Callable[..., nn.Module]] = None, 43 | dilation: int = 1, 44 | ) -> None: 45 | padding = (kernel_size - 1) // 2 * dilation 46 | if norm_layer is None: 47 | norm_layer = nn.BatchNorm2d 48 | if activation_layer is None: 49 | activation_layer = nn.ReLU6 50 | super(ConvBNReLU, self).__init__( 51 | nn.Conv2d( 52 | in_planes, 53 | out_planes, 54 | kernel_size, 55 | stride, 56 | padding, 57 | dilation=dilation, 58 | groups=groups, 59 | bias=False, 60 | ), 61 | norm_layer(out_planes), 62 | activation_layer(inplace=True), 63 | ) 64 | self.out_channels = out_planes 65 | 66 | 67 | # necessary for backwards compatibility 68 | ConvBNReLU = ConvBNActivation 69 | 70 | 71 | class InvertedResidual(nn.Module): 72 | def __init__( 73 | self, 74 | inp: int, 75 | oup: int, 76 | stride: int, 77 | expand_ratio: int, 78 | norm_layer: Optional[Callable[..., nn.Module]] = None, 79 | ) -> None: 80 | super(InvertedResidual, self).__init__() 81 | self.stride = stride 82 | assert stride in [1, 2] 83 | 84 | if norm_layer is None: 85 | norm_layer = nn.BatchNorm2d 86 | 87 | hidden_dim = int(round(inp * expand_ratio)) 88 | self.use_res_connect = self.stride == 1 and inp == oup 89 | 90 | layers: List[nn.Module] = [] 91 | if expand_ratio != 1: 92 | # pw 93 | layers.append( 94 | ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer) 95 | ) 96 | layers.extend( 97 | [ 98 | # dw 99 | ConvBNReLU( 100 | hidden_dim, 101 | hidden_dim, 102 | stride=stride, 103 | groups=hidden_dim, 104 | norm_layer=norm_layer, 105 | ), 106 | # pw-linear 107 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 108 | norm_layer(oup), 109 | ] 110 | ) 111 | self.conv = nn.Sequential(*layers) 112 | self.out_channels = oup 113 | self._is_cn = stride > 1 114 | 115 | def forward(self, x: Tensor) -> Tensor: 116 | if self.use_res_connect: 117 | return x + self.conv(x) 118 | else: 119 | return self.conv(x) 120 | 121 | 122 | class MobileNetV2(nn.Module): 123 | def __init__( 124 | self, 125 | num_classes: int = 1000, 126 | width_mult: float = 1.0, 127 | inverted_residual_setting: Optional[List[List[int]]] = None, 128 | round_nearest: int = 8, 129 | block: Optional[Callable[..., nn.Module]] = None, 130 | norm_layer: Optional[Callable[..., nn.Module]] = None, 131 | ) -> None: 132 | """ 133 | MobileNet V2 main class 134 | Args: 135 | num_classes (int): Number of classes 136 | width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount 137 | inverted_residual_setting: Network structure 138 | round_nearest (int): Round the number of channels in each layer to be a multiple of this number 139 | Set to 1 to turn off rounding 140 | block: Module specifying inverted residual building block for mobilenet 141 | norm_layer: Module specifying the normalization layer to use 142 | """ 143 | 
super(MobileNetV2, self).__init__() 144 | 145 | if block is None: 146 | block = InvertedResidual 147 | 148 | if norm_layer is None: 149 | norm_layer = nn.BatchNorm2d 150 | 151 | input_channel = 32 152 | last_channel = 1280 153 | 154 | if inverted_residual_setting is None: 155 | inverted_residual_setting = [ 156 | # t, c, n, s 157 | [1, 16, 1, 1], 158 | [6, 24, 2, 2], 159 | [6, 32, 3, 2], 160 | [6, 64, 4, 2], 161 | [6, 96, 3, 1], 162 | [6, 160, 3, 2], 163 | [6, 320, 1, 1], 164 | ] 165 | 166 | # only check the first element, assuming user knows t,c,n,s are required 167 | if ( 168 | len(inverted_residual_setting) == 0 169 | or len(inverted_residual_setting[0]) != 4 170 | ): 171 | raise ValueError( 172 | "inverted_residual_setting should be non-empty " 173 | "or a 4-element list, got {}".format(inverted_residual_setting) 174 | ) 175 | 176 | # building first layer 177 | input_channel = _make_divisible(input_channel * width_mult, round_nearest) 178 | self.last_channel = _make_divisible( 179 | last_channel * max(1.0, width_mult), round_nearest 180 | ) 181 | features: List[nn.Module] = [ 182 | ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer) 183 | ] 184 | # building inverted residual blocks 185 | for t, c, n, s in inverted_residual_setting: 186 | output_channel = _make_divisible(c * width_mult, round_nearest) 187 | for i in range(n): 188 | stride = s if i == 0 else 1 189 | features.append( 190 | block( 191 | input_channel, 192 | output_channel, 193 | stride, 194 | expand_ratio=t, 195 | norm_layer=norm_layer, 196 | ) 197 | ) 198 | input_channel = output_channel 199 | # building last several layers 200 | features.append( 201 | ConvBNReLU( 202 | input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer 203 | ) 204 | ) 205 | # make it nn.Sequential 206 | self.features = nn.Sequential(*features) 207 | 208 | # building classifier 209 | self.classifier = nn.Sequential( 210 | nn.Dropout(0.2), 211 | nn.Linear(self.last_channel, num_classes), 212 | ) 213 | 214 | # weight initialization 215 | for m in self.modules(): 216 | if isinstance(m, nn.Conv2d): 217 | nn.init.kaiming_normal_(m.weight, mode="fan_out") 218 | if m.bias is not None: 219 | nn.init.zeros_(m.bias) 220 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 221 | nn.init.ones_(m.weight) 222 | nn.init.zeros_(m.bias) 223 | elif isinstance(m, nn.Linear): 224 | nn.init.normal_(m.weight, 0, 0.01) 225 | nn.init.zeros_(m.bias) 226 | 227 | def _forward_impl(self, x: Tensor) -> Tensor: 228 | # This exists since TorchScript doesn't support inheritance, so the superclass method 229 | # (this one) needs to have a name other than `forward` that can be accessed in a subclass 230 | x = self.features(x) 231 | # Cannot use "squeeze" as batch-size can be 1 232 | x = nn.functional.adaptive_avg_pool2d(x, (1, 1)) 233 | x = torch.flatten(x, 1) 234 | x = self.classifier(x) 235 | return x 236 | 237 | def forward(self, x: Tensor) -> Tensor: 238 | return self._forward_impl(x) 239 | 240 | 241 | def mobilenet_v2( 242 | pretrained: bool = False, progress: bool = True, **kwargs: Any 243 | ) -> MobileNetV2: 244 | """ 245 | Constructs a MobileNetV2 architecture from 246 | `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" `_. 
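    Example (illustrative):
        >>> model = mobilenet_v2(pretrained=False)
        >>> x = torch.randn(1, 3, 224, 224)
        >>> model(x).shape
        torch.Size([1, 1000])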
247 | Args: 248 | pretrained (bool): If True, returns a model pre-trained on ImageNet 249 | progress (bool): If True, displays a progress bar of the download to stderr 250 | """ 251 | model = MobileNetV2(**kwargs) 252 | if pretrained: 253 | state_dict = load_state_dict_from_url( 254 | model_urls["mobilenet_v2"], progress=progress 255 | ) 256 | model.load_state_dict(state_dict) 257 | return model 258 | -------------------------------------------------------------------------------- /boda/models/solov2/README.md: -------------------------------------------------------------------------------- 1 | # SOLO (Segmenting Objects by Locations) 2 | 3 | ``` 4 | ██████╗ ██████╗ ██╗ ██████╗ 5 | ██╔════╝ ██╔═══██╗██║ ██╔═══██╗ 6 | ╚██████╗ ██║ ██║██║ ██║ ██║██╗ ██╗ 7 | ╚════██╗██║ ██║██║ ██║ ██║ ██╗ ██╔╝ 8 | ██████╔╝╚██████╔╝███████╗╚██████╔╝ ████╔╝ 9 | ╚═════╝ ╚═════╝ ╚══════╝ ╚═════╝ ╚═══╝ 10 | ``` 11 | 12 | ## SOLO Architecture 13 | 14 | ```{bash} 15 | ========================================================================================== 16 | Layer (type:depth-idx) Output Shape Param # 17 | ========================================================================================== 18 | ├─ResNet: 1-1 [-1, 256, 334, 200] -- 19 | | └─Conv2d: 2-1 [-1, 64, 667, 400] 9,408 20 | | └─BatchNorm2d: 2-2 [-1, 64, 667, 400] 128 21 | | └─ReLU: 2-3 [-1, 64, 667, 400] -- 22 | | └─MaxPool2d: 2-4 [-1, 64, 334, 200] -- 23 | | └─ModuleList: 2 [] -- 24 | | | └─Sequential: 3-1 [-1, 256, 334, 200] 215,808 25 | | | └─Sequential: 3-2 [-1, 512, 167, 100] 1,219,584 26 | | | └─Sequential: 3-3 [-1, 1024, 84, 50] 7,098,368 27 | | | └─Sequential: 3-4 [-1, 2048, 42, 25] 14,964,736 28 | ├─Solov1PredictNeck: 1-2 [-1, 256, 334, 200] -- 29 | | └─ModuleList: 2 [] -- 30 | | | └─Conv2d: 3-5 [-1, 256, 42, 25] 524,544 31 | | | └─Conv2d: 3-6 [-1, 256, 84, 50] 262,400 32 | | | └─Conv2d: 3-7 [-1, 256, 167, 100] 131,328 33 | | | └─Conv2d: 3-8 [-1, 256, 334, 200] 65,792 34 | | └─ModuleList: 2 [] -- 35 | | | └─Conv2d: 3-9 [-1, 256, 42, 25] 590,080 36 | | | └─Conv2d: 3-10 [-1, 256, 84, 50] 590,080 37 | | | └─Conv2d: 3-11 [-1, 256, 167, 100] 590,080 38 | | | └─Conv2d: 3-12 [-1, 256, 334, 200] 590,080 39 | | └─ModuleList: 2 [] -- 40 | | | └─Conv2d: 3-13 [-1, 256, 21, 13] 590,080 41 | ├─Solov1PredictHead: 1-3 [2, 1600, 334, 200] -- 42 | | └─ModuleList: 2 [] -- 43 | | | └─Sequential: 3-14 [-1, 256, 167, 100] 595,200 44 | | | └─Sequential: 3-15 [-1, 256, 167, 100] 590,592 45 | | | └─Sequential: 3-16 [-1, 256, 167, 100] 590,592 46 | | | └─Sequential: 3-17 [-1, 256, 167, 100] 590,592 47 | | | └─Sequential: 3-18 [-1, 256, 167, 100] 590,592 48 | | | └─Sequential: 3-19 [-1, 256, 167, 100] 590,592 49 | | | └─Sequential: 3-20 [-1, 256, 167, 100] 590,592 50 | | └─ModuleList: 2 [] -- 51 | | | └─Conv2d: 3-21 [-1, 1600, 334, 200] 411,200 52 | | └─ModuleList: 2 [] -- 53 | | | └─Sequential: 3-22 [-1, 256, 40, 40] 590,592 54 | | | └─Sequential: 3-23 [-1, 256, 40, 40] 590,592 55 | | | └─Sequential: 3-24 [-1, 256, 40, 40] 590,592 56 | | | └─Sequential: 3-25 [-1, 256, 40, 40] 590,592 57 | | | └─Sequential: 3-26 [-1, 256, 40, 40] 590,592 58 | | | └─Sequential: 3-27 [-1, 256, 40, 40] 590,592 59 | | | └─Sequential: 3-28 [-1, 256, 40, 40] 590,592 60 | | └─Conv2d: 2-5 [-1, 79, 40, 40] 182,095 61 | | └─ModuleList: 2 [] -- 62 | | | └─Sequential: 3-29 [-1, 256, 167, 100] (recursive) 63 | | | └─Sequential: 3-30 [-1, 256, 167, 100] (recursive) 64 | | | └─Sequential: 3-31 [-1, 256, 167, 100] (recursive) 65 | | | └─Sequential: 3-32 [-1, 256, 167, 100] (recursive) 66 | | | 
└─Sequential: 3-33 [-1, 256, 167, 100] (recursive) 67 | | | └─Sequential: 3-34 [-1, 256, 167, 100] (recursive) 68 | | | └─Sequential: 3-35 [-1, 256, 167, 100] (recursive) 69 | | └─ModuleList: 2 [] -- 70 | | | └─Conv2d: 3-36 [-1, 1296, 334, 200] 333,072 71 | | └─ModuleList: 2 [] -- 72 | | | └─Sequential: 3-37 [-1, 256, 36, 36] (recursive) 73 | | | └─Sequential: 3-38 [-1, 256, 36, 36] (recursive) 74 | | | └─Sequential: 3-39 [-1, 256, 36, 36] (recursive) 75 | | | └─Sequential: 3-40 [-1, 256, 36, 36] (recursive) 76 | | | └─Sequential: 3-41 [-1, 256, 36, 36] (recursive) 77 | | | └─Sequential: 3-42 [-1, 256, 36, 36] (recursive) 78 | | | └─Sequential: 3-43 [-1, 256, 36, 36] (recursive) 79 | | └─Conv2d: 2-6 [-1, 79, 36, 36] (recursive) 80 | | └─ModuleList: 2 [] -- 81 | | | └─Sequential: 3-44 [-1, 256, 84, 50] (recursive) 82 | | | └─Sequential: 3-45 [-1, 256, 84, 50] (recursive) 83 | | | └─Sequential: 3-46 [-1, 256, 84, 50] (recursive) 84 | | | └─Sequential: 3-47 [-1, 256, 84, 50] (recursive) 85 | | | └─Sequential: 3-48 [-1, 256, 84, 50] (recursive) 86 | | | └─Sequential: 3-49 [-1, 256, 84, 50] (recursive) 87 | | | └─Sequential: 3-50 [-1, 256, 84, 50] (recursive) 88 | | └─ModuleList: 2 [] -- 89 | | | └─Conv2d: 3-51 [-1, 576, 168, 100] 148,032 90 | | └─ModuleList: 2 [] -- 91 | | | └─Sequential: 3-52 [-1, 256, 24, 24] (recursive) 92 | | | └─Sequential: 3-53 [-1, 256, 24, 24] (recursive) 93 | | | └─Sequential: 3-54 [-1, 256, 24, 24] (recursive) 94 | | | └─Sequential: 3-55 [-1, 256, 24, 24] (recursive) 95 | | | └─Sequential: 3-56 [-1, 256, 24, 24] (recursive) 96 | | | └─Sequential: 3-57 [-1, 256, 24, 24] (recursive) 97 | | | └─Sequential: 3-58 [-1, 256, 24, 24] (recursive) 98 | | └─Conv2d: 2-7 [-1, 79, 24, 24] (recursive) 99 | | └─ModuleList: 2 [] -- 100 | | | └─Sequential: 3-59 [-1, 256, 42, 25] (recursive) 101 | | | └─Sequential: 3-60 [-1, 256, 42, 25] (recursive) 102 | | | └─Sequential: 3-61 [-1, 256, 42, 25] (recursive) 103 | | | └─Sequential: 3-62 [-1, 256, 42, 25] (recursive) 104 | | | └─Sequential: 3-63 [-1, 256, 42, 25] (recursive) 105 | | | └─Sequential: 3-64 [-1, 256, 42, 25] (recursive) 106 | | | └─Sequential: 3-65 [-1, 256, 42, 25] (recursive) 107 | | └─ModuleList: 2 [] -- 108 | | | └─Conv2d: 3-66 [-1, 256, 84, 50] 65,792 109 | | └─ModuleList: 2 [] -- 110 | | | └─Sequential: 3-67 [-1, 256, 16, 16] (recursive) 111 | | | └─Sequential: 3-68 [-1, 256, 16, 16] (recursive) 112 | | | └─Sequential: 3-69 [-1, 256, 16, 16] (recursive) 113 | | | └─Sequential: 3-70 [-1, 256, 16, 16] (recursive) 114 | | | └─Sequential: 3-71 [-1, 256, 16, 16] (recursive) 115 | | | └─Sequential: 3-72 [-1, 256, 16, 16] (recursive) 116 | | | └─Sequential: 3-73 [-1, 256, 16, 16] (recursive) 117 | | └─Conv2d: 2-8 [-1, 79, 16, 16] (recursive) 118 | | └─ModuleList: 2 [] -- 119 | | | └─Sequential: 3-74 [-1, 256, 42, 25] (recursive) 120 | | | └─Sequential: 3-75 [-1, 256, 42, 25] (recursive) 121 | | | └─Sequential: 3-76 [-1, 256, 42, 25] (recursive) 122 | | | └─Sequential: 3-77 [-1, 256, 42, 25] (recursive) 123 | | | └─Sequential: 3-78 [-1, 256, 42, 25] (recursive) 124 | | | └─Sequential: 3-79 [-1, 256, 42, 25] (recursive) 125 | | | └─Sequential: 3-80 [-1, 256, 42, 25] (recursive) 126 | | └─ModuleList: 2 [] -- 127 | | | └─Conv2d: 3-81 [-1, 144, 84, 50] 37,008 128 | | └─ModuleList: 2 [] -- 129 | | | └─Sequential: 3-82 [-1, 256, 12, 12] (recursive) 130 | | | └─Sequential: 3-83 [-1, 256, 12, 12] (recursive) 131 | | | └─Sequential: 3-84 [-1, 256, 12, 12] (recursive) 132 | | | └─Sequential: 3-85 [-1, 256, 12, 12] (recursive) 133 | | | 
└─Sequential: 3-86 [-1, 256, 12, 12] (recursive) 134 | | | └─Sequential: 3-87 [-1, 256, 12, 12] (recursive) 135 | | | └─Sequential: 3-88 [-1, 256, 12, 12] (recursive) 136 | | └─Conv2d: 2-9 [-1, 79, 12, 12] (recursive) 137 | ========================================================================================== 138 | Total params: 36,892,591 139 | Trainable params: 36,892,591 140 | Non-trainable params: 0 141 | Total mult-adds (G): 296.58 142 | ========================================================================================== 143 | Input size (MB): 12.20 144 | Forward/backward pass size (MB): 2671.69 145 | Params size (MB): 140.73 146 | Estimated Total Size (MB): 2824.63 147 | ========================================================================================== 148 | ``` -------------------------------------------------------------------------------- /boda/models/feature_extractor/efficientnet.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import math 3 | from functools import partial 4 | from typing import Any, Callable, Optional, List 5 | 6 | import torch 7 | from torch import nn, Tensor 8 | from torchvision.ops import StochasticDepth 9 | from torchvision.ops.misc import ConvNormActivation, SqueezeExcitation 10 | 11 | # from torchvision._internally_replaced_utils import load_state_dict_from_url 12 | 13 | 14 | __all__ = [ 15 | "EfficientNet", 16 | "efficientnet_b0", 17 | "efficientnet_b1", 18 | "efficientnet_b2", 19 | "efficientnet_b3", 20 | "efficientnet_b4", 21 | "efficientnet_b5", 22 | "efficientnet_b6", 23 | "efficientnet_b7", 24 | ] 25 | 26 | 27 | model_urls = { 28 | # Weights ported from https://github.com/rwightman/pytorch-image-models/ 29 | "efficientnet_b0": "https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth", 30 | "efficientnet_b1": "https://download.pytorch.org/models/efficientnet_b1_rwightman-533bc792.pth", 31 | "efficientnet_b2": "https://download.pytorch.org/models/efficientnet_b2_rwightman-bcdf34b7.pth", 32 | "efficientnet_b3": "https://download.pytorch.org/models/efficientnet_b3_rwightman-cf984f9c.pth", 33 | "efficientnet_b4": "https://download.pytorch.org/models/efficientnet_b4_rwightman-7eb33cd5.pth", 34 | # Weights ported from https://github.com/lukemelas/EfficientNet-PyTorch/ 35 | "efficientnet_b5": "https://download.pytorch.org/models/efficientnet_b5_lukemelas-b6417697.pth", 36 | "efficientnet_b6": "https://download.pytorch.org/models/efficientnet_b6_lukemelas-c76e70fd.pth", 37 | "efficientnet_b7": "https://download.pytorch.org/models/efficientnet_b7_lukemelas-dcc49843.pth", 38 | } 39 | 40 | 41 | def _make_divisible(v: float, divisor: int, min_value: Optional[int] = None) -> int: 42 | if min_value is None: 43 | min_value = divisor 44 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 45 | # Make sure that round down does not go down by more than 10%. 
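# Illustrative worked example: with v = 32 * 0.75 = 24.0 and divisor = 8 (min_value
# defaults to 8), new_v = max(8, int(24.0 + 4) // 8 * 8) = 24; since 24 >= 0.9 * 24.0,
# the check below leaves it unchanged and 24 is returned.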
46 | if new_v < 0.9 * v: 47 | new_v += divisor 48 | return new_v 49 | 50 | 51 | class MBConvConfig: 52 | # Stores information listed at Table 1 of the EfficientNet paper 53 | def __init__( 54 | self, 55 | expand_ratio: float, 56 | kernel: int, 57 | stride: int, 58 | input_channels: int, 59 | out_channels: int, 60 | num_layers: int, 61 | width_mult: float, 62 | depth_mult: float, 63 | ) -> None: 64 | self.expand_ratio = expand_ratio 65 | self.kernel = kernel 66 | self.stride = stride 67 | self.input_channels = self.adjust_channels(input_channels, width_mult) 68 | self.out_channels = self.adjust_channels(out_channels, width_mult) 69 | self.num_layers = self.adjust_depth(num_layers, depth_mult) 70 | 71 | def __repr__(self) -> str: 72 | s = self.__class__.__name__ + "(" 73 | s += "expand_ratio={expand_ratio}" 74 | s += ", kernel={kernel}" 75 | s += ", stride={stride}" 76 | s += ", input_channels={input_channels}" 77 | s += ", out_channels={out_channels}" 78 | s += ", num_layers={num_layers}" 79 | s += ")" 80 | return s.format(**self.__dict__) 81 | 82 | @staticmethod 83 | def adjust_channels( 84 | channels: int, width_mult: float, min_value: Optional[int] = None 85 | ) -> int: 86 | return _make_divisible(channels * width_mult, 8, min_value) 87 | 88 | @staticmethod 89 | def adjust_depth(num_layers: int, depth_mult: float): 90 | return int(math.ceil(num_layers * depth_mult)) 91 | 92 | 93 | def _efficientnet_conf(width_mult: float, depth_mult: float) -> List[MBConvConfig]: 94 | bneck_conf = partial(MBConvConfig, width_mult=width_mult, depth_mult=depth_mult) 95 | inverted_residual_setting = [ 96 | bneck_conf(1, 3, 1, 32, 16, 1), 97 | bneck_conf(6, 3, 2, 16, 24, 2), 98 | bneck_conf(6, 5, 2, 24, 40, 2), 99 | bneck_conf(6, 3, 2, 40, 80, 3), 100 | bneck_conf(6, 5, 1, 80, 112, 3), 101 | bneck_conf(6, 5, 2, 112, 192, 4), 102 | bneck_conf(6, 3, 1, 192, 320, 1), 103 | ] 104 | 105 | return inverted_residual_setting 106 | 107 | 108 | class MBConv(nn.Module): 109 | def __init__( 110 | self, 111 | cnf: MBConvConfig, 112 | stochastic_depth_prob: float, 113 | norm_layer: Callable[..., nn.Module], 114 | se_layer: Callable[..., nn.Module] = SqueezeExcitation, 115 | ) -> None: 116 | super().__init__() 117 | 118 | if not (1 <= cnf.stride <= 2): 119 | raise ValueError("illegal stride value") 120 | 121 | self.use_res_connect = ( 122 | cnf.stride == 1 and cnf.input_channels == cnf.out_channels 123 | ) 124 | 125 | layers: List[nn.Module] = [] 126 | activation_layer = nn.SiLU 127 | 128 | # expand 129 | expanded_channels = cnf.adjust_channels(cnf.input_channels, cnf.expand_ratio) 130 | if expanded_channels != cnf.input_channels: 131 | layers.append( 132 | ConvNormActivation( 133 | cnf.input_channels, 134 | expanded_channels, 135 | kernel_size=1, 136 | norm_layer=norm_layer, 137 | activation_layer=activation_layer, 138 | ) 139 | ) 140 | 141 | # depthwise 142 | layers.append( 143 | ConvNormActivation( 144 | expanded_channels, 145 | expanded_channels, 146 | kernel_size=cnf.kernel, 147 | stride=cnf.stride, 148 | groups=expanded_channels, 149 | norm_layer=norm_layer, 150 | activation_layer=activation_layer, 151 | ) 152 | ) 153 | 154 | # squeeze and excitation 155 | squeeze_channels = max(1, cnf.input_channels // 4) 156 | layers.append( 157 | se_layer( 158 | expanded_channels, 159 | squeeze_channels, 160 | activation=partial(nn.SiLU, inplace=True), 161 | ) 162 | ) 163 | 164 | # project 165 | layers.append( 166 | ConvNormActivation( 167 | expanded_channels, 168 | cnf.out_channels, 169 | kernel_size=1, 170 | 
norm_layer=norm_layer, 171 | activation_layer=None, 172 | ) 173 | ) 174 | 175 | self.block = nn.Sequential(*layers) 176 | self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row") 177 | self.out_channels = cnf.out_channels 178 | self.stride = cnf.stride 179 | 180 | def forward(self, input: Tensor) -> Tensor: 181 | result = self.block(input) 182 | if self.use_res_connect: 183 | result = self.stochastic_depth(result) 184 | result += input 185 | return result 186 | 187 | 188 | class EfficientNet(nn.Module): 189 | def __init__( 190 | self, 191 | width_mult: float, 192 | depth_mult: float, 193 | stochastic_depth_prob: float = 0.2, 194 | block: Optional[Callable[..., nn.Module]] = None, 195 | norm_layer: Optional[Callable[..., nn.Module]] = None, 196 | **kwargs: Any, 197 | ) -> None: 198 | super().__init__() 199 | self.layers = nn.ModuleList() 200 | self.channels = [] 201 | 202 | if block is None: 203 | block = MBConv 204 | 205 | if norm_layer is None: 206 | norm_layer = nn.BatchNorm2d 207 | 208 | self.inverted_residual_setting = _efficientnet_conf( 209 | width_mult=width_mult, depth_mult=depth_mult 210 | ) 211 | 212 | # building first layer 213 | firstconv_output_channels = self.inverted_residual_setting[0].input_channels 214 | self.firstconv_layer = ConvNormActivation( 215 | 3, 216 | firstconv_output_channels, 217 | kernel_size=3, 218 | stride=2, 219 | norm_layer=norm_layer, 220 | activation_layer=nn.SiLU, 221 | ) 222 | 223 | # building inverted residual blocks 224 | total_stage_blocks = sum( 225 | cnf.num_layers for cnf in self.inverted_residual_setting 226 | ) 227 | stage_block_id = 0 228 | for cnf in self.inverted_residual_setting: 229 | stage: List[nn.Module] = [] 230 | for _ in range(cnf.num_layers): 231 | # copy to avoid modifications. 
shallow copy is enough 232 | block_cnf = copy.copy(cnf) 233 | 234 | # overwrite info if not the first conv in the stage 235 | if stage: 236 | block_cnf.input_channels = block_cnf.out_channels 237 | block_cnf.stride = 1 238 | 239 | # adjust stochastic depth probability based on the depth of the stage block 240 | sd_prob = ( 241 | stochastic_depth_prob * float(stage_block_id) / total_stage_blocks 242 | ) 243 | 244 | stage.append(block(block_cnf, sd_prob, norm_layer)) 245 | stage_block_id += 1 246 | 247 | # self.channels.append(block_cnf.out_channels) 248 | self.layers.extend(stage) 249 | 250 | for m in self.modules(): 251 | if isinstance(m, nn.Conv2d): 252 | nn.init.kaiming_normal_(m.weight, mode="fan_out") 253 | if m.bias is not None: 254 | nn.init.zeros_(m.bias) 255 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 256 | nn.init.ones_(m.weight) 257 | nn.init.zeros_(m.bias) 258 | 259 | def forward(self, inputs: Tensor) -> Tensor: 260 | x = self.firstconv_layer(inputs) 261 | 262 | outputs = [] 263 | last_x = None 264 | for i, layer in enumerate(self.layers): 265 | x = layer(x) 266 | 267 | if layer.stride == 2: 268 | outputs.append(last_x) 269 | elif i == len(self.layers) - 1: 270 | outputs.append(x) 271 | last_x = x 272 | 273 | del last_x 274 | 275 | return outputs[1:] 276 | 277 | def from_pretrained(self, path): 278 | state_dict = torch.load(path) 279 | # state_dict = load_state_dict_from_url(model_urls[arch], progress=True) 280 | 281 | try: 282 | excepted_keys = [ 283 | key 284 | for key in list(state_dict) 285 | if key.startswith("features.8") or key.startswith("classifier") 286 | ] 287 | for excepted_key in excepted_keys: 288 | state_dict.pop(excepted_key) 289 | except KeyError: 290 | pass 291 | 292 | self.load_state_dict(state_dict, strict=False) 293 | 294 | 295 | def efficientnet_b0() -> EfficientNet: 296 | backbone = EfficientNet(width_mult=1.0, depth_mult=1.0) 297 | return backbone 298 | 299 | 300 | def efficientnet_b1() -> EfficientNet: 301 | backbone = EfficientNet(width_mult=1.0, depth_mult=1.1) 302 | return backbone 303 | 304 | 305 | def efficientnet_b2() -> EfficientNet: 306 | backbone = EfficientNet(width_mult=1.1, depth_mult=1.2) 307 | return backbone 308 | 309 | 310 | def efficientnet_b3() -> EfficientNet: 311 | backbone = EfficientNet(width_mult=1.2, depth_mult=1.4) 312 | return backbone 313 | 314 | 315 | def efficientnet_b4() -> EfficientNet: 316 | backbone = EfficientNet(width_mult=1.4, depth_mult=1.8) 317 | return backbone 318 | 319 | 320 | def efficientnet_b5() -> EfficientNet: 321 | backbone = EfficientNet( 322 | width_mult=1.6, 323 | depth_mult=2.2, 324 | norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), 325 | ) 326 | return backbone 327 | 328 | 329 | def efficientnet_b6() -> EfficientNet: 330 | backbone = EfficientNet( 331 | width_mult=1.8, 332 | depth_mult=2.6, 333 | norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), 334 | ) 335 | return backbone 336 | 337 | 338 | def efficientnet_b7() -> EfficientNet: 339 | backbone = EfficientNet( 340 | width_mult=2.0, 341 | depth_mult=3.1, 342 | norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), 343 | ) 344 | return backbone 345 | -------------------------------------------------------------------------------- /boda/lib/torchsummary/torchsummary.py: -------------------------------------------------------------------------------- 1 | """ torchsummary.py """ 2 | from typing import ( 3 | Any, 4 | Dict, 5 | Iterable, 6 | Iterator, 7 | List, 8 | Mapping, 9 | Optional, 10 | Sequence, 11 | Tuple, 12 
| Union, 13 | ) 14 | 15 | import torch 16 | import torch.nn as nn 17 | from torch.utils.hooks import RemovableHandle 18 | 19 | from .formatting import FormattingOptions, Verbosity 20 | from .layer_info import LayerInfo 21 | from .model_statistics import CORRECTED_INPUT_SIZE_TYPE, HEADER_TITLES, ModelStatistics 22 | 23 | # Some modules do the computation themselves using parameters 24 | # or the parameters of children. Treat these as layers. 25 | LAYER_MODULES = (torch.nn.MultiheadAttention,) 26 | INPUT_SIZE_TYPE = Sequence[Union[int, Sequence[Any], torch.Size]] 27 | INPUT_DATA_TYPE = Optional[ 28 | Union[torch.Tensor, torch.Size, Sequence[torch.Tensor], INPUT_SIZE_TYPE] 29 | ] 30 | DEFAULT_COLUMN_NAMES = ("output_size", "num_params") 31 | 32 | 33 | def summary( 34 | model: nn.Module, 35 | input_data: INPUT_DATA_TYPE = None, 36 | *args: Any, 37 | batch_dim: Optional[int] = 0, 38 | branching: bool = True, 39 | col_names: Optional[Iterable[str]] = None, 40 | col_width: int = 25, 41 | depth: int = 3, 42 | device: Optional[torch.device] = None, 43 | dtypes: Optional[List[torch.dtype]] = None, 44 | verbose: int = 1, 45 | **kwargs: Any, 46 | ) -> ModelStatistics: 47 | """ 48 | Summarize the given PyTorch model. Summarized information includes: 49 | 1) Layer names, 50 | 2) input/output shapes, 51 | 3) kernel shape, 52 | 4) # of parameters, 53 | 5) # of operations (Mult-Adds) 54 | 55 | Args: 56 | model (nn.Module): 57 | PyTorch model to summarize 58 | 59 | input_data (Sequence of Sizes or Tensors): 60 | Example input tensor of the model (dtypes inferred from model input). 61 | - OR - 62 | Shape of input data as a List/Tuple/torch.Size 63 | (dtypes must match model input, default is FloatTensors). 64 | You should NOT include batch size in the tuple. 65 | - OR - 66 | If input_data is not provided, no forward pass through the network is 67 | performed, and the provided model information is limited to layer names. 68 | Default: None 69 | 70 | batch_dim (int): 71 | Batch_dimension of input data. If batch_dim is None, the input data 72 | is assumed to contain the batch dimension. 73 | WARNING: in a future version, the default will change to None. 74 | Default: 0 75 | 76 | branching (bool): 77 | Whether to use the branching layout for the printed output. 78 | Default: True 79 | 80 | col_names (Iterable[str]): 81 | Specify which columns to show in the output. Currently supported: 82 | ("input_size", "output_size", "num_params", "kernel_size", "mult_adds") 83 | If input_data is not provided, only "num_params" is used. 84 | Default: ("output_size", "num_params") 85 | 86 | col_width (int): 87 | Width of each column. 88 | Default: 25 89 | 90 | depth (int): 91 | Number of nested layers to traverse (e.g. Sequentials). 92 | Default: 3 93 | 94 | device (torch.Device): 95 | Uses this torch device for model and input_data. 96 | If not specified, uses result of torch.cuda.is_available(). 97 | Default: None 98 | 99 | dtypes (List[torch.dtype]): 100 | For multiple inputs, specify the size of both inputs, and 101 | also specify the types of each parameter here. 102 | Default: None 103 | 104 | verbose (int): 105 | 0 (quiet): No output 106 | 1 (default): Print model summary 107 | 2 (verbose): Show weight and bias layers in full detail 108 | Default: 1 109 | 110 | *args, **kwargs: 111 | Other arguments used in `model.forward` function. 112 | 113 | Return: 114 | ModelStatistics object 115 | See torchsummary/model_statistics.py for more information. 
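Example:
    An illustrative call; the two-layer model below is a hypothetical
    placeholder, not something defined in this package:

    >>> import torch.nn as nn
    >>> net = nn.Sequential(nn.Conv2d(3, 16, 3), nn.ReLU())
    >>> stats = summary(net, (3, 32, 32), verbose=0)
    >>> print(stats)  # prints the layer table held by the returned ModelStatistics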
116 | """ 117 | if col_names is None: 118 | col_names = ("num_params",) if input_data is None else DEFAULT_COLUMN_NAMES 119 | 120 | validate_user_params(input_data, col_names, verbose) 121 | input_size: CORRECTED_INPUT_SIZE_TYPE = [] 122 | summary_list: List[LayerInfo] = [] 123 | hooks: Optional[List[RemovableHandle]] = None if input_data is None else [] 124 | idx: Dict[int, int] = {} 125 | apply_hooks(model, model, batch_dim, depth, summary_list, idx, hooks) 126 | 127 | if input_data is not None: 128 | if device is None: 129 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 130 | 131 | x, input_size = process_input_data(input_data, batch_dim, device, dtypes) 132 | args, kwargs = set_device(args, device), set_device(kwargs, device) 133 | try: 134 | with torch.no_grad(): 135 | _ = model.to(device)(*x, *args, **kwargs) # type: ignore[misc] 136 | except Exception as e: 137 | executed_layers = [layer for layer in summary_list if layer.executed] 138 | raise RuntimeError( 139 | "Failed to run torchsummary. See above stack traces for more details. " 140 | "Executed layers up to: {}".format(executed_layers) 141 | ) from e 142 | finally: 143 | if hooks is not None: 144 | for hook in hooks: 145 | hook.remove() 146 | 147 | formatting = FormattingOptions(branching, depth, verbose, col_names, col_width) 148 | formatting.set_layer_name_width(summary_list) 149 | results = ModelStatistics(summary_list, input_size, formatting) 150 | if verbose > Verbosity.QUIET.value: 151 | print(results) 152 | return results 153 | 154 | 155 | def validate_user_params( 156 | input_data: INPUT_DATA_TYPE, col_names: Iterable[str], verbose: int 157 | ) -> None: 158 | """Raise exceptions if the user's input is invalid.""" 159 | if verbose not in (0, 1, 2): 160 | raise ValueError( 161 | "Verbose must be either 0 (quiet), 1 (default), or 2 (verbose)." 
162 | ) 163 | 164 | for col in col_names: 165 | if col not in HEADER_TITLES.keys(): 166 | raise ValueError(f"Column {col} is not a valid column name.") 167 | if input_data is None and col not in ("num_params", "kernel_size"): 168 | raise ValueError(f"You must pass input_data in order to use column {col}") 169 | 170 | 171 | def set_device(data: Any, device: torch.device) -> Any: 172 | """Sets device for all input types and collections of input types.""" 173 | if torch.is_tensor(data): 174 | return data.to(device, non_blocking=True) 175 | 176 | # Recursively apply to collection items 177 | elem_type = type(data) 178 | if isinstance(data, Mapping): 179 | return elem_type({k: set_device(v, device) for k, v in data.items()}) 180 | if isinstance(data, tuple) and hasattr(data, "_fields"): # Named tuple 181 | return elem_type(*(set_device(d, device) for d in data)) 182 | if isinstance(data, Iterable) and not isinstance(data, str): 183 | return elem_type([set_device(d, device) for d in data]) 184 | # Data is neither a tensor nor a collection 185 | return data 186 | 187 | 188 | def process_input_data( 189 | input_data: INPUT_DATA_TYPE, 190 | batch_dim: Optional[int], 191 | device: torch.device, 192 | dtypes: Optional[List[torch.dtype]], 193 | ) -> Tuple[INPUT_DATA_TYPE, CORRECTED_INPUT_SIZE_TYPE]: 194 | """Create sample input data and the corrected input size.""" 195 | if isinstance(input_data, torch.Tensor): 196 | input_size = get_correct_input_sizes(input_data.size()) 197 | x = [input_data.to(device)] 198 | 199 | elif isinstance(input_data, (list, tuple)): 200 | if all(isinstance(data, torch.Tensor) for data in input_data): 201 | input_sizes = [ 202 | data.size() for data in input_data # type: ignore[union-attr] 203 | ] 204 | input_size = get_correct_input_sizes(input_sizes) 205 | x = set_device(input_data, device) 206 | else: 207 | if dtypes is None: 208 | dtypes = [torch.float] * len(input_data) 209 | input_size = get_correct_input_sizes(input_data) 210 | x = get_input_tensor(input_size, batch_dim, dtypes, device) 211 | 212 | else: 213 | raise TypeError( 214 | "Input type is not recognized. Please ensure input_data is valid.\n" 215 | "For multiple inputs to the network, ensure input_data passed in is " 216 | "a sequence of tensors or a list of tuple sizes. If you are having " 217 | "trouble here, please submit a GitHub issue." 218 | ) 219 | 220 | return x, input_size 221 | 222 | 223 | def get_input_tensor( 224 | input_size: CORRECTED_INPUT_SIZE_TYPE, 225 | batch_dim: Optional[int], 226 | dtypes: List[torch.dtype], 227 | device: torch.device, 228 | ) -> List[torch.Tensor]: 229 | """Get input_tensor with batch size 2 for use in model.forward()""" 230 | x = [] 231 | for size, dtype in zip(input_size, dtypes): 232 | # add batch_size of 2 for BatchNorm 233 | input_tensor = torch.rand(*size) 234 | if batch_dim is not None: 235 | input_tensor = input_tensor.unsqueeze(dim=batch_dim) 236 | input_tensor = torch.cat([input_tensor] * 2, dim=batch_dim) 237 | x.append(input_tensor.to(device).type(dtype)) 238 | return x 239 | 240 | 241 | def get_correct_input_sizes(input_size: INPUT_SIZE_TYPE) -> CORRECTED_INPUT_SIZE_TYPE: 242 | """ 243 | Convert input_size to the correct form, which is a list of tuples. 244 | Also handles multiple inputs to the network. 
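For example (illustrative): a flat list such as [1, 28, 28] becomes [(1, 28, 28)],
and a tuple of tuples such as ((3, 224, 224), (10,)) becomes [(3, 224, 224), (10,)].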
245 | """ 246 | 247 | def flatten(nested_array: INPUT_SIZE_TYPE) -> Iterator[Any]: 248 | """Flattens a nested array.""" 249 | for item in nested_array: 250 | if isinstance(item, (list, tuple)): 251 | yield from flatten(item) 252 | else: 253 | yield item 254 | 255 | if not input_size or any(size <= 0 for size in flatten(input_size)): 256 | raise ValueError("Input_data is invalid, or negative size found in input_data.") 257 | 258 | if isinstance(input_size, list) and isinstance(input_size[0], int): 259 | return [tuple(input_size)] 260 | if isinstance(input_size, list): 261 | return input_size 262 | if isinstance(input_size, tuple) and isinstance(input_size[0], tuple): 263 | return list(input_size) 264 | return [input_size] 265 | 266 | 267 | def apply_hooks( 268 | module: nn.Module, 269 | orig_model: nn.Module, 270 | batch_dim: Optional[int], 271 | depth: int, 272 | summary_list: List[LayerInfo], 273 | idx: Dict[int, int], 274 | hooks: Optional[List[RemovableHandle]], 275 | curr_depth: int = 0, 276 | parent_info: Optional[LayerInfo] = None, 277 | ) -> None: 278 | """ 279 | If input_data is provided, recursively adds hooks to all layers of the model. 280 | Else, fills summary_list with layer info without computing a 281 | forward pass through the network. 282 | """ 283 | # Fallback is used if the layer's hook is never called, in ModuleLists, for example. 284 | info = LayerInfo(module, curr_depth, None, parent_info) 285 | 286 | def pre_hook(module: nn.Module, inputs: Any) -> None: 287 | """Create a LayerInfo object to aggregate information about that layer.""" 288 | del inputs 289 | nonlocal info 290 | idx[curr_depth] = idx.get(curr_depth, 0) + 1 291 | info = LayerInfo(module, curr_depth, idx[curr_depth], parent_info) 292 | info.check_recursive(summary_list) 293 | summary_list.append(info) 294 | 295 | def hook(module: nn.Module, inputs: Any, outputs: Any) -> None: 296 | """Update LayerInfo after forward pass.""" 297 | del module 298 | info.input_size = info.calculate_size(inputs, batch_dim) 299 | info.output_size = info.calculate_size(outputs, batch_dim) 300 | info.calculate_macs() 301 | info.executed = True 302 | 303 | submodules = [m for m in module.modules() if m is not orig_model] 304 | if module != orig_model or isinstance(module, LAYER_MODULES) or not submodules: 305 | if hooks is None: 306 | pre_hook(module, None) 307 | else: 308 | hooks.append(module.register_forward_pre_hook(pre_hook)) 309 | hooks.append(module.register_forward_hook(hook)) 310 | 311 | if curr_depth <= depth: 312 | for child in module.children(): 313 | apply_hooks( 314 | child, 315 | orig_model, 316 | batch_dim, 317 | depth, 318 | summary_list, 319 | idx, 320 | hooks, 321 | curr_depth + 1, 322 | info, 323 | ) 324 | --------------------------------------------------------------------------------