├── mypy.ini
├── CHANGELOG.md
├── pytest.ini
├── boda
│   ├── lib
│   │   ├── __init__.py
│   │   ├── torchinfo
│   │   │   ├── __init__.py
│   │   │   ├── formatting.py
│   │   │   ├── layer_info.py
│   │   │   └── model_statistics.py
│   │   └── torchsummary
│   │       ├── __init__.py
│   │       ├── formatting.py
│   │       ├── layer_info.py
│   │       ├── model_statistics.py
│   │       └── torchsummary.py
│   ├── ops
│   │   ├── __init__.py
│   │   └── anchor_generators.py
│   ├── models
│   │   ├── centermask
│   │   │   └── __init__.py
│   │   ├── feature_extractor
│   │   │   ├── __init__.py
│   │   │   ├── vggnet.py
│   │   │   ├── pafpn.py
│   │   │   ├── fpn.py
│   │   │   ├── resnet.py
│   │   │   ├── mobilenetv2.py
│   │   │   └── efficientnet.py
│   │   ├── __init__.py
│   │   ├── yolox
│   │   │   ├── __init__.py
│   │   │   ├── configuration_yolox.py
│   │   │   ├── loss_yolox.py
│   │   │   └── utils.py
│   │   ├── ssd
│   │   │   ├── __init__.py
│   │   │   ├── configuration_ssd.py
│   │   │   ├── README.md
│   │   │   ├── inference_ssd.py
│   │   │   └── loss_ssd.py
│   │   ├── solov2
│   │   │   ├── __init__.py
│   │   │   ├── configuration_solov1.py
│   │   │   ├── architecture_decoupled_solov1.py
│   │   │   ├── inference_solov1.py
│   │   │   └── README.md
│   │   └── yolact
│   │       ├── __init__.py
│   │       ├── configuration_yolact.py
│   │       ├── README.md
│   │       └── inference_yolact.py
│   ├── __init__.py
│   ├── setup.py
│   ├── README.md
│   ├── file_utils.py
│   ├── custom_activation.py
│   ├── custom_modules.py
│   ├── postprocessing.py
│   └── base_configuration.py
├── benchmarks
│   └── benchmark_yolact.py
├── .flake8
├── boda.png
├── docs
│   ├── requirements.txt
│   ├── source
│   │   ├── index.rst
│   │   └── conf.py
│   ├── Makefile
│   └── make.bat
├── environment.yml
├── run_test_ssd.py
├── CONTRIBUTING.md
├── .gitignore
├── calc_flops.py
├── setup.py
├── README.md
└── run_test.py
/mypy.ini:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/boda/lib/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/boda/ops/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/benchmarks/benchmark_yolact.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/boda/models/centermask/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 100
3 |
--------------------------------------------------------------------------------
/boda.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/unerue/boda/HEAD/boda.png
--------------------------------------------------------------------------------
/boda/models/feature_extractor/__init__.py:
--------------------------------------------------------------------------------
1 | from .fpn import FeaturePyramidNetworks
2 | from .vggnet import *
3 | from .resnet import *
4 |
--------------------------------------------------------------------------------
/boda/models/__init__.py:
--------------------------------------------------------------------------------
1 | # from .feature_extractor import *
2 | from .ssd import *
3 | from .yolact import *
4 |
5 | # from .yolox import *
6 |
--------------------------------------------------------------------------------
/boda/lib/torchinfo/__init__.py:
--------------------------------------------------------------------------------
1 | """ torchinfo """
2 | from .model_statistics import ModelStatistics
3 | from .torchinfo import summary
4 |
5 | __all__ = ("ModelStatistics", "summary")
6 |
--------------------------------------------------------------------------------
/boda/lib/torchsummary/__init__.py:
--------------------------------------------------------------------------------
1 | """ torchsummary """
2 | from .model_statistics import ModelStatistics
3 | from .torchsummary import summary
4 |
5 | __all__ = ("ModelStatistics", "summary")
6 |
--------------------------------------------------------------------------------
/boda/__init__.py:
--------------------------------------------------------------------------------
1 | from .models import *
2 |
3 |
4 | # __all__ = [
5 | # 'SsdConfig', 'SsdModel', 'SsdLoss',
6 | # 'YolactConfig', 'YolactModel', 'YolactLoss',
7 | # 'Solov1Config', 'Solov1Model',
8 | # ]
9 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib
2 | numpy
3 | sphinx-copybutton>=0.3.1
4 | sphinx-gallery>=0.9.0
5 | sphinx==3.5.4
6 | tabulate
7 | Jinja2<3.1.*
8 | -e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
--------------------------------------------------------------------------------
/boda/models/yolox/__init__.py:
--------------------------------------------------------------------------------
1 | # from .configuration_yolox import YoloXConfig
2 | # from .architecture_yolox import YoloXModel
3 | # # from .loss_yolox import Yo
4 |
5 |
6 | # __all__ = [
7 | # 'YoloXConfig', 'YoloXModel',
8 | # ]
9 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: boda
2 | channels:
3 | - conda-forge
4 | - pytorch
5 | - anaconda
6 | dependencies:
7 | - python=3.7
8 | - pytorch=1.7
9 | - torchvision=0.8
10 | - cudatoolkit=10.2
11 | - numpy
12 | - cython
13 | # - pip:
14 | # - pycocotools
15 | # - opencv-python
--------------------------------------------------------------------------------
/boda/models/ssd/__init__.py:
--------------------------------------------------------------------------------
1 | from .architecture_ssd import SsdPredictNeck, SsdPredictHead, SsdModel
2 | from .configuration_ssd import SsdConfig
3 |
4 | # from .loss_ssd import SsdLoss
5 |
6 |
7 | __all__ = [
8 | "SsdConfig",
9 | "SsdPredictNeck",
10 | "SsdPredictHead",
11 | "SsdModel",
12 | ]
13 |
--------------------------------------------------------------------------------
/run_test_ssd.py:
--------------------------------------------------------------------------------
1 | from boda.models import SsdModel, SsdConfig
2 |
3 |
4 | from boda.lib.torchsummary import summary
5 | import torch
6 |
7 | config = SsdConfig(num_classes=80)
8 | model = SsdModel(config).to('cuda')
9 | model.eval()
10 | print(model)
11 | # print(summary(model, input_size=(16, 3, 550, 550), verbose=0))
12 | print(summary(model, input_data=(3, 550, 550), verbose=0))
--------------------------------------------------------------------------------
/boda/models/solov2/__init__.py:
--------------------------------------------------------------------------------
1 | # from .configuration_solov1 import Solov1Config
2 | # from .architecture_solov1 import Solov1PredictNeck, Solov1PredictHead, Solov1Model
3 | # # from .architecture_decoupled_solov1 import DecoupledSolov1Model
4 | # from .loss_solov1 import Solov1Loss
5 |
6 |
7 | # __all__ = [
8 | # 'Solov1Loss', 'Solov1Config', 'Solov1PredictNeck',
9 | # 'Solov1PredictHead', 'Solov1Model', 'Solov1Loss'
10 | # ]
11 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to boda
2 | ---
3 |
4 | ## Code formatting and typing
5 |
6 | ### Formatting
7 |
8 | To format your code, install `ufmt` together with the pinned `black` and `usort` versions:
9 |
10 | ```bash
11 | pip install ufmt==1.3.2 black==21.9b0 usort==0.6.4
12 | ```
13 | Then run it on the `boda` package:
14 | ```bash
15 | ufmt format boda
16 | ```
17 |
18 | ### Type annotations
19 |
20 | ```bash
21 | mypy --config-file mypy.ini
22 | ```
23 |
24 | ## Unit tests
25 |
26 | ```bash
27 | pytest test -vvv
28 | ```
29 |
30 | ## Documentation
31 | ```bash
32 | cd docs
33 | make html-noplot
34 | ```
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. boda documentation master file, created by
2 | sphinx-quickstart on Mon Jul 11 19:55:45 2022.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to boda's documentation!
7 | ================================
8 |
9 | .. toctree::
10 | :maxdepth: 2
11 | :caption: Contents:
12 |
13 |
14 |
15 | Indices and tables
16 | ==================
17 |
18 | * :ref:`genindex`
19 | * :ref:`modindex`
20 | * :ref:`search`
21 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/boda/setup.py:
--------------------------------------------------------------------------------
1 | # from Cython.Build import cythonize
2 | # from numpy.distutils.misc_util import Configuration
3 |
4 |
5 | # def cythonize_extensions(top_path, config):
6 | # config.ext_modules = cythonize(
7 | # config.ext_modules,
8 | # compiler_directives={'language_level': '3'})
9 |
10 |
11 | # def configuration(parent_package='', top_path=None):
12 | # config = Configuration('boda', parent_package, top_path)
13 | # config.add_subpackage('models')
14 | # config.add_subpackage('utils')
15 | # config.add_subpackage('lib')
16 | # cythonize_extensions(top_path, config)
17 |
18 | # return config
19 |
20 |
21 | # if __name__ == '__main__':
22 | # from numpy.distutils.core import setup
23 |
24 | # setup(**configuration(top_path='').todict())
25 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 |
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | echo.
16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | echo.installed, then set the SPHINXBUILD environment variable to point
18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | echo.may add the Sphinx directory to PATH.
20 | echo.
21 | echo.If you don't have Sphinx installed, grab it from
22 | echo.https://www.sphinx-doc.org/
23 | exit /b 1
24 | )
25 |
26 | if "%1" == "" goto help
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints
2 | .vscode
3 | .DS_Store
4 | __pycache__
5 | boda.egg-info
6 | build
7 | dist
8 | tests
9 | misc
10 | doc
11 | boda/models/cascade_mask_rcnn/
12 | boda/models/efficientdet/
13 | boda/models/faster_rcnn/
14 | boda/models/fcos/
15 | boda/models/keypoint_rcnn/
16 | boda/models/mask_rcnn/
17 | boda/models/polarmask/
18 | # boda/models/solov1/
19 | boda/models/yolact_edge/
20 | boda/models/yolov4/
21 |
22 | # boda/ops/
23 |
24 | benchmarks/data/
25 | benchmarks/*pth
26 | benchmarks/samples/
27 | benchmarks/dataset/
28 | benchmarks/benchmark_yolov1.py
29 | benchmarks/benchmark_backbone.py
30 |
31 | boda/dev/
32 | dev/
33 | old/
34 | cache/
35 |
36 | run.py
37 | test_ssd.py
38 | test_yolov1.py
39 | test_yolact.py
40 | test_solov1.py
41 | test_fcos.py
42 | test_backbone.py
43 | test_centermask.py
44 | eval_yolact.py
45 | test_faster_rcnn.py
46 | test_mask_rcnn.py
47 | test_keypoint_rcnn.py
48 |
49 | logo.pptx
50 | *.pth
51 | *.zip
52 | *.jpg
--------------------------------------------------------------------------------
/boda/models/yolox/configuration_yolox.py:
--------------------------------------------------------------------------------
1 | # from typing import List
2 |
3 | # from ...base_configuration import BaseConfig
4 |
5 |
6 | # class YoloXConfig(BaseConfig):
7 | # model_name = 'yolox'
8 |
9 | # def __init__(
10 | # self,
11 | # num_classes: int = 80,
12 | # image_size: int = 640,
13 | # depth: float = 1.0,
14 | # width: float = 1.0,
15 | # act: str = 'silu',
16 | # selected_backbone_layers: List[int] = [2, 3, 4],
17 | # depthwise: bool = False,
18 | # test_conf: float = 0.01,
19 | # nmsthre: float = 0.65,
20 | # ):
21 | # super().__init__(
22 | # num_classes=num_classes,
23 | # max_size=image_size,
24 | # )
25 | # self.depth = depth
26 | # self.width = width
27 | # self.act = act
28 |
29 | # self.selected_backbone_layers = selected_backbone_layers
30 |
31 | # self.depthwise = depthwise
32 |
33 | # self.test_conf = test_conf
34 | # self.nmsthre = nmsthre
35 |
--------------------------------------------------------------------------------
/boda/README.md:
--------------------------------------------------------------------------------
1 | # Models
2 |
3 | ## Library Structure
4 | ```{bash}
5 | .
6 | +-- models
7 | | +-- model
8 | | | +-- configuration_model.py
9 | | | +-- architecture_model.py
10 | | | +-- loss_model.py
11 | | | +-- inference_model.py
12 | | | +-- README.md
13 | | +-- backbone.py
14 | | +-- neck.py
15 | +-- utils
16 | | +-- box.py
17 | | +-- mask.py
18 | | +-- nms.py
19 | +-- lib
20 | | +-- torchsummary
21 | +-- base_architecture.py
22 | +-- base_configuration.py
23 | +-- modules.py
24 | +-- activation.py
25 | +-- setup.py
26 | ```
27 |
28 | ## Abstract Structure
29 |
30 | ```{python}
31 | class Backbone(nn.Module):
32 |     def __init__(self):
33 |         super().__init__()
34 |
35 |     def forward(self, x):
36 |         return x
37 |
38 | class Neck(nn.Module):
39 |     def __init__(self):
40 |         super().__init__()
41 |
42 |     def _make_layer(self):
43 |         ...
44 |
45 |     def forward(self, x):
46 |         return x
47 |
48 | class Head(nn.Module):
49 |     ...
50 |
51 | class Pretrained:
52 |     ...
53 |
54 | class Model(nn.Module):
55 |     ...
56 | ```
--------------------------------------------------------------------------------
/calc_flops.py:
--------------------------------------------------------------------------------
1 | from boda.models import YolactConfig, YolactModel
2 | # from boda.lib.torchsummary import summary
3 | from torchinfo import summary
4 | from boda.models.backbone_mobilenetv3 import mobilenet_v3_large, mobilenet_v3_small
5 | from boda.models.backbone_resnet import resnet101, resnet18, resnet34, resnet50
6 | # from torchvision.models import resnet50, mobilenet_v3_large, resnet101
7 |
8 |
9 | config = YolactConfig(num_classes=90)
10 | # model = YolactModel(config, backbone=mobilenet_v3_small(), selected_backbone_layers=[3, 8, 11]).to('cuda:0')
11 | # model = YolactModel(config, backbone=mobilenet_v3_large(), selected_backbone_layers=[6, 12, 15]).to('cuda:0')
12 | # model = YolactModel(config, backbone=resnet50(), selected_backbone_layers=[1, 2, 3]).to('cuda:0')
13 | # print(summary(model, (1, 3, 550, 550), verbose=0))
14 |
15 | # from boda.resnet import resnet101
16 | model = mobilenet_v3_small().to('cuda')
17 | # model = mobilenet_v3_large().to('cuda')
18 | # print(summary(model, input_data=(3, 550, 550), depth=2, verbose=0))
19 |
20 | # model = resnet101().to('cuda')
21 | # model = resnet50().to('cuda')
22 | print(summary(model, (1, 3, 224, 224), depth=3, verbose=0))
23 |
--------------------------------------------------------------------------------
/boda/models/ssd/configuration_ssd.py:
--------------------------------------------------------------------------------
1 | from ...base_configuration import BaseConfig
2 |
3 |
4 | SSD_PRETRAINED_CONFIG = {
5 | "ssd300": None,
6 | "ssd512": None,
7 | }
8 |
9 |
10 | class SsdConfig(BaseConfig):
11 | """Configuration for SSD
12 |
13 | Arguments:
14 | max_size ():
15 |
16 | """
17 |
18 | def __init__(
19 | self,
20 | num_classes: int = 20,
21 | max_size: int = 300,
22 | preserve_aspect_ratio: bool = False,
23 | selected_layers: int = -1,
24 | num_grids: int = 7,
25 | **kwargs
26 | ) -> None:
27 | super().__init__(max_size=max_size, **kwargs)
28 | self.selected_layers = [3, 4]
29 | self.boxes = [4, 6, 6, 6, 4, 4]
30 | self.num_classes = num_classes
31 | self.backbone_name = "vgg16"
32 | self.aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
33 | self.variance = [0.1, 0.2]
34 | self.min_sizes = [30, 60, 111, 162, 213, 264]
35 | self.max_sizes = [60, 111, 162, 213, 264, 315]
36 | self.steps = [8, 16, 32, 64, 100, 300]
37 | self.clip = True
38 | # self.grid_sizes = [38, 19, 10, 5, 3, 1]
39 |
--------------------------------------------------------------------------------
/boda/models/yolox/loss_yolox.py:
--------------------------------------------------------------------------------
1 | # import torch
2 | # from torch import nn
3 |
4 |
5 | # class IOUloss(nn.Module):
6 | # def __init__(self, reduction="none", loss_type="iou"):
7 | # super(IOUloss, self).__init__()
8 | # self.reduction = reduction
9 | # self.loss_type = loss_type
10 |
11 | # def forward(self, pred, target):
12 | # assert pred.shape[0] == target.shape[0]
13 |
14 | # pred = pred.view(-1, 4)
15 | # target = target.view(-1, 4)
16 | # tl = torch.max(
17 | # (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2)
18 | # )
19 | # br = torch.min(
20 | # (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2)
21 | # )
22 |
23 | # area_p = torch.prod(pred[:, 2:], 1)
24 | # area_g = torch.prod(target[:, 2:], 1)
25 |
26 | # en = (tl < br).type(tl.type()).prod(dim=1)
27 | # area_i = torch.prod(br - tl, 1) * en
28 | # area_u = area_p + area_g - area_i
29 | # iou = (area_i) / (area_u + 1e-16)
30 |
31 | # if self.loss_type == "iou":
32 | # loss = 1 - iou ** 2
33 | # elif self.loss_type == "giou":
34 | # c_tl = torch.min(
35 | # (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2)
36 | # )
37 | # c_br = torch.max(
38 | # (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2)
39 | # )
40 | # area_c = torch.prod(c_br - c_tl, 1)
41 | # giou = iou - (area_c - area_u) / area_c.clamp(1e-16)
42 | # loss = 1 - giou.clamp(min=-1.0, max=1.0)
43 |
44 | # if self.reduction == "mean":
45 | # loss = loss.mean()
46 | # elif self.reduction == "sum":
47 | # loss = loss.sum()
48 |
49 | # return loss
50 |
--------------------------------------------------------------------------------
/boda/lib/torchinfo/formatting.py:
--------------------------------------------------------------------------------
1 | """ formatting.py """
2 | import math
3 | from enum import Enum, unique
4 | from typing import Dict, Iterable, List
5 |
6 | from .layer_info import LayerInfo
7 |
8 |
9 | @unique
10 | class Verbosity(Enum):
11 | """Contains verbosity levels."""
12 |
13 | QUIET, DEFAULT, VERBOSE = 0, 1, 2
14 |
15 |
16 | class FormattingOptions:
17 | """Class that holds information about formatting the table output."""
18 |
19 | def __init__(
20 | self,
21 | max_depth: int,
22 | verbose: int,
23 | col_names: Iterable[str],
24 | col_width: int,
25 | ):
26 | self.max_depth = max_depth
27 | self.verbose = verbose
28 | self.col_names = col_names
29 | self.col_width = col_width
30 | self.layer_name_width = 40
31 |
32 | def set_layer_name_width(
33 | self, summary_list: List[LayerInfo], align_val: int = 5
34 | ) -> None:
35 | """
36 | Set layer name width by taking the longest line length and rounding up to
37 | the nearest multiple of align_val.
38 | """
39 | max_length = 0
40 | for info in summary_list:
41 | depth_indent = info.depth * align_val + 1
42 | max_length = max(max_length, len(str(info)) + depth_indent)
43 | if max_length >= self.layer_name_width:
44 | self.layer_name_width = math.ceil(max_length / align_val) * align_val
45 |
46 | def get_total_width(self) -> int:
47 | """Calculate the total width of all lines in the table."""
48 | return len(tuple(self.col_names)) * self.col_width + self.layer_name_width
49 |
50 | def format_row(self, layer_name: str, row_values: Dict[str, str]) -> str:
51 | """Get the string representation of a single layer of the model."""
52 | info_to_use = [row_values.get(row_type, "") for row_type in self.col_names]
53 | new_line = f"{layer_name:<{self.layer_name_width}} "
54 | for info in info_to_use:
55 | new_line += f"{info:<{self.col_width}} "
56 | return new_line.rstrip() + "\n"
57 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | # import os
14 | # import sys
15 | # sys.path.insert(0, os.path.abspath('.'))
16 |
17 |
18 | # -- Project information -----------------------------------------------------
19 |
20 | project = 'boda'
21 | copyright = '2022, Kyung-Su Kang'
22 | author = 'Kyung-Su Kang'
23 |
24 | # The full version, including alpha/beta/rc tags
25 | release = '0.01a'
26 |
27 |
28 | # -- General configuration ---------------------------------------------------
29 |
30 | # Add any Sphinx extension module names here, as strings. They can be
31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
32 | # ones.
33 | extensions = [
34 | ]
35 |
36 | # Add any paths that contain templates here, relative to this directory.
37 | templates_path = ['_templates']
38 |
39 | # List of patterns, relative to source directory, that match files and
40 | # directories to ignore when looking for source files.
41 | # This pattern also affects html_static_path and html_extra_path.
42 | exclude_patterns = []
43 |
44 |
45 | # -- Options for HTML output -------------------------------------------------
46 |
47 | # The theme to use for HTML and HTML Help pages. See the documentation for
48 | # a list of builtin themes.
49 | #
50 | html_theme = 'alabaster'
51 |
52 | # Add any paths that contain custom static files (such as style sheets) here,
53 | # relative to this directory. They are copied after the builtin static files,
54 | # so a file named "default.css" will overwrite the builtin "default.css".
55 | html_static_path = ['_static']
--------------------------------------------------------------------------------
/boda/lib/torchsummary/formatting.py:
--------------------------------------------------------------------------------
1 | """ formatting.py """
2 | import math
3 | from enum import Enum, unique
4 | from typing import Dict, Iterable, List
5 |
6 | from .layer_info import LayerInfo
7 |
8 |
9 | @unique
10 | class Verbosity(Enum):
11 | """Contains verbosity levels."""
12 |
13 | QUIET, DEFAULT, VERBOSE = 0, 1, 2
14 |
15 |
16 | class FormattingOptions:
17 | """Class that holds information about formatting the table output."""
18 |
19 | def __init__(
20 | self,
21 | use_branching: bool,
22 | max_depth: int,
23 | verbose: int,
24 | col_names: Iterable[str],
25 | col_width: int,
26 | ):
27 | self.use_branching = use_branching
28 | self.max_depth = max_depth
29 | self.verbose = verbose
30 | self.col_names = col_names
31 | self.col_width = col_width
32 | self.layer_name_width = 40
33 |
34 | def set_layer_name_width(
35 | self, summary_list: List[LayerInfo], align_val: int = 5
36 | ) -> None:
37 | """
38 | Set layer name width by taking the longest line length and rounding up to
39 | the nearest multiple of align_val.
40 | """
41 | max_length = 0
42 | for info in summary_list:
43 | depth_indent = info.depth * align_val + 1
44 | max_length = max(max_length, len(str(info)) + depth_indent)
45 | if max_length >= self.layer_name_width:
46 | self.layer_name_width = math.ceil(max_length / align_val) * align_val
47 |
48 | def get_total_width(self) -> int:
49 | """Calculate the total width of all lines in the table."""
50 | return len(tuple(self.col_names)) * self.col_width + self.layer_name_width
51 |
52 | def format_row(self, layer_name: str, row_values: Dict[str, str]) -> str:
53 | """Get the string representation of a single layer of the model."""
54 | info_to_use = [row_values.get(row_type, "") for row_type in self.col_names]
55 | new_line = "{:<{}} ".format(layer_name, self.layer_name_width)
56 | for info in info_to_use:
57 | new_line += "{:<{}} ".format(info, self.col_width)
58 | return new_line.rstrip() + "\n"
59 |
--------------------------------------------------------------------------------
/boda/file_utils.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import sys
4 | from urllib.request import urlretrieve
5 |
6 |
7 | class DataEncoder(json.JSONEncoder):
8 | def default(self, obj):
9 | if isinstance(obj, list):
10 | return json.JSONEncoder().encode(obj)
11 |
12 | return json.JSONEncoder.default(self, obj)
13 |
14 |
15 | def progressbar(cur, total=100):
16 | percent = "{:.2%}".format(cur / total)
17 | sys.stdout.write("\r")
18 | # sys.stdout.write("[%-50s] %s" % ('=' * int(math.floor(cur * 50 / total)),percent))
19 | sys.stdout.write("[%-100s] %s" % ("=" * int(cur), percent))
20 | sys.stdout.flush()
21 |
22 |
23 | def schedule(blocknum, blocksize, totalsize):
24 | """
25 | blocknum: currently downloaded block
26 | blocksize: block size for each transfer
27 | totalsize: total size of web page files
28 | """
29 | if totalsize == 0:
30 | percent = 0
31 | else:
32 | percent = blocknum * blocksize / totalsize
33 | if percent > 1.0:
34 | percent = 1.0
35 |
36 | percent = percent * 100
37 | print("download : %.2f%%" % (percent))
38 | progressbar(percent)
39 |
40 |
41 | def reporthook(count, block_size, total_size):
42 | """
43 | https://blog.shichao.io/2012/10/04/progress_speed_indicator_for_urlretrieve_in_python.html
44 | """
45 | # global start_time
46 | # if count == 0:
47 | # start_time = time.time()
48 | # return
49 | # duration = time.time() - start_time
50 | progress_size = int(count * block_size)
51 | # speed = int(progress_size / (1024 * duration))
52 | percent = int(count * block_size * 100 / total_size)
53 | # min(int(count*blockSize*100/totalSize),100)
54 | sys.stdout.write(
55 | f"\rDownload file for pretrained model: {percent:>3}% {progress_size / (1024*1204):>4.1f} MB"
56 | )
57 |
58 | # sys.stdout.write("\rDownload pretrained model: %d%%, %d MB, %d KB/s, %d seconds passed" %
59 | # (percent, progress_size / (1024 * 1024), speed, duration))
60 | sys.stdout.flush()
61 |
62 |
63 | def get_file_from_url(
64 | file_name: str,
65 | ):
66 | """
67 | file_name (): model_name/file_name.json or pth
68 | """
69 | url = "https://unerue.synology.me/boda/models/"
70 | urlretrieve(f"{url}{file_name}", file_name, reporthook)  # save to a local path mirroring file_name
71 | print()
72 |
--------------------------------------------------------------------------------
/boda/custom_activation.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from torch.nn import functional as F
4 |
5 |
6 | class Swish(nn.Module):
7 | """Swish https://arxiv.org/pdf/1905.02244.pdf"""
8 |
9 | @staticmethod
10 | def forward(x):
11 | return x * torch.sigmoid(x)
12 |
13 |
14 | class Hardswish(nn.Module):
15 | """export-friendly version of nn.Hardswish()
16 |
17 | Return:
18 | x * F.hardsigmoid(x) for torchscript and CoreML
19 | """
20 |
21 | @staticmethod
22 | def forward(x):
23 | # for torchscript, CoreML and ONNX
24 | return x * F.hardtanh(x + 3, 0.0, 6.0) / 6.0
25 |
26 |
27 | class MemoryEfficientSwish(nn.Module):
28 | class F(torch.autograd.Function):
29 | @staticmethod
30 | def forward(ctx, x):
31 | ctx.save_for_backward(x)
32 | return x * torch.sigmoid(x)
33 |
34 | @staticmethod
35 | def backward(ctx, grad_output):
36 | x = ctx.saved_tensors[0]
37 | sx = torch.sigmoid(x)
38 | return grad_output * (sx * (1 + x * (1 - sx)))
39 |
40 | def forward(self, x):
41 | return self.F.apply(x)
42 |
43 |
44 | class Mish(nn.Module):
45 | """# Mish https://github.com/digantamisra98/Mish"""
46 |
47 | @staticmethod
48 | def forward(x):
49 | return x * F.softplus(x).tanh()
50 |
51 |
52 | class MemoryEfficientMish(nn.Module):
53 | class F(torch.autograd.Function):
54 | @staticmethod
55 | def forward(ctx, x):
56 | ctx.save_for_backward(x)
57 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x)))
58 |
59 | @staticmethod
60 | def backward(ctx, grad_output):
61 | x = ctx.saved_tensors[0]
62 | sx = torch.sigmoid(x)
63 | fx = F.softplus(x).tanh()
64 | return grad_output * (fx + x * sx * (1 - fx * fx))
65 |
66 | def forward(self, x):
67 | return self.F.apply(x)
68 |
69 |
70 | class FReLU(nn.Module):
71 | """FReLU https://arxiv.org/abs/2007.11824"""
72 |
73 | def __init__(self, in_channels, kernel_size=3):
74 | super().__init__()
75 | self.conv = nn.Conv2d(
76 | in_channels,
77 | in_channels,
78 | kernel_size,
79 | stride=1,
80 | padding=1,
81 | groups=in_channels,
82 | bias=False,
83 | )
84 | self.bn = nn.BatchNorm2d(in_channels)
85 |
86 | def forward(self, x):
87 | return torch.max(x, self.bn(self.conv(x)))
88 |
--------------------------------------------------------------------------------
/boda/models/yolact/__init__.py:
--------------------------------------------------------------------------------
1 | from .architecture_yolact import YolactPredictHead, YolactModel
2 | from .configuration_yolact import YolactConfig
3 | from .inference_yolact import PostprocessYolact
4 |
5 | # from .loss_yolact import YolactLoss
6 |
7 |
8 | __all__ = ["YolactConfig", "PostprocessYolact", "YolactPredictHead", "YolactModel"]
9 |
10 | # _import_structure = {
11 | # 'configuration_yolact': ['YolactConfig'],
12 | # 'architecture_yolact': ['YolactPredictNeck', 'YolactPredictHead', 'YolactModel'],
13 | # 'loss_yolact': ['YolactLoss']
14 | # }
15 | # import importlib
16 | # import os
17 | # import sys
18 |
19 |
20 | # class _BaseLazyModule(ModuleType):
21 | # """
22 | # Module class that surfaces all objects but only performs associated imports when the objects are requested.
23 | # """
24 |
25 | # # Very heavily inspired by optuna.integration._IntegrationModule
26 | # # https://github.com/optuna/optuna/blob/master/optuna/integration/__init__.py
27 | # def __init__(self, name, import_structure):
28 | # super().__init__(name)
29 | # self._modules = set(import_structure.keys())
30 | # self._class_to_module = {}
31 | # for key, values in import_structure.items():
32 | # for value in values:
33 | # self._class_to_module[value] = key
34 | # # Needed for autocompletion in an IDE
35 | # self.__all__ = list(import_structure.keys()) + sum(import_structure.values(), [])
36 |
37 | # # Needed for autocompletion in an IDE
38 | # def __dir__(self):
39 | # return super().__dir__() + self.__all__
40 |
41 | # def __getattr__(self, name: str) -> Any:
42 | # if name in self._modules:
43 | # value = self._get_module(name)
44 | # elif name in self._class_to_module.keys():
45 | # module = self._get_module(self._class_to_module[name])
46 | # value = getattr(module, name)
47 | # else:
48 | # raise AttributeError(f"module {self.__name__} has no attribute {name}")
49 |
50 | # setattr(self, name, value)
51 | # return value
52 |
53 | # def _get_module(self, module_name: str) -> ModuleType:
54 | # raise NotImplementedError
55 |
56 |
57 | # class _LazyModule(_BaseLazyModule):
58 | # """
59 | # Module class that surfaces all objects but only performs associated imports when the objects are requested.
60 | # """
61 |
62 | # __file__ = globals()["__file__"]
63 | # __path__ = [os.path.dirname(__file__)]
64 |
65 | # def _get_module(self, module_name: str):
66 | # return importlib.import_module("." + module_name, self.__name__)
67 |
--------------------------------------------------------------------------------
/boda/models/solov2/configuration_solov1.py:
--------------------------------------------------------------------------------
1 | import os
2 | from typing import Tuple, Sequence, Union, Any
3 |
4 | from ...base_configuration import BaseConfig
5 |
6 |
7 | solov1_pretrained_models = {"solov1-base": ""}
8 |
9 |
10 | class Solov1Config(BaseConfig):
11 | """Configuration for SOLOv1
12 |
13 | Arguments:
14 | max_size ():
15 | padding ():
16 | proto_net_structure (List):
17 | """
18 |
19 | config_name = "solov1"
20 |
21 | def __init__(
22 | self,
23 | num_classes: int = 80,
24 | min_size: int = 800,
25 | max_size: int = 1333,
26 | preserve_aspect_ratio: bool = True,
27 | selected_layers: Sequence[int] = [0, 1, 2, 3],
28 | fpn_channels: int = 256,
29 | num_extra_fpn_layers: int = 1,
30 | scales: Sequence[int] = [[8, 32], [16, 64], [32, 128], [64, 256], [128, 512]],
31 | grids: Sequence[int] = [40, 36, 24, 16, 12],
32 | strides: Sequence[int] = [4, 8, 16, 32, 64],
33 | base_edges: Sequence[int] = [16, 32, 64, 128, 256],
34 | **kwargs
35 | ) -> None:
36 | super().__init__(max_size=max_size, **kwargs)
37 | self.num_classes = num_classes
38 | self.selected_layers = selected_layers
39 | self.fpn_channels = fpn_channels
40 | self.num_extra_fpn_layers = num_extra_fpn_layers
41 | self.scales = scales
42 | self.grids = grids
43 | self.strides = strides
44 | self.base_edges = base_edges
45 |
46 | self.cate_down_pos = 0
47 |
48 |
49 | class DecoupledSolov1Config(BaseConfig):
50 | """Configuration for SOLOv1
51 |
52 | Arguments:
53 | max_size ():
54 | padding ():
55 | proto_net_structure (List):
56 | """
57 |
58 | config_name = "solov1"
59 |
60 | def __init__(
61 | self,
62 | num_classes: int = 80,
63 | max_size: Tuple[int] = (1333, 800),
64 | selected_layers: Sequence[int] = [0, 1, 2, 3],
65 | fpn_channels: int = 256,
66 | num_extra_fpn_layers: int = 1,
67 | scales: Sequence[int] = [[8, 32], [16, 64], [32, 128], [64, 256], [128, 512]],
68 | grids: Sequence[int] = [40, 36, 24, 16, 12],
69 | strides: Sequence[int] = [4, 8, 16, 32, 64],
70 | base_edges: Sequence[int] = [16, 32, 64, 128, 256],
71 | **kwargs
72 | ) -> None:
73 | super().__init__(max_size=max_size, **kwargs)
74 | self.num_classes = num_classes
75 | self.selected_layers = selected_layers
76 | self.fpn_channels = fpn_channels
77 | self.num_extra_fpn_layers = num_extra_fpn_layers
78 | self.scales = scales
79 | self.grids = grids
80 | self.strides = strides
81 | self.base_edges = base_edges
82 |
83 | self.cate_down_pos = 0
84 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import find_packages
2 | from numpy.distutils.core import setup
3 | from numpy.distutils.misc_util import Configuration
4 | from distutils.command.clean import clean as Clean
5 | from distutils.command.sdist import sdist
6 | import os
7 | import shutil
8 |
9 |
10 | def configuration(parent_package='', top_path=None):
11 | config = Configuration(None, parent_package, top_path)
12 |
13 | config.set_options(
14 | ignore_setup_xxx_py=True,
15 | assume_default_configuration=True,
16 | delegate_options_to_subpackages=True,
17 | quiet=True)
18 | config.add_subpackage('boda')
19 |
20 | return config
21 |
22 |
23 | class CleanCommand(Clean):
24 | description = 'Remove build artifacts from the source tree'
25 |
26 | def run(self):
27 | Clean.run(self)
28 | # Remove c files if we are not within a sdist package
29 | cwd = os.path.abspath(os.path.dirname(__file__))
30 | remove_c_files = not os.path.exists(os.path.join(cwd, 'PKG-INFO'))
31 | if remove_c_files:
32 | print('Will remove generated .c files')
33 | if os.path.exists('build'):
34 | shutil.rmtree('build')
35 | for dirpath, dirnames, filenames in os.walk('boda'):
36 | for filename in filenames:
37 | if any(filename.endswith(suffix) for suffix in
38 | (".so", ".pyd", ".dll", ".pyc")):
39 | os.unlink(os.path.join(dirpath, filename))
40 | continue
41 | extension = os.path.splitext(filename)[1]
42 | if remove_c_files and extension in ['.c', '.cpp']:
43 | pyx_file = str.replace(filename, extension, '.pyx')
44 | if os.path.exists(os.path.join(dirpath, pyx_file)):
45 | os.unlink(os.path.join(dirpath, filename))
46 | for dirname in dirnames:
47 | if dirname == '__pycache__':
48 | shutil.rmtree(os.path.join(dirpath, dirname))
49 |
50 |
51 | cmdclass = {'clean': CleanCommand, 'sdist': sdist}
52 |
53 |
54 | def setup_packages():
55 | metadata = dict(
56 | name='boda',
57 | version='0.0.1',
58 | install_requires=['torch', 'numpy', 'cython'],
59 | author='Kang, Kyung-Su',
60 | author_email='unerue@me.com',
61 | maintainer='Kang, Kyung-Su',
62 | maintainer_email='unerue@me.com',
63 | description='boda is a library for instance segmentation.',
64 | packages=find_packages(),
65 | # include_package_data=True,
66 | classifiers=[
67 | 'Programming Language :: C',
68 | 'Programming Language :: Python',
69 | 'Programming Language :: Python :: 3.6',
70 | 'Programming Language :: Python :: 3.7',
71 | 'Programming Language :: Python :: 3.8'],
72 | cmdclass=cmdclass,
73 | configuration=configuration,
74 | python_requires='>=3.6')
75 |
76 | setup(**metadata)
77 |
78 |
79 | if __name__ == '__main__':
80 | setup_packages()
81 |
--------------------------------------------------------------------------------
/boda/ops/anchor_generators.py:
--------------------------------------------------------------------------------
1 | import functools
2 | import itertools
3 | import math
4 | from collections import defaultdict
5 | from typing import List, Tuple
6 |
7 | import torch
8 | from torch import Tensor
9 |
10 |
11 | def default_box_cache(func):
12 | cache = defaultdict()
13 |
14 | @functools.wraps(func)
15 | def wrapper(*args):
16 | k, v = func(*args)
17 | if k not in cache:
18 | cache[k] = v
19 | return k, cache[k]
20 |
21 | return wrapper
22 |
23 |
24 | class DefaultBoxGenerator:
25 | """
26 | Args:
27 | aspect_ratios (:obj:`List[int]`):
28 | scales (:obj:):
29 | max_size ():
30 | use_preapply_sqrt ():
31 | use_pixel_scales ():
32 | use_square_anchors (:obj:`bool`): default `True`
33 | """
34 |
35 | def __init__(
36 | self,
37 | aspect_ratios: List[int],
38 | scales: List[float],
39 | max_size: Tuple[int] = (550, 550),
40 | use_preapply_sqrt: bool = True,
41 | use_pixel_scales: bool = True,
42 | use_square_anchors: bool = True,
43 | ) -> None:
44 | self.aspect_ratios = aspect_ratios
45 | self.scales = scales
46 | self.clip = False
47 | self.max_size = max_size
48 | self.use_preapply_sqrt = use_preapply_sqrt
49 | self.use_pixel_scales = use_pixel_scales
50 | self.use_square_anchors = use_square_anchors
51 |
52 | @default_box_cache
53 | def generate(
54 | self, h: int, w: int, device: str = "cuda:0"
55 | ) -> Tuple[Tuple[int], Tensor]:
56 | """DefaultBoxGenerator is
57 |
58 | Args:
59 | h (:obj:`int`): feature map size from backbone
60 | w (:obj:`int`): feature map size from backbone
61 | device (:obj:`str`): default `cuda`
62 |
63 | Returns
64 | size (:obj:`Tuple[int]`): feature map size
65 | prior_boxes (:obj:`FloatTensor[N, 4]`):
66 | """
67 | size = (h, w)
68 | default_boxes = []
69 | for j, i in itertools.product(range(h), range(w)):
70 | cx = (i + 0.5) / w
71 | cy = (j + 0.5) / h
72 | for ratios in self.aspect_ratios:
73 | for scale in self.scales:
74 | for ratio in ratios:
75 | if not self.use_preapply_sqrt:
76 | ratio = math.sqrt(ratio)
77 |
78 | if self.use_pixel_scales:
79 | _h = scale / ratio / self.max_size[0]
80 | _w = scale * ratio / self.max_size[1]
81 | else:
82 | _h = scale / ratio / h
83 | _w = scale * ratio / w
84 |
85 | if self.use_square_anchors:
86 | _h = _w
87 |
88 | default_boxes += [cx, cy, _w, _h]
89 |
90 | default_boxes = torch.tensor(
91 | default_boxes, dtype=torch.float32, device=device, requires_grad=False
92 | ).view(-1, 4)
93 | if self.clip:
94 | default_boxes.clamp_(min=0, max=1)
95 | # prior_boxes.requires_grad = False
96 |
97 | return size, default_boxes
98 |
--------------------------------------------------------------------------------
/boda/models/yolact/configuration_yolact.py:
--------------------------------------------------------------------------------
1 | import os
2 | from typing import Optional, Tuple, List, Union, Any
3 |
4 | from ...base_configuration import BaseConfig
5 |
6 |
7 | YOLACT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
8 | "yolact-base": "https://unerue.synology.me/boda/models/yolact/yolact-base.json",
9 | "yolact-550-r50": "",
10 | "yolact-300-r101": "",
11 | "yolact-700-r101": "",
12 | }
13 |
14 |
15 | class YolactConfig(BaseConfig):
16 | """Configuration for YOLACT
17 |
18 | Args:
19 | num_classes (:obj:`int`):
20 | max_size (:obj:`Union[int, Tuple[int]]`):
21 | num_grids (:obj:`int`):
22 | num_grid_sizes (:obj:`int`):
23 | num_mask_dim (:obj:`int`):
24 | fpn_channels (:obj:`int`):
25 | extra_fpn_layers (:obj:`bool`):
26 | num_extra_fpn_layers (:obj:`int`):
27 | mask_dim (:obj:`int`):
28 | num_grid_sizes (:obj:`int`):
29 | num_mask_dim (:obj:`int`):
30 | """
31 |
32 | model_name = "yolact"
33 |
34 | def __init__(
35 | self,
36 | num_classes: int = 80,
37 | max_size: Tuple[int] = (550, 550),
38 | preserve_aspect_ratio: bool = False,
39 | selected_backbone_layers: List[int] = [1, 2, 3],
40 | fpn_channels: int = 256,
41 | extra_fpn_layers: bool = True,
42 | num_extra_fpn_layers: int = 2,
43 | aspect_ratios: List = [1, 1 / 2, 2],
44 | scales: List = [24, 48, 96, 192, 384],
45 | num_extra_box_layers: int = 0,
46 | num_extra_mask_layers: int = 0,
47 | num_extra_score_layers: int = 0,
48 | use_preapply_sqrt: bool = False,
49 | use_pixel_scales: bool = True,
50 | use_square_anchors: bool = True,
51 | num_grids: int = 0,
52 | mask_size: int = 16,
53 | mask_dim: int = 0,
54 | box_weight: float = 1.0,
55 | mask_weight: float = 6.125,
56 | score_weight: float = 1.0,
57 | semantic_weight: float = 1.0,
58 | **kwargs
59 | ) -> None:
60 | super().__init__(max_size=max_size, **kwargs)
61 | self.num_classes = num_classes + 1
62 | self.preserve_aspect_ratio = preserve_aspect_ratio
63 | self.fpn_channels = fpn_channels
64 | self.extra_fpn_layers = extra_fpn_layers
65 | self.num_extra_fpn_layers = num_extra_fpn_layers
66 | self.selected_backbone_layers = selected_backbone_layers
67 | self.aspect_ratios = aspect_ratios
68 | self.scales = scales
69 | self.num_grids = num_grids
70 | self.mask_size = mask_size
71 | self.use_preapply_sqrt = use_preapply_sqrt
72 | self.use_pixel_scales = use_pixel_scales
73 | self.use_square_anchors = use_square_anchors
74 |
75 | self.num_extra_box_layers = num_extra_box_layers
76 | self.num_extra_mask_layers = num_extra_mask_layers
77 | self.num_extra_score_layers = num_extra_score_layers
78 | self.num_grids = num_grids
79 | self.mask_size = mask_size
80 | self.mask_dim = mask_dim
81 |
82 | self.box_weight = box_weight
83 | self.mask_weight = mask_weight
84 | self.score_weight = score_weight
85 | self.semantic_weight = semantic_weight
86 |
87 | self.label_map = kwargs.get("label_map", None)
88 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
13 |
14 | ## Deep learning-based Computer Vision Models for PyTorch
15 |
16 | Boda (보다) means "to see" in Korean. This library was inspired by 🤗 Transformers.
17 |
18 | ## Get started
19 |
20 | ```bash
21 | git clone https://github.com/unerue/boda.git && cd boda
22 | conda env create -f environment.yml
23 | conda activate boda
24 | python setup.py install
25 | ```
26 |
27 | ```python
28 | from boda.models import YolactConfig, YolactModel, YolactLoss
29 |
30 | config = YolactConfig(num_classes=80)
31 | model = YolactModel(config)
32 | criterion = YolactLoss()
33 |
34 | outputs = model(images)
35 | losses = criterion(outputs, targets)
36 | print(losses)
37 | ```
38 |
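For a quick sanity check of a built model, the bundled `boda.lib.torchsummary` prints a layer-by-layer summary. Below is a minimal sketch adapted from `run_test_ssd.py` in this repository (it assumes a CUDA device is available; drop `.to('cuda')` to run on CPU):

```python
from boda.models import SsdConfig, SsdModel
from boda.lib.torchsummary import summary

# Build an SSD head-to-toe and print its layer summary on a 550x550 input,
# following run_test_ssd.py.
config = SsdConfig(num_classes=80)
model = SsdModel(config).to('cuda')
model.eval()
print(summary(model, input_data=(3, 550, 550), verbose=0))
```
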
39 | ## Comparison
40 |
41 | |Model|State|Training|Inference|Original|Ours|
42 | |:----|:---:|:------:|:-------:|-------:|---:|
43 | |Mask R-CNN|😡|❌|❌|||
44 | |[YOLACT](boda/models/yolact/)|😆|✔️|✔️|||
45 | |SOLOv2|🙂|❌|✔️|||
46 | |[CenterMask]()|😡|❌|❌|||
47 | |YOLACT EDGE|😡|❌|❌|||
48 | ||
49 |
50 | ### Misc
51 |
52 | |Model|State|Training|Inference|Original|Ours|
53 | |:----|:---:|:------:|:-------:|-------:|---:|
54 | |[SSD](boda/models/ssd/)|🙂|❌|✔️|||
55 | |Faster R-CNN|🙂|❌|✔️|||
56 | |[FCOS](boda/models/fcos/)|🙂|❌|✔️|||
57 | |Keypoint R-CNN|🙂|❌|✔️|||
58 | |YOLOv4|😡|❌|❌|||
59 | ||
60 |
61 | ## Pretrained Model Configurations
62 |
63 | |Model|Config name|Status|Original|Ours|
64 | |:----|:----|:------:|-------:|---:|
65 | |[SSD](boda/models/ssd/)|`ssd-base`|🙂|||
66 | | |`ssd-512`|😡|||
67 | |[Faster R-CNN]()|`faster-rcnn-base`|🙂|||
68 | | |`faster-rcnn-r101`|😡|||
69 | |Mask R-CNN|`mask-rcnn-base`|😡|||
70 | | |`mask-rcnn-r50`|😡|||
71 | |Keypoint R-CNN|`keypoint-rcnn-base`|🙂|||
72 | | |`keypoint-rcnn-mobile`|😡|||
73 | |[FCOS](boda/models/fcos/)|`fcos-base`|🙂|||
74 | |PolarMask|`polarmask-base`|😡|||
75 | |YOLOv4|`yolov4-base`|😡|||
76 | |[YOLACT](boda/models/yolact/)|`yolact-base`|😆|||
77 | | |`yolact-r101`|😡|||
78 | | |`yolact-r101-300`|😡|||
79 | | |`yolact-r101-700`|😡|||
80 | |[SOLOv1](boda/models/solov1/)|`solov1-base`|🙂|||
81 | | |`solov1-r101`|😡|||
82 | |SOLOv2|`solov2-base`|😡||||
83 | |[CenterMask]()|`centermask-base`|😡|||
84 | |YOLACT EDGE|`yolact-edge-base`|😡|||
85 | ||
--------------------------------------------------------------------------------
/boda/models/feature_extractor/vggnet.py:
--------------------------------------------------------------------------------
1 | import math
2 | from collections import OrderedDict
3 | from typing import Tuple, List, Dict, Optional
4 |
5 | import torch
6 | import torch.nn.functional as F
7 | from torch import nn, Tensor
8 | from torch.nn.modules.batchnorm import BatchNorm2d
9 |
10 | # from ..base_architecture import Backbone
11 |
12 |
13 | class VGG(nn.Module):
14 | """
15 | This class is derived from torchvision VGG make_layers()
16 | https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py
17 | https://github.com/dbolya/yolact/blob/master/backbone.py
18 | """
19 |
20 | def __init__(
21 | self,
22 | structure,
23 | bn: bool = False,
24 | norm_layer: Optional[nn.Module] = nn.BatchNorm2d,
25 | num_classes: int = 1000,
26 | ) -> None:
27 | super().__init__()
28 | self.bn = bn
29 | self.in_channels = 3
30 | self.channels = []
31 | self.layers = nn.ModuleList()
32 |
33 | for layer in structure:
34 | self._make_layer(layer)
35 |
36 | def forward(self, inputs):
37 | outputs = []
38 | for layer in self.layers:
39 | inputs = layer(inputs)
40 | outputs.append(inputs)
41 |
42 | return outputs
43 |
44 | def _make_layer(self, config):
45 | # _layers = []
46 | _layers = OrderedDict()
47 | i = 0
48 | for v in config:
49 | kwargs = None
50 | if isinstance(v, tuple):
51 | kwargs = v[1]
52 | v = v[0]
53 |
54 | if v == "M":
55 | if kwargs is None:
56 | kwargs = {"kernel_size": 2, "stride": 2}
57 |
58 | # _layers.append(nn.MaxPool2d(**kwargs))
59 | # _layers.update({'maxpool': nn.MaxPool2d(**kwargs)})
60 | _layers.update({f"maxpool{i}": nn.MaxPool2d(**kwargs)})
61 | else:
62 | if kwargs is None:
63 | kwargs = {"kernel_size": 3, "padding": 1}
64 |
65 | conv2d = nn.Conv2d(
66 | in_channels=self.in_channels, out_channels=v, **kwargs
67 | )
68 |
69 | if self.bn:
70 | # _layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU()]
71 | # _layers.update({'conv': conv2d, 'bn': nn.BatchNorm2d(v), 'relu': nn.ReLU()})
72 | _layers.update(
73 | {
74 | f"{i}": conv2d,
75 | f"bn{i}": nn.BatchNorm2d(v),
76 | f"relu{i}": nn.ReLU(),
77 | }
78 | )
79 | else:
80 | # _layers += [conv2d, nn.ReLU()]
81 | # _layers.update({'conv': conv2d, 'relu': nn.ReLU()})
82 | _layers.update({f"{i}": conv2d, f"relu{i}": nn.ReLU()})
83 |
84 | self.in_channels = v
85 | i += 1
86 |
87 | self.channels.append(self.in_channels)
88 | self.layers.append(nn.Sequential(_layers))
89 |
90 |
91 | structures = {
92 | "vgg16": [
93 | [64, 64],
94 | ["M", 128, 128],
95 | ["M", 256, 256, 256],
96 | [("M", {"kernel_size": 2, "stride": 2, "ceil_mode": True}), 512, 512, 512],
97 | ["M", 512, 512, 512],
98 | ]
99 | }
100 |
101 |
102 | def vgg16(config: Dict = None):
103 | model = VGG(structures["vgg16"])
104 |
105 | return model
106 |
--------------------------------------------------------------------------------
/boda/models/ssd/README.md:
--------------------------------------------------------------------------------
1 | # SSD (Single Shot MultiBox Object Detector)
2 |
3 | ```
4 | ██████╗ ██████╗ ███████╗
5 | ██╔════╝ ██╔════╝ ██╔═══██╗
6 | ╚██████╗ ╚██████╗ ██║ ██║
7 | ╚════██╗ ╚════██╗██║ ██║
8 | ██████╔╝ ██████╔╝███████╔╝
9 | ╚═════╝ ╚═════╝ ╚══════╝
10 | ```
11 |
12 | ## SSD Architecture
13 |
14 | ```{bash}
15 | ==========================================================================================
16 | Layer (type:depth-idx) Output Shape Param #
17 | ==========================================================================================
18 | ├─VGG: 1-1 [-1, 64, 300, 300] --
19 | | └─ModuleList: 2 [] --
20 | | | └─Sequential: 3-1 [-1, 64, 300, 300] 38,720
21 | | | └─Sequential: 3-2 [-1, 128, 150, 150] 221,440
22 | | | └─Sequential: 3-3 [-1, 256, 75, 75] 1,475,328
23 | | | └─Sequential: 3-4 [-1, 512, 38, 38] 5,899,776
24 | | | └─Sequential: 3-5 [-1, 512, 19, 19] 7,079,424
25 | ├─SsdPredictNeck: 1-2 [-1, 512, 38, 38] --
26 | | └─L2Norm: 2-1 [-1, 512, 38, 38] 512
27 | | └─ModuleList: 2 [] --
28 | | | └─Sequential: 3-6 [-1, 1024, 19, 19] 5,769,216
29 | | | └─Sequential: 3-7 [-1, 512, 10, 10] 1,442,560
30 | | | └─Sequential: 3-8 [-1, 256, 5, 5] 360,832
31 | | | └─Sequential: 3-9 [-1, 256, 3, 3] 328,064
32 | | | └─Sequential: 3-10 [-1, 256, 1, 1] 328,064
33 | ├─ModuleList: 1 [] --
34 | | └─SsdPredictHead: 2-2 [[-1, 4]] --
35 | | | └─Sequential: 3-11 [-1, 16, 38, 38] 73,744
36 | | | └─Sequential: 3-12 [-1, 84, 38, 38] 387,156
37 | | └─SsdPredictHead: 2-3 [[-1, 4]] --
38 | | | └─Sequential: 3-13 [-1, 24, 19, 19] 221,208
39 | | | └─Sequential: 3-14 [-1, 126, 19, 19] 1,161,342
40 | | └─SsdPredictHead: 2-4 [[-1, 4]] --
41 | | | └─Sequential: 3-15 [-1, 24, 10, 10] 110,616
42 | | | └─Sequential: 3-16 [-1, 126, 10, 10] 580,734
43 | | └─SsdPredictHead: 2-5 [[-1, 4]] --
44 | | | └─Sequential: 3-17 [-1, 24, 5, 5] 55,320
45 | | | └─Sequential: 3-18 [-1, 126, 5, 5] 290,430
46 | | └─SsdPredictHead: 2-6 [[-1, 4]] --
47 | | | └─Sequential: 3-19 [-1, 16, 3, 3] 36,880
48 | | | └─Sequential: 3-20 [-1, 84, 3, 3] 193,620
49 | | └─SsdPredictHead: 2-7 [[-1, 4]] --
50 | | | └─Sequential: 3-21 [-1, 16, 1, 1] 36,880
51 | | | └─Sequential: 3-22 [-1, 84, 1, 1] 193,620
52 | ==========================================================================================
53 | Total params: 26,285,486
54 | Trainable params: 26,285,486
55 | Non-trainable params: 0
56 | Total mult-adds (G): 31.43
57 | ==========================================================================================
58 | Input size (MB): 1.03
59 | Forward/backward pass size (MB): 200.19
60 | Params size (MB): 100.27
61 | Estimated Total Size (MB): 301.49
62 | ```
63 |
64 | Pretrained weights: https://s3.amazonaws.com/amdegroot-models/ssd300_mAP_77.43_v2.pth
65 |
66 |
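The head shapes in the table above follow from the anchor settings stored on `SsdConfig`. A short inspection sketch (the printed values are the defaults set in `configuration_ssd.py`):

```python
from boda.models import SsdConfig

config = SsdConfig()             # 20 classes, 300-pixel input, vgg16 backbone
print(config.boxes)              # [4, 6, 6, 6, 4, 4] default boxes per feature map
print(config.aspect_ratios)      # [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
print(config.min_sizes)          # [30, 60, 111, 162, 213, 264]
print(config.max_sizes)          # [60, 111, 162, 213, 264, 315]
print(config.steps)              # [8, 16, 32, 64, 100, 300]
```
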
67 | ## References
68 |
69 | - [amdegroot/ssd.pytorch](https://github.com/amdegroot/ssd.pytorch)
70 | - [open-mmlab/mmdetection](https://github.com/open-mmlab/mmdetection)
71 | []()
--------------------------------------------------------------------------------
/boda/models/feature_extractor/pafpn.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 | from .backbone_darknet import BaseConv, CSPLayer, DWConv
5 |
6 |
7 | class YOLOPAFPN(nn.Module):
8 | """
9 | PAFPN neck used by YOLOX-style detectors; Darknet-53 is the default backbone that feeds it.
10 | """
11 |
12 | def __init__(
13 | self,
14 | in_channels,
15 | depth=1.0,
16 | width=1.0,
17 | depthwise=False,
18 | act="silu",
19 | ):
20 | super().__init__()
21 | self.in_channels = in_channels
22 | print(self.in_channels)
23 | Conv = DWConv if depthwise else BaseConv
24 |
25 | self.upsample = nn.Upsample(scale_factor=2, mode="nearest")
26 | self.lateral_conv0 = BaseConv(
27 | int(self.in_channels[2] * width),
28 | int(self.in_channels[1] * width),
29 | 1,
30 | 1,
31 | act=act,
32 | )
33 | self.C3_p4 = CSPLayer(
34 | int(2 * self.in_channels[1] * width),
35 | int(self.in_channels[1] * width),
36 | round(3 * depth),
37 | False,
38 | depthwise=depthwise,
39 | act=act,
40 | ) # cat
41 |
42 | self.reduce_conv1 = BaseConv(
43 | int(self.in_channels[1] * width),
44 | int(self.in_channels[0] * width),
45 | 1,
46 | 1,
47 | act=act,
48 | )
49 | self.C3_p3 = CSPLayer(
50 | int(2 * self.in_channels[0] * width),
51 | int(self.in_channels[0] * width),
52 | round(3 * depth),
53 | False,
54 | depthwise=depthwise,
55 | act=act,
56 | )
57 |
58 | # bottom-up conv
59 | self.bu_conv2 = Conv(
60 | int(self.in_channels[0] * width),
61 | int(self.in_channels[0] * width),
62 | 3,
63 | 2,
64 | act=act,
65 | )
66 | self.C3_n3 = CSPLayer(
67 | int(2 * self.in_channels[0] * width),
68 | int(self.in_channels[1] * width),
69 | round(3 * depth),
70 | False,
71 | depthwise=depthwise,
72 | act=act,
73 | )
74 |
75 | # bottom-up conv
76 | self.bu_conv1 = Conv(
77 | int(self.in_channels[1] * width),
78 | int(self.in_channels[1] * width),
79 | 3,
80 | 2,
81 | act=act,
82 | )
83 | self.C3_n4 = CSPLayer(
84 | int(2 * self.in_channels[1] * width),
85 | int(self.in_channels[2] * width),
86 | round(3 * depth),
87 | False,
88 | depthwise=depthwise,
89 | act=act,
90 | )
91 |
92 | def forward(self, inputs):
93 | """
94 | Args:
95 | inputs: input images.
96 |
97 | Returns:
98 | Tuple[Tensor]: FPN feature.
99 | """
100 |
101 | # backbone
102 | [x2, x1, x0] = inputs
103 |
104 | fpn_out0 = self.lateral_conv0(x0) # 1024->512/32
105 | f_out0 = self.upsample(fpn_out0) # 512/16
106 | f_out0 = torch.cat([f_out0, x1], 1) # 512->1024/16
107 | f_out0 = self.C3_p4(f_out0) # 1024->512/16
108 |
109 | fpn_out1 = self.reduce_conv1(f_out0) # 512->256/16
110 | f_out1 = self.upsample(fpn_out1) # 256/8
111 | f_out1 = torch.cat([f_out1, x2], 1) # 256->512/8
112 | pan_out2 = self.C3_p3(f_out1) # 512->256/8
113 |
114 | p_out1 = self.bu_conv2(pan_out2) # 256->256/16
115 | p_out1 = torch.cat([p_out1, fpn_out1], 1) # 256->512/16
116 | pan_out1 = self.C3_n3(p_out1) # 512->512/16
117 |
118 | p_out0 = self.bu_conv1(pan_out1) # 512->512/32
119 | p_out0 = torch.cat([p_out0, fpn_out0], 1) # 512->1024/32
120 | pan_out0 = self.C3_n4(p_out0) # 1024->1024/32
121 |
122 | outputs = (pan_out2, pan_out1, pan_out0)
123 | return outputs
124 |
--------------------------------------------------------------------------------
/boda/models/yolox/utils.py:
--------------------------------------------------------------------------------
1 | # import torch
2 | # import torch.nn.functional as F
3 | # import torchvision
4 |
5 |
6 | # def preproc(img, input_size):
7 | # padded_img = torch.ones(3, input_size[0], input_size[1]) * 0.48
8 | # r = min(input_size[0] / img.shape[1], input_size[1] / img.shape[2])
9 | # resized_img = F.interpolate(
10 | # img[None],
11 | # size=(int(img.shape[1] * r), int(img.shape[2] * r)),
12 | # mode='bilinear',
13 | # align_corners=False
14 | # )[0]
15 | # print(resized_img.shape)
16 |
17 | # padded_img[:, :int(img.shape[1] * r), :int(img.shape[2] * r)] = resized_img
18 | # padded_img = padded_img.contiguous().type(torch.float32)
19 |
20 | # return padded_img, r
21 |
22 |
23 | # def bboxes_iou(bboxes_a, bboxes_b, xyxy=True):
24 | # if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4:
25 | # raise IndexError
26 |
27 | # if xyxy:
28 | # tl = torch.max(bboxes_a[:, None, :2], bboxes_b[:, :2])
29 | # br = torch.min(bboxes_a[:, None, 2:], bboxes_b[:, 2:])
30 | # area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1)
31 | # area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1)
32 | # else:
33 | # tl = torch.max(
34 | # (bboxes_a[:, None, :2] - bboxes_a[:, None, 2:] / 2),
35 | # (bboxes_b[:, :2] - bboxes_b[:, 2:] / 2),
36 | # )
37 | # br = torch.min(
38 | # (bboxes_a[:, None, :2] + bboxes_a[:, None, 2:] / 2),
39 | # (bboxes_b[:, :2] + bboxes_b[:, 2:] / 2),
40 | # )
41 |
42 | # area_a = torch.prod(bboxes_a[:, 2:], 1)
43 | # area_b = torch.prod(bboxes_b[:, 2:], 1)
44 | # en = (tl < br).type(tl.type()).prod(dim=2)
45 | # area_i = torch.prod(br - tl, 2) * en # * ((tl < br).all())
46 | # return area_i / (area_a[:, None] + area_b - area_i)
47 |
48 |
49 | # def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agnostic=False):
50 | # box_corner = prediction.new(prediction.shape)
51 | # box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
52 | # box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
53 | # box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
54 | # box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
55 | # prediction[:, :, :4] = box_corner[:, :, :4]
56 |
57 | # output = [None for _ in range(len(prediction))]
58 | # for i, image_pred in enumerate(prediction):
59 |
60 | # # If none are remaining => process next image
61 | # if not image_pred.size(0):
62 | # continue
63 | # # Get score and class with highest confidence
64 | # class_conf, class_pred = torch.max(image_pred[:, 5: 5 + num_classes], 1, keepdim=True)
65 |
66 | # conf_mask = (image_pred[:, 4] * class_conf.squeeze() >= conf_thre).squeeze()
67 | # # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
68 | # detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1)
69 | # detections = detections[conf_mask]
70 | # if not detections.size(0):
71 | # continue
72 |
73 | # if class_agnostic:
74 | # nms_out_index = torchvision.ops.nms(
75 | # detections[:, :4],
76 | # detections[:, 4] * detections[:, 5],
77 | # nms_thre,
78 | # )
79 | # else:
80 | # nms_out_index = torchvision.ops.batched_nms(
81 | # detections[:, :4],
82 | # detections[:, 4] * detections[:, 5],
83 | # detections[:, 6],
84 | # nms_thre,
85 | # )
86 |
87 | # detections = detections[nms_out_index]
88 | # if output[i] is None:
89 | # output[i] = detections
90 | # else:
91 | # output[i] = torch.cat((output[i], detections))
92 |
93 | # output = [{
94 | # 'boxes': o[:, :4],
95 | # 'labels': o[:, 6],
96 | # 'scores': o[:, 4] * o[:, 5],
97 | # } for o in output]
98 |
99 | # return output
100 |
--------------------------------------------------------------------------------
/boda/models/feature_extractor/fpn.py:
--------------------------------------------------------------------------------
1 | from typing import List, Sequence
2 |
3 | import torch
4 | import torch.nn.functional as F
5 | from torch import nn, Tensor
6 |
7 |
8 | class FeaturePyramidNetworks(nn.Module):
9 |     """Feature Pyramid Networks
10 |
11 | Example::
12 | >>> backbone = resnet101()
13 | >>> neck = FeaturePyramidNetworks(backbone.channels, [1, 2, 3])
14 | >>> print(neck.channels, neck.selected_layers)
15 | """
16 |
17 | def __init__(
18 | self,
19 | in_channels: Sequence[int] = [256, 512, 1024, 2048],
20 | selected_layers: Sequence[int] = [1, 2, 3],
21 | out_channels: int = 256,
22 | extra_layers: bool = False,
23 | num_extra_predict_layers: int = 2,
24 | **kwargs
25 | ) -> None:
26 | """
27 | Args:
28 |             in_channels (:obj:`List[int]`): output channels of each backbone stage
29 |             selected_layers (:obj:`List[int]`): indices of the backbone feature maps to use
30 |             out_channels (:obj:`int`): number of channels of every pyramid output
31 |             extra_layers (:obj:`bool`): add strided convolutions to build the extra levels
32 |             num_extra_predict_layers (:obj:`int`): number of extra pyramid levels to append
33 | """
34 | super().__init__()
35 | self.in_channels = [in_channels[i] for i in selected_layers]
36 | self.selected_layers = selected_layers
37 | self.selected_backbones = selected_layers
38 |
39 | self.extra_layers = extra_layers
40 | self.num_extra_layers = 0
41 | self.num_extra_predict_layers = num_extra_predict_layers
42 |
43 | self.selected_layers = list(
44 | range(len(self.selected_layers) + self.num_extra_predict_layers)
45 | )
46 |
47 | self.lateral_layers = nn.ModuleList()
48 | for _in_channels in reversed(self.in_channels):
49 | self.lateral_layers.append(
50 | nn.Conv2d(
51 | _in_channels,
52 | out_channels,
53 | kernel_size=kwargs.get("lateral_kernel_size", 1),
54 | stride=kwargs.get("lateral_stride", 1),
55 | padding=kwargs.get("lateral_padding", 0),
56 | )
57 | )
58 |
59 | self.predict_layers = nn.ModuleList()
60 | for _ in self.in_channels:
61 | self.predict_layers.append(
62 | nn.Conv2d(
63 | out_channels,
64 | out_channels,
65 |                     kernel_size=kwargs.get("predict_kernel_size", 3),
66 |                     stride=kwargs.get("predict_stride", 1),
67 |                     padding=kwargs.get("predict_padding", 1),
68 | )
69 | )
70 |
71 | if self.num_extra_predict_layers > 0:
72 | self.extra_layers = nn.ModuleList(
73 | [
74 | nn.Conv2d(
75 | out_channels, out_channels, kernel_size=3, stride=2, padding=1
76 | )
77 | for _ in range(self.num_extra_predict_layers)
78 | ]
79 | )
80 | # self.channels.append(self.out_channels)
81 |
82 | self.channels = [out_channels] * len(self.selected_layers)
83 |
84 | def forward(self, inputs: List[Tensor]) -> List[Tensor]:
85 | """
86 | Args:
87 | inputs (:obj:`FloatTensor[B, C, H, W]`)
88 |
89 | Returns:
90 | outputs (:obj:`List[FloatTensor[B, C, H, W]]`)
91 | """
92 | device = inputs[0].device
93 | inputs = [inputs[i] for i in self.selected_backbones]
94 |
95 | x = torch.zeros(1, device=device)
96 | outputs = [x for _ in range(len(inputs))]
97 |
98 | i = len(inputs)
99 | for lateral_layer in self.lateral_layers:
100 | i -= 1
101 | if i < len(inputs) - 1:
102 | _, _, h, w = inputs[i].size()
103 | x = F.interpolate(x, size=(h, w), mode="bilinear", align_corners=False)
104 |
105 | x = x + lateral_layer(inputs[i])
106 | outputs[i] = x
107 |
108 | i = len(inputs)
109 | for predict_layer in self.predict_layers:
110 | i -= 1
111 | outputs[i] = F.relu(predict_layer(outputs[i]))
112 |
113 | if self.extra_layers:
114 | for extra_layer in self.extra_layers:
115 | outputs.append(extra_layer(outputs[-1]))
116 |
117 | elif self.num_extra_predict_layers > 0:
118 | for _ in range(self.num_extra_predict_layers):
119 | outputs.append(self.predict_layers[-1](outputs[-1]))
120 |
121 | return outputs
122 |
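# Minimal usage sketch (batch size and spatial sizes are assumptions for illustration).
# With the defaults the backbone supplies four maps with channels [256, 512, 1024, 2048];
# layers 1-3 are kept and two extra levels are appended, so five 256-channel maps return:
#
#   fpn = FeaturePyramidNetworks()
#   features = [
#       torch.randn(1, 256, 128, 128),
#       torch.randn(1, 512, 64, 64),
#       torch.randn(1, 1024, 32, 32),
#       torch.randn(1, 2048, 16, 16),
#   ]
#   outputs = fpn(features)  # len(outputs) == 5, each map has 256 channels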
--------------------------------------------------------------------------------
/run_test.py:
--------------------------------------------------------------------------------
1 | from boda.models import YolactConfig, YolactModel
2 | from boda.models.feature_extractor import resnet50, resnet101
3 | # from boda.lib.torchinfo import summary
4 | from boda.lib.torchsummary import summary
5 | import torch
6 |
7 | config = YolactConfig(num_classes=80)
8 | model = YolactModel(config, backbone=resnet101()).to('cuda')
9 | model.train()
10 | print(model)
11 | # print(summary(model, input_size=(16, 3, 550, 550), verbose=0))
12 | print(summary(model, input_data=(3, 550, 550), verbose=0))
13 |
14 | # model.load_weights('cache/yolact-base.pth')
15 |
16 |
17 | from boda.models import PostprocessYolact
18 | from PIL import Image
19 | from torchvision import transforms
20 |
21 | image = Image.open('test6.jpg')
22 | model = YolactModel.from_pretrained('yolact-base').cuda()
23 | model.eval()
24 |
25 | aug = transforms.Compose([
26 | transforms.ToTensor(),
27 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
28 | # transforms.Normalize([0.406, 0.456, 0.485], [0.225, 0.224, 0.229])
29 | ])
30 |
31 | outputs = model([aug(image).cuda()])
32 |
33 | print(outputs.keys())
34 | post = PostprocessYolact()
35 | outputs = post(outputs, outputs['image_sizes'])
36 | print(outputs[0]['boxes'])
37 | import cv2
38 | import numpy as np
39 | import matplotlib.pyplot as plt
40 | import matplotlib.patches as patches
41 | from skimage.measure import find_contours
43 |
44 | np_image = np.array(image)
45 | np_image = cv2.cvtColor(np_image, cv2.COLOR_RGB2BGR)
46 | # for box in outputs[0]['boxes']:
47 | # # box = list(map(int, boxes[j, :]))
48 | # x1, y1, x2, y2 = box.detach().cpu().numpy()
49 | # # score = scores[j]
50 | # # label = labels[j]
51 | # cv2.rectangle(np_image, (x1, y1), (x2, y2), (0, 0, 255), thickness=1)
52 |
53 | plt.imshow(image)
54 | ax = plt.gca()
55 | threshold = 0
56 | COCO_CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
57 | 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
58 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
59 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
60 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
61 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
62 | 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
63 | 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
64 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
65 | 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
66 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
67 | 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
68 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
69 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush')
70 |
71 | COLORS = {
72 | 1: 'deepskyblue',
73 | 2: 'orangered',
74 | 3: 'yellowgreen',
75 | 4: 'darkorange',
76 | 5: 'chocolate',
77 | 6: 'slategrey',
78 | 7: 'darkgoldenrod',
79 | 8: 'purple',
80 | 9: 'saddlebrown',
81 | 10: 'olive',
82 | }
83 |
84 | for output in outputs:
85 | boxes = output['boxes']
86 | scores = output['scores']
87 | labels = output['labels']
88 | masks = output['masks']
89 | print(scores)
90 |
91 | for i, box in enumerate(boxes):
92 | x1, y1, x2, y2 = box.detach().cpu().numpy()
93 | score = scores[i].detach().cpu().numpy()
94 | label = labels[i].detach().cpu().numpy()
95 | mask = masks[i].detach().cpu().numpy().astype(np.int64)
96 |
97 |         color = COLORS[int(label) % 10 + 1]
98 | contours = find_contours(mask, 0.5)
99 |
100 | if score >= threshold:
101 | cx = x2 - x1
102 | cy = y2 - y1
103 | ax.text(x1, y1, f"{COCO_CLASSES[label]}", c='black', size=8, va='bottom', ha='left', alpha=0.5)
104 |
105 | rect = patches.Rectangle(
106 | (x1, y1),
107 | cx, cy,
108 | linewidth=1,
109 | edgecolor=color,
110 | facecolor='none'
111 | )
112 | ax.add_patch(rect)
113 |
114 | ## contours
115 | for contour in contours:
116 | shapes = []
117 | for point in contour:
118 | shapes.append([int(point[1]), int(point[0])])
119 |
120 | polygon_edge = patches.Polygon(
121 | (shapes),
122 | edgecolor=color,
123 | facecolor='none',
124 | linewidth=1,
125 | fill=False,
126 | )
127 |
128 | polygon_fill = patches.Polygon(
129 | (shapes),
130 | alpha=0.5,
131 | edgecolor='none',
132 | facecolor=color,
133 | fill=True
134 | )
135 |
136 | ax.add_patch(polygon_edge)
137 | ax.add_patch(polygon_fill)
138 |
139 |
140 | plt.axis('off')
141 | plt.savefig('test.jpg', dpi=100, bbox_inches='tight', pad_inches=0)
142 |
--------------------------------------------------------------------------------
/boda/lib/torchsummary/layer_info.py:
--------------------------------------------------------------------------------
1 | """ layer_info.py """
2 | from typing import Any, Dict, List, Optional, Sequence, Union
3 |
4 | import numpy as np
5 | import torch
6 | import torch.nn as nn
7 |
8 | DETECTED_INPUT_OUTPUT_TYPES = Union[
9 | Sequence[Any], Dict[Any, torch.Tensor], torch.Tensor
10 | ]
11 |
12 |
13 | class LayerInfo:
14 | """Class that holds information about a layer module."""
15 |
16 | def __init__(
17 | self,
18 | module: nn.Module,
19 | depth: int,
20 | depth_index: Optional[int] = None,
21 | parent_info: Optional["LayerInfo"] = None,
22 | ):
23 | # Identifying information
24 | self.layer_id = id(module)
25 | self.module = module
26 | self.class_name = str(module.__class__).split(".")[-1].split("'")[0]
27 | self.inner_layers: Dict[str, List[int]] = {}
28 | self.depth = depth
29 | self.depth_index = depth_index
30 | self.executed = False
31 | self.parent_info = parent_info
32 |
33 | # Statistics
34 | self.trainable = True
35 | self.is_recursive = False
36 | self.input_size: List[int] = []
37 | self.output_size: List[int] = []
38 | self.kernel_size: List[int] = []
39 | self.num_params = 0
40 | self.macs = 0
41 | self.calculate_num_params()
42 |
43 | def __repr__(self) -> str:
44 | if self.depth_index is None:
45 | return f"{self.class_name}: {self.depth}"
46 | return f"{self.class_name}: {self.depth}-{self.depth_index}"
47 |
48 | @staticmethod
49 | def calculate_size(
50 | inputs: DETECTED_INPUT_OUTPUT_TYPES, batch_dim: Optional[int]
51 | ) -> List[int]:
52 | """Set input_size or output_size using the model's inputs."""
53 |
54 | def nested_list_size(inputs: Sequence[Any]) -> List[int]:
55 | """Flattens nested list size."""
56 | if hasattr(inputs[0], "size") and callable(inputs[0].size):
57 | return list(inputs[0].size())
58 | if isinstance(inputs, (list, tuple)):
59 | return nested_list_size(inputs[0])
60 | return []
61 |
62 | # pack_padded_seq and pad_packed_seq store feature into data attribute
63 | if isinstance(inputs, (list, tuple)) and len(inputs) == 0:
64 | size = []
65 | elif isinstance(inputs, (list, tuple)) and hasattr(inputs[0], "data"):
66 | size = list(inputs[0].data.size())
67 | if batch_dim is not None:
68 | size = size[:batch_dim] + [-1] + size[batch_dim + 1 :]
69 |
70 | elif isinstance(inputs, dict):
71 | # TODO avoid overwriting the previous size every time?
72 | for _, output in inputs.items():
73 | size = list(output.size())
74 | if batch_dim is not None:
75 | size = [size[:batch_dim] + [-1] + size[batch_dim + 1 :]]
76 |
77 | elif isinstance(inputs, torch.Tensor):
78 | size = list(inputs.size())
79 | if batch_dim is not None:
80 | size[batch_dim] = -1
81 |
82 | elif isinstance(inputs, (list, tuple)):
83 | size = nested_list_size(inputs)
84 |
85 | else:
86 | raise TypeError(
87 | "Model contains a layer with an unsupported "
88 | "input or output type: {}".format(inputs)
89 | )
90 |
91 | return size
92 |
93 | def calculate_num_params(self) -> None:
94 | """
95 | Set num_params, trainable, inner_layers, and kernel_size
96 | using the module's parameters.
97 | """
98 | for name, param in self.module.named_parameters():
99 | self.num_params += param.nelement()
100 | self.trainable &= param.requires_grad
101 |
102 | if name == "weight":
103 | ksize = list(param.size())
104 | # to make [in_shape, out_shape, ksize, ksize]
105 | if len(ksize) > 1:
106 | ksize[0], ksize[1] = ksize[1], ksize[0]
107 | self.kernel_size = ksize
108 |
109 | # RNN modules have inner weights such as weight_ih_l0
110 | elif "weight" in name:
111 | self.inner_layers[name] = list(param.size())
112 |
113 | def calculate_macs(self) -> None:
114 | """
115 | Set MACs using the module's parameters and layer's output size, which is
116 | used for computing number of operations for Conv layers.
117 | """
118 | for name, param in self.module.named_parameters():
119 | if name == "weight":
120 | # ignore N, C when calculate Mult-Adds in ConvNd
121 | if "Conv" in self.class_name:
122 | self.macs += int(param.nelement() * np.prod(self.output_size[2:]))
123 | else:
124 | self.macs += param.nelement()
125 | # RNN modules have inner weights such as weight_ih_l0
126 | elif "weight" in name:
127 | self.macs += param.nelement()
128 |
129 | def check_recursive(self, summary_list: List["LayerInfo"]) -> None:
130 | """
131 | If the current module is already-used, mark as (recursive).
132 | Must check before adding line to the summary.
133 | """
134 | if list(self.module.named_parameters()):
135 | for other_layer in summary_list:
136 | if self.layer_id == other_layer.layer_id:
137 | self.is_recursive = True
138 |
139 | def macs_to_str(self, reached_max_depth: bool) -> str:
140 | """Convert MACs to string."""
141 | if self.num_params > 0 and (
142 | reached_max_depth or not any(self.module.children())
143 | ):
144 | return f"{self.macs:,}"
145 | return "--"
146 |
147 | def num_params_to_str(self, reached_max_depth: bool = False) -> str:
148 | """Convert num_params to string."""
149 | if self.is_recursive:
150 | return "(recursive)"
151 | if self.num_params > 0:
152 | param_count_str = f"{self.num_params:,}"
153 | if reached_max_depth or not any(self.module.children()):
154 | if not self.trainable:
155 | return f"({param_count_str})"
156 | return param_count_str
157 | return "--"
158 |
--------------------------------------------------------------------------------
/boda/lib/torchinfo/layer_info.py:
--------------------------------------------------------------------------------
1 | """ layer_info.py """
2 | from typing import Any, Dict, Iterable, List, Optional, Sequence, Union
3 |
4 | import torch
5 | import torch.nn as nn
6 |
7 | DETECTED_INPUT_OUTPUT_TYPES = Union[
8 | Sequence[Any], Dict[Any, torch.Tensor], torch.Tensor
9 | ]
10 |
11 |
12 | class LayerInfo:
13 | """Class that holds information about a layer module."""
14 |
15 | def __init__(
16 | self,
17 | module: nn.Module,
18 | depth: int,
19 | depth_index: Optional[int] = None,
20 | parent_info: Optional["LayerInfo"] = None,
21 | ):
22 | # Identifying information
23 | self.layer_id = id(module)
24 | self.module = module
25 | self.class_name = str(module.__class__).split(".")[-1].split("'")[0]
26 | self.inner_layers: Dict[str, List[int]] = {}
27 | self.depth = depth
28 | self.depth_index = depth_index
29 | self.executed = False
30 | self.parent_info = parent_info
31 |
32 | # Statistics
33 | self.trainable = True
34 | self.is_recursive = False
35 | self.input_size: List[int] = []
36 | self.output_size: List[int] = []
37 | self.kernel_size: List[int] = []
38 | self.num_params = 0
39 | self.macs = 0
40 | self.calculate_num_params()
41 |
42 | def __repr__(self) -> str:
43 | layer_name = f"{self.class_name}: {self.depth}"
44 | if self.depth_index is None:
45 | return layer_name
46 | return f"{layer_name}-{self.depth_index}"
47 |
48 | @staticmethod
49 | def calculate_size(
50 | inputs: DETECTED_INPUT_OUTPUT_TYPES, batch_dim: Optional[int]
51 | ) -> List[int]:
52 | """Set input_size or output_size using the model's inputs."""
53 |
54 | def nested_list_size(inputs: Sequence[Any]) -> List[int]:
55 | """Flattens nested list size."""
56 | if hasattr(inputs[0], "size") and callable(inputs[0].size):
57 | return list(inputs[0].size())
58 | if isinstance(inputs, (list, tuple)):
59 | return nested_list_size(inputs[0])
60 | return []
61 |
62 | size = []
63 | # pack_padded_seq and pad_packed_seq store feature into data attribute
64 | if isinstance(inputs, (list, tuple)) and inputs and hasattr(inputs[0], "data"):
65 | size = list(inputs[0].data.size())
66 | if batch_dim is not None:
67 | size = size[:batch_dim] + [-1] + size[batch_dim + 1 :]
68 |
69 | elif isinstance(inputs, dict):
70 | # TODO avoid overwriting the previous size every time?
71 | for _, output in inputs.items():
72 | size = list(output.size())
73 | if batch_dim is not None:
74 | size = [size[:batch_dim] + [-1] + size[batch_dim + 1 :]]
75 |
76 | elif isinstance(inputs, torch.Tensor):
77 | size = list(inputs.size())
78 | if batch_dim is not None:
79 | size[batch_dim] = -1
80 |
81 | elif isinstance(inputs, (list, tuple)):
82 | size = nested_list_size(inputs)
83 |
84 | else:
85 | raise TypeError(
86 | "Model contains a layer with an unsupported "
87 | f"input or output type: {inputs}"
88 | )
89 |
90 | return size
91 |
92 | def calculate_num_params(self) -> None:
93 | """
94 | Set num_params, trainable, inner_layers, and kernel_size
95 | using the module's parameters.
96 | """
97 | for name, param in self.module.named_parameters():
98 | self.num_params += param.nelement()
99 | self.trainable &= param.requires_grad
100 |
101 | if name == "weight":
102 | ksize = list(param.size())
103 | # to make [in_shape, out_shape, ksize, ksize]
104 | if len(ksize) > 1:
105 | ksize[0], ksize[1] = ksize[1], ksize[0]
106 | self.kernel_size = ksize
107 |
108 | # RNN modules have inner weights such as weight_ih_l0
109 | elif "weight" in name:
110 | self.inner_layers[name] = list(param.size())
111 |
112 | def calculate_macs(self) -> None:
113 | """
114 | Set MACs using the module's parameters and layer's output size, which is
115 | used for computing number of operations for Conv layers.
116 | """
117 | for name, param in self.module.named_parameters():
118 | if name == "weight":
119 | # ignore N, C when calculate Mult-Adds in ConvNd
120 | if "Conv" in self.class_name:
121 | self.macs += int(param.nelement() * prod(self.output_size[2:]))
122 | else:
123 | self.macs += param.nelement()
124 | # RNN modules have inner weights such as weight_ih_l0
125 | elif "weight" in name:
126 | self.macs += param.nelement()
127 |
128 | def check_recursive(self, summary_list: List["LayerInfo"]) -> None:
129 | """
130 | If the current module is already-used, mark as (recursive).
131 | Must check before adding line to the summary.
132 | """
133 | if list(self.module.named_parameters()):
134 | for other_layer in summary_list:
135 | if self.layer_id == other_layer.layer_id:
136 | self.is_recursive = True
137 |
138 | def macs_to_str(self, reached_max_depth: bool) -> str:
139 | """Convert MACs to string."""
140 | if self.num_params > 0 and (
141 | reached_max_depth or not any(self.module.children())
142 | ):
143 | return f"{self.macs:,}"
144 | return "--"
145 |
146 | def num_params_to_str(self, reached_max_depth: bool = False) -> str:
147 | """Convert num_params to string."""
148 | if self.is_recursive:
149 | return "(recursive)"
150 | if self.num_params > 0:
151 | param_count_str = f"{self.num_params:,}"
152 | if reached_max_depth or not any(self.module.children()):
153 | if not self.trainable:
154 | return f"({param_count_str})"
155 | return param_count_str
156 | return "--"
157 |
158 |
159 | def prod(num_list: Union[Iterable[Any], torch.Size]) -> int:
160 | result = 1
161 | for num in num_list:
162 | result *= num
163 | return abs(result)
164 |
--------------------------------------------------------------------------------
/boda/models/solov2/architecture_decoupled_solov1.py:
--------------------------------------------------------------------------------
1 | import functools
2 | import itertools
3 | import math
4 | import os
5 | from collections import defaultdict, OrderedDict
6 | from typing import Tuple, List, Dict, Any, Callable, TypeVar, Union, Sequence
7 |
8 | import torch
9 | import torch.nn.functional as F
10 | from torch import nn, Tensor
11 |
12 | from ...base_architecture import Neck, Head, Model
13 | from ...utils.mask import points_nms
14 | from ..backbone_resnet import resnet101, resnet50
15 | from ..neck_fpn import FeaturePyramidNetworks
16 | from .architecture_solov1 import (
17 | InstanceLayer,
18 | CategoryLayer,
19 | Solov1PredictNeck,
20 | Solov1PredictHead,
21 | Solov1Model,
22 | )
23 | from .configuration_solov1 import Solov1Config
24 |
25 |
26 | class DecoupledSolov1PredictHead(Solov1PredictHead):
27 | def __init__(
28 | self,
29 | config: Solov1Config,
30 | in_channels: int = 256,
31 | fpn_channels: int = 256,
32 | num_head_layers: int = 7,
33 | grids: List = [40, 36, 24, 16, 12],
34 | strides: List = [4, 8, 16, 32, 64],
35 | base_edges: List = [16, 32, 64, 128, 256],
36 | scales: List = [[8, 32], [16, 64], [32, 128], [64, 256], [128, 512]],
37 | num_classes: int = 80,
38 | ) -> None:
39 | super().__init__()
40 | self.config = config
41 | self.in_channels = in_channels
42 | self.fpn_channels = fpn_channels
43 | self.num_head_layers = num_head_layers
44 | self.grids = grids
45 | self.strides = strides
46 | self.base_edges = base_edges
47 | self.scales = scales
48 | self.num_classes = num_classes
49 |
50 | self.cate_down_pos = 0
51 |
52 | delattr(self, "instance_layers")
53 |
54 | self.x_instance_layers = nn.ModuleList()
55 | self.y_instance_layers = nn.ModuleList()
56 | self.category_layers = nn.ModuleList()
57 | for i in range(self.num_head_layers):
58 | if i == 0:
59 | in_channels = self.in_channels + 1
60 | else:
61 | in_channels = self.fpn_channels
62 |
63 | self.x_instance_layers.append(
64 | InstanceLayer(
65 | in_channels,
66 | self.fpn_channels,
67 | kernel_size=3,
68 | stride=1,
69 | padding=1,
70 | bias=True,
71 | num_groups=32,
72 | )
73 | )
74 |
75 | self.y_instance_layers.append(
76 | InstanceLayer(
77 | in_channels,
78 | self.fpn_channels,
79 | kernel_size=3,
80 | stride=1,
81 | padding=1,
82 | bias=True,
83 | num_groups=32,
84 | )
85 | )
86 |
87 | if i == 0:
88 | in_channels = self.in_channels
89 | else:
90 | in_channels = self.fpn_channels
91 |
92 | self.category_layers.append(
93 | CategoryLayer(
94 | in_channels,
95 | self.fpn_channels,
96 | kernel_size=3,
97 | stride=1,
98 | padding=1,
99 | bias=True,
100 | num_groups=32,
101 | )
102 | )
103 |
104 | self.x_decoupled_instance_layers = nn.ModuleList()
105 | self.y_decoupled_instance_layers = nn.ModuleList()
106 | self.pred_instance_layers = nn.ModuleList()
107 | for grid in self.grids:
108 | self.x_decoupled_instance_layers.append(
109 | nn.Conv2d(self.fpn_channels, grid, kernel_size=3, padding=1)
110 | )
111 | self.y_decoupled_instance_layers.append(
112 | nn.Conv2d(self.fpn_channels, grid, kernel_size=3, padding=1)
113 | )
114 |
115 | self.pred_category_layer = nn.Conv2d(
116 | self.fpn_channels, self.num_classes - 1, kernel_size=3, padding=1
117 | )
118 |
119 | def forward(self, inputs: List[Tensor]):
120 | inputs = self.split_feature_maps(inputs)
121 | feature_map_sizes = [feature_map.size()[-2:] for feature_map in inputs]
122 | upsampled_size = (feature_map_sizes[0][0] * 2, feature_map_sizes[0][1] * 2)
123 |
124 | pred_masks, pred_labels = self.multi_apply(
125 | self.forward_single,
126 | inputs,
127 | list(range(len(self.grids))),
128 | upsampled_size=upsampled_size,
129 | )
130 |
131 | return pred_masks, pred_labels
132 |
133 | def split_feature_maps(self, inputs: List[Tensor]) -> Tuple[Tensor]:
134 |         """Rescale the FPN maps: the first is downsampled 2x and the last is
135 |         resized to the fourth map's resolution; returns the resulting five maps.
136 |         """
137 | return (
138 | F.interpolate(
139 | inputs[0],
140 | scale_factor=0.5,
141 | mode="bilinear",
142 | align_corners=False,
143 | recompute_scale_factor=True,
144 | ),
145 | inputs[1],
146 | inputs[2],
147 | inputs[3],
148 | F.interpolate(
149 | inputs[4],
150 | size=inputs[3].shape[-2:],
151 | mode="bilinear",
152 | align_corners=False,
153 | ),
154 | )
155 |
156 | def forward_single(self, inputs, idx, upsampled_size: Tuple = None):
157 |         instances = inputs
158 |         categories = inputs
159 | 
160 |         # each decoupled branch is conditioned on a single normalized coordinate channel
161 |         x_range = torch.linspace(-1, 1, instances.shape[-1], device=instances.device)
162 |         y_range = torch.linspace(-1, 1, instances.shape[-2], device=instances.device)
163 |         y, x = torch.meshgrid(y_range, x_range)
164 |         y = y.expand([instances.shape[0], 1, -1, -1])
165 |         x = x.expand([instances.shape[0], 1, -1, -1])
166 |         x_instances = torch.cat([instances, x], 1)
167 |         y_instances = torch.cat([instances, y], 1)
168 | 
169 |         for x_layer, y_layer in zip(self.x_instance_layers, self.y_instance_layers):
170 |             x_instances = x_layer(x_instances)
171 |             y_instances = y_layer(y_instances)
172 | 
173 |         x_instances = F.interpolate(
174 |             x_instances, scale_factor=2.0, mode="bilinear", align_corners=False
175 |         )
176 |         y_instances = F.interpolate(
177 |             y_instances, scale_factor=2.0, mode="bilinear", align_corners=False
178 |         )
179 |         pred_masks = (
180 |             self.x_decoupled_instance_layers[idx](x_instances),
181 |             self.y_decoupled_instance_layers[idx](y_instances),
182 |         )
183 | 
184 |         for i, cate_layer in enumerate(self.category_layers):
185 |             if i == self.cate_down_pos:
186 |                 seg_num_grid = self.grids[idx]
187 |                 categories = F.interpolate(
188 |                     categories, size=seg_num_grid, mode="bilinear", align_corners=False
189 |                 )
190 |             categories = cate_layer(categories)
191 | 
192 |         pred_labels = self.pred_category_layer(categories)
193 | 
194 |         return pred_masks, pred_labels
195 | 
--------------------------------------------------------------------------------
/boda/lib/torchinfo/model_statistics.py:
--------------------------------------------------------------------------------
1 | """ model_statistics.py """
2 | from typing import Any, Dict, Iterable, List, Tuple, Union
3 |
4 | import torch
5 |
6 | from .formatting import FormattingOptions, Verbosity
7 | from .layer_info import LayerInfo, prod
8 |
9 | HEADER_TITLES = {
10 | "kernel_size": "Kernel Shape",
11 | "input_size": "Input Shape",
12 | "output_size": "Output Shape",
13 | "num_params": "Param #",
14 | "mult_adds": "Mult-Adds",
15 | }
16 | CORRECTED_INPUT_SIZE_TYPE = List[Union[Iterable[Any], torch.Size]]
17 |
18 |
19 | class ModelStatistics:
20 | """Class for storing results of the summary."""
21 |
22 | def __init__(
23 | self,
24 | summary_list: List[LayerInfo],
25 | input_size: CORRECTED_INPUT_SIZE_TYPE,
26 | formatting: FormattingOptions,
27 | ):
28 | self.summary_list = summary_list
29 | self.input_size = input_size
30 | self.total_input = sum(prod(sz) for sz in input_size) if input_size else 0
31 | self.formatting = formatting
32 | self.total_params, self.trainable_params = 0, 0
33 | self.total_output, self.total_mult_adds = 0, 0
34 | for layer_info in summary_list:
35 | self.total_mult_adds += layer_info.macs
36 | if not layer_info.is_recursive:
37 | if layer_info.depth == formatting.max_depth or (
38 | not any(layer_info.module.children())
39 | and layer_info.depth < formatting.max_depth
40 | ):
41 | self.total_params += layer_info.num_params
42 | if layer_info.trainable:
43 | self.trainable_params += layer_info.num_params
44 | if layer_info.num_params > 0 and not any(layer_info.module.children()):
45 | # x2 for gradients
46 | self.total_output += 2 * prod(layer_info.output_size)
47 |
48 | def __repr__(self) -> str:
49 | """Print results of the summary."""
50 | header_row = self.formatting.format_row("Layer (type:depth-idx)", HEADER_TITLES)
51 | layer_rows = self.layers_to_str()
52 | divider = "=" * self.formatting.get_total_width()
53 | summary_str = (
54 | "{0}\n{1}{0}\n{2}{0}"
55 | "\nTotal params: {3:,}\n"
56 | "Trainable params: {4:,}\n"
57 | "Non-trainable params: {5:,}\n".format(
58 | divider,
59 | header_row,
60 | layer_rows,
61 | self.total_params,
62 | self.trainable_params,
63 | self.total_params - self.trainable_params,
64 | )
65 | )
66 | if self.input_size:
67 | summary_str += (
68 | "Total mult-adds ({}): {:0.2f}\n"
69 | "{}\n"
70 | "Input size (MB): {:0.2f}\n"
71 | "Forward/backward pass size (MB): {:0.2f}\n"
72 | "Params size (MB): {:0.2f}\n"
73 | "Estimated Total Size (MB): {:0.2f}\n".format(
74 | *self.to_readable(self.total_mult_adds),
75 | divider,
76 | self.to_bytes(self.total_input),
77 | self.to_bytes(self.total_output),
78 | self.to_bytes(self.total_params),
79 | self.to_bytes(
80 | self.total_input + self.total_output + self.total_params
81 | ),
82 | )
83 | )
84 | summary_str += divider
85 | return summary_str
86 |
87 | @staticmethod
88 | def to_bytes(num: int) -> float:
89 | """Converts a number (assume floats, 4 bytes each) to megabytes."""
90 | return num * 4 / 1e6
91 |
92 | @staticmethod
93 | def to_readable(num: int) -> Tuple[str, float]:
94 | """Converts a number to millions, billions, or trillions."""
95 | if num >= 1e12:
96 | return "T", num / 1e12
97 | if num >= 1e9:
98 | return "G", num / 1e9
99 | return "M", num / 1e6
100 |
101 | def layer_info_to_row(
102 | self, layer_info: LayerInfo, reached_max_depth: bool = False
103 | ) -> str:
104 | """Convert layer_info to string representation of a row."""
105 |
106 | def get_start_str(depth: int) -> str:
107 | return "├─" if depth == 1 else "| " * (depth - 1) + "└─"
108 |
109 | row_values = {
110 | "kernel_size": str(layer_info.kernel_size)
111 | if layer_info.kernel_size
112 | else "--",
113 | "input_size": str(layer_info.input_size),
114 | "output_size": str(layer_info.output_size),
115 | "num_params": layer_info.num_params_to_str(reached_max_depth),
116 | "mult_adds": layer_info.macs_to_str(reached_max_depth),
117 | }
118 | depth = layer_info.depth
119 | name = get_start_str(depth) + str(layer_info)
120 | new_line = self.formatting.format_row(name, row_values)
121 | if self.formatting.verbose == Verbosity.VERBOSE.value:
122 | for inner_name, inner_shape in layer_info.inner_layers.items():
123 | prefix = get_start_str(depth + 1)
124 | extra_row_values = {"kernel_size": str(inner_shape)}
125 | new_line += self.formatting.format_row(
126 | prefix + inner_name, extra_row_values
127 | )
128 | return new_line
129 |
130 | def layers_to_str(self) -> str:
131 | """Print each layer of the model using a fancy branching diagram."""
132 | new_str = ""
133 | current_hierarchy: Dict[int, LayerInfo] = {}
134 |
135 | for layer_info in self.summary_list:
136 | if layer_info.depth > self.formatting.max_depth:
137 | continue
138 |
139 | # create full hierarchy of current layer
140 | hierarchy = {}
141 | parent = layer_info.parent_info
142 | while parent is not None and parent.depth > 0:
143 | hierarchy[parent.depth] = parent
144 | parent = parent.parent_info
145 |
146 | # show hierarchy if it is not there already
147 | for d in range(1, layer_info.depth):
148 | if (
149 | d not in current_hierarchy
150 | or current_hierarchy[d].module is not hierarchy[d].module
151 | ):
152 | new_str += self.layer_info_to_row(hierarchy[d])
153 | current_hierarchy[d] = hierarchy[d]
154 |
155 | reached_max_depth = layer_info.depth == self.formatting.max_depth
156 | new_str += self.layer_info_to_row(layer_info, reached_max_depth)
157 | current_hierarchy[layer_info.depth] = layer_info
158 |
159 | # remove deeper hierarchy
160 | d = layer_info.depth + 1
161 | while d in current_hierarchy:
162 | current_hierarchy.pop(d)
163 | d += 1
164 |
165 | return new_str
166 |
--------------------------------------------------------------------------------
/boda/models/ssd/inference_ssd.py:
--------------------------------------------------------------------------------
1 | # import torch
2 | # from torch.autograd import Function
3 | # from ..box_utils import decode, nms
4 | # from data import voc as cfg
5 | # from torchvision.ops import nms
6 |
7 |
8 | # # Adapted from https://github.com/Hakuyume/chainer-ssd
9 | # def decode(loc, priors, variances):
10 | # """Decode locations from predictions using priors to undo
11 | # the encoding we did for offset regression at train time.
12 | # Args:
13 | # loc (tensor): location predictions for loc layers,
14 | # Shape: [num_priors,4]
15 | # priors (tensor): Prior boxes in center-offset form.
16 | # Shape: [num_priors,4].
17 | # variances: (list[float]) Variances of priorboxes
18 | # Return:
19 | # decoded bounding box predictions
20 | # """
21 |
22 | # boxes = torch.cat((
23 | # priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
24 | # priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
25 | # boxes[:, :2] -= boxes[:, 2:] / 2
26 | # boxes[:, 2:] += boxes[:, :2]
27 | # return boxes
28 |
29 |
30 | # def nms(boxes, scores, overlap=0.5, top_k=200):
31 | # """Apply non-maximum suppression at test time to avoid detecting too many
32 | # overlapping bounding boxes for a given object.
33 | # Args:
34 | # boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
35 | # scores: (tensor) The class predscores for the img, Shape:[num_priors].
36 | # overlap: (float) The overlap thresh for suppressing unnecessary boxes.
37 | # top_k: (int) The Maximum number of box preds to consider.
38 | # Return:
39 | # The indices of the kept boxes with respect to num_priors.
40 | # """
41 |
42 | # keep = scores.new(scores.size(0)).zero_().long()
43 | # if boxes.numel() == 0:
44 | # return keep
45 | # x1 = boxes[:, 0]
46 | # y1 = boxes[:, 1]
47 | # x2 = boxes[:, 2]
48 | # y2 = boxes[:, 3]
49 | # area = torch.mul(x2 - x1, y2 - y1)
50 | # v, idx = scores.sort(0) # sort in ascending order
51 | # # I = I[v >= 0.01]
52 | # idx = idx[-top_k:] # indices of the top-k largest vals
53 | # xx1 = boxes.new()
54 | # yy1 = boxes.new()
55 | # xx2 = boxes.new()
56 | # yy2 = boxes.new()
57 | # w = boxes.new()
58 | # h = boxes.new()
59 |
60 | # # keep = torch.Tensor()
61 | # count = 0
62 | # while idx.numel() > 0:
63 | # i = idx[-1] # index of current largest val
64 | # # keep.append(i)
65 | # keep[count] = i
66 | # count += 1
67 | # if idx.size(0) == 1:
68 | # break
69 | # idx = idx[:-1] # remove kept element from view
70 | # # load bboxes of next highest vals
71 | # torch.index_select(x1, 0, idx, out=xx1)
72 | # torch.index_select(y1, 0, idx, out=yy1)
73 | # torch.index_select(x2, 0, idx, out=xx2)
74 | # torch.index_select(y2, 0, idx, out=yy2)
75 | # # store element-wise max with next highest score
76 | # xx1 = torch.clamp(xx1, min=x1[i])
77 | # yy1 = torch.clamp(yy1, min=y1[i])
78 | # xx2 = torch.clamp(xx2, max=x2[i])
79 | # yy2 = torch.clamp(yy2, max=y2[i])
80 | # w.resize_as_(xx2)
81 | # h.resize_as_(yy2)
82 | # w = xx2 - xx1
83 | # h = yy2 - yy1
84 | # # check sizes of xx1 and xx2.. after each iteration
85 | # w = torch.clamp(w, min=0.0)
86 | # h = torch.clamp(h, min=0.0)
87 | # inter = w*h
88 | # # IoU = i / (area(a) + area(b) - i)
89 | # rem_areas = torch.index_select(area, 0, idx) # load remaining areas)
90 | # union = (rem_areas - inter) + area[i]
91 | # IoU = inter/union # store result in iou
92 | # # keep only elements with an IoU <= overlap
93 | # idx = idx[IoU.le(overlap)]
94 | # return keep, count
95 |
96 |
97 | # class Detect(Function):
98 | # """At test time, Detect is the final layer of SSD. Decode location preds,
99 | # apply non-maximum suppression to location predictions based on conf
100 | # scores and threshold to a top_k number of output predictions for both
101 | # confidence score and locations.
102 | # """
103 | # def __init__(self, num_classes, bkg_label, top_k, conf_thresh, nms_thresh):
104 | # self.num_classes = num_classes
105 | # self.background_label = bkg_label
106 | # self.top_k = top_k
107 | # # Parameters used in nms.
108 | # self.nms_thresh = nms_thresh
109 | # if nms_thresh <= 0:
110 | # raise ValueError('nms_threshold must be non negative.')
111 | # self.conf_thresh = conf_thresh
112 | # self.variance = cfg['variance']
113 |
114 | # def forward(self, boxes, scores, prior_boxes):
115 | # """
116 | # Args:
117 | # boxes (:obj:`Tensor`): [B, N, 4]
118 | # scores (:obj:`Tensor`): [N, C]
119 | # prior_boxes (:obj:`Tensor`): [N, 4]
120 |
121 | # loc_data: (tensor) Loc preds from loc layers
122 | # Shape: [batch, num_priors*4]
123 | # conf_data: (tensor) Shape: Conf preds from conf layers
124 | # Shape: [batch*num_priors,num_classes]
125 | # prior_data: (tensor) Prior boxes and variances from priorbox layers
126 | # Shape: [1,num_priors,4]
127 | # """
128 | # num = boxes.size(0) # batch size
129 | # num_priors = prior_boxes.size(0)
130 | # output = torch.zeros(num, self.num_classes, self.top_k, 5)
131 | # conf_preds = scores.view(num, num_priors, self.num_classes).transpose(2, 1)
132 |
133 | # # Decode predictions into bboxes.
134 | # for i in range(num):
135 | # decoded_boxes = decode(boxes[i], prior_data, self.variance)
136 | # # For each class, perform nms
137 | # conf_scores = conf_preds[i].clone()
138 |
139 | # for cl in range(1, self.num_classes):
140 | # c_mask = conf_scores[cl].gt(self.conf_thresh)
141 | # scores = conf_scores[cl][c_mask]
142 |
143 | # score_mask = scores[i].gt(0.05)
144 |
145 | # score = scores[score_mask]
146 | # index = index[score_mask]
147 |
148 | # if scores.size(0) == 0:
149 | # continue
150 |
151 | # l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
152 | # boxes = decoded_boxes[l_mask].view(-1, 4)
153 | # # idx of highest scoring and non-overlapping boxes per class
154 | # # ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
155 | # # boxes (Tensor[N, 4])) – boxes to perform NMS on. They are expected to be in (x1, y1, x2, y2) format
156 | # # scores (Tensor[N]) – scores for each one of the boxes
157 | # # iou_threshold (float) – discards all overlapping boxes with IoU > iou_threshold
158 | # keep = nms(boxes, scores, self.nms_thresh)
159 | # output[i, cl, :count] = \
160 | # torch.cat((scores[ids[:count]].unsqueeze(1),
161 | # boxes[ids[:count]]), 1)
162 |
163 | # flt = output.contiguous().view(num, -1, 5)
164 | # _, idx = flt[:, :, 0].sort(1, descending=True)
165 | # _, rank = idx.sort(1)
166 | # flt[(rank < self.top_k).unsqueeze(-1).expand_as(flt)].fill_(0)
167 | # return output
168 |
--------------------------------------------------------------------------------
/boda/models/ssd/loss_ssd.py:
--------------------------------------------------------------------------------
1 | # from typing import Tuple, List, Dict
2 |
3 | # import torch
4 | # from torch import nn, Tensor
5 | # import torch.nn.functional as F
6 |
7 | # from ...base_architecture import LossFunction
8 | # from ...ops.box import jaccard, cxywh_to_xyxy
9 | # from ...ops.loss import log_sum_exp
10 |
11 |
12 | # class Matcher:
13 | # """Matcher for SSD
14 |
15 | # Arguments:
16 | # threshold (float):
17 | # variances (List[float]):
18 | # """
19 | # def __init__(
20 | # self,
21 | # threshold: float = 0.5,
22 | # variances: List[float] = [0.1, 0.2]
23 | # ) -> None:
24 | # self.threshold = threshold
25 | # self.variances = variances
26 |
27 | # def __call__(
28 | # self,
29 | # pred_boxes,
30 | # pred_scores,
31 | # pred_priors,
32 | # true_boxes,
33 | # ) -> Tuple[Tensor]:
34 | # """
35 | # Arguments:
36 | # pred_boxes (Tensor): Size([N, ])
37 | # pred_priors (Tensor): default boxes Size([N, 4])
38 | # true_boxes (Tensor): ground truth of bounding boxes Size([N, 4])
39 |
40 | # Returns:
41 | # matched_boxes (Tensor): Size([num_priors, 4])
42 | # matched_scores (Tensor): Size([num_priors])
43 | # """
44 | # overlaps = jaccard(
45 | # true_boxes, cxcywh_to_xyxy(pred_priors))
46 |
47 | # # Best prior for each ground truth
48 | # best_prior_overlaps, best_prior_indexes = overlaps.max(1, keepdim=True)
49 | # best_prior_indexes.squeeze_(1)
50 | # best_prior_overlaps.squeeze_(1)
51 |
52 | # # Best ground truth for each prior boxes (default boxes)
53 | # best_truth_overlaps, best_truth_indexes = overlaps.max(0, keepdim=True)
54 | # best_truth_indexes.squeeze_(0)
55 | # best_truth_overlaps.squeeze_(0)
56 | # best_truth_overlaps.index_fill_(0, best_prior_indexes, 2)
57 |
58 | # # TODO refactor: index best_prior_idx with long tensor
59 | # # Ensure every gt matches with its prior of max overlap
60 | # for j in range(best_prior_indexes.size(0)):
61 | # best_truth_indexes[best_prior_indexes[j]] = j
62 |
63 | # matched_boxes = true_boxes[best_truth_indexes] # Size([N, 4])
64 | # matched_scores = pred_scores[best_truth_indexes] + 1 # Size([N])
65 | # matched_scores[best_truth_overlaps < self.threshold] = 0 # Size([])
66 | # matched_boxes = self.encode(matched_boxes, pred_priors)
67 |
68 | # return matched_boxes, matched_scores
69 |
70 | # def encode(self, matched_boxes, pred_priors):
71 | # """
72 | # Return:
73 | # (Tensor): Size([num_priors, 4])
74 | # """
75 | # gcxcy = (matched_boxes[:, :2] + matched_boxes[:, 2:])/2 - pred_priors[:, :2]
76 | # gcxcy /= (self.variances[0] * pred_priors[:, 2:])
77 | # gwh = (matched_boxes[:, 2:] - matched_boxes[:, :2]) / pred_priors[:, 2:]
78 | # gwh = torch.log(gwh) / self.variances[1]
79 | # return torch.cat([gcxcy, gwh], dim=1)
80 |
81 | # def decode(self, pred_boxes, pred_priors):
82 | # boxes = torch.cat((
83 | # pred_priors[:, :2] + pred_boxes[:, :2] * self.variances[0] * pred_priors[:, 2:],
84 | # pred_priors[:, 2:] * torch.exp(pred_boxes[:, 2:] * self.variances[1])), dim=1)
85 | # boxes[:, :2] -= boxes[:, 2:] / 2
86 | # boxes[:, 2:] += boxes[:, :2]
87 | # return boxes
88 |
89 |
90 | # class SsdLoss(LossFunction):
91 | # def __init__(
92 | # self,
93 | # size,
94 | # overlap_thresh,
95 | # prior_for_matching,
96 | # bkg_label,
97 | # neg_mining,
98 | # neg_pos,
99 | # neg_overlap,
100 | # encode_target,
101 | # variances: List[float] = [0.1, 0.2]
102 | # ) -> None:
103 | # super().__init__()
104 | # self.num_classes = config.num_classes + 1
105 | # self.variances = variances
106 | # self.threshold = overlap_thresh
107 | # self.background_label = bkg_label
108 | # self.encode_target = encode_target
109 | # self.use_prior_for_matching = prior_for_matching
110 | # self.do_neg_mining = neg_mining
111 | # self.negpos_ratio = neg_pos
112 | # self.neg_overlap = neg_overlap
113 |
114 | # def forward(self, inputs, targets):
115 | # """
116 | # """
117 | # self.check_targets(targets)
118 | # targets = self.copy_targets(targets)
119 |
120 | # pred_boxes = inputs['boxes']
121 | # num_boxes = pred_boxes.size(0)
122 | # pred_scores = inputs['scores']
123 | # pred_priors = inputs['priors']
124 | # pred_priors = pred_priors[:pred_boxes.size(1), :]
125 |
126 | # batch_size = len(targets)
127 | # num_priors = pred_priors.size(0)
128 |
129 | # # match priors (default boxes) and ground truth boxes
130 | # matched_true_boxes = pred_boxes.new_tensor(batch_size, num_priors, 4)
131 | # matched_true_scores = pred_boxes.new_tensor(batch_size, num_priors, dtype=torch.int64)
132 |
133 | # for i, target in enumerate(targets):
134 | # true_boxes = target['boxes']
135 | # true_labels = target['labels']
136 | # matched_boxes, matched_scores = Matcher(self.threshold)(
137 | # pred_boxes, pred_priors, true_boxes, true_labels)
138 |
139 | # matched_true_boxes[i] = matched_boxes
140 | # matched_true_scores[i] = matched_scores
141 |
142 | # matched_true_boxes.requires_grad = False
143 | # matched_true_scores.requires_grad = False
144 |
145 | # # TODO: positive_scores or pos_scores
146 | # pos = matched_true_scores > 0
147 | # num_pred_scores = pos.sum(dim=1, keepdim=True)
148 |
149 | # pos_indexes = pos.unsqueeze(pos.dim()).expand_as(pred_boxes)
150 | # matched_pred_boxes = pred_boxes[pos_indexes].view(-1, 4)
151 | # matched_true_boxes = matched_true_boxes[pos_indexes].view(-1, 4)
152 |
153 | # loss_box = F.smooth_l1_loss(
154 | # matched_pred_boxes, matched_true_boxes, size_average=False)
155 |
156 | # # Compute hard negative mining
157 | # pred_scores = pred_scores.view(-1, self.num_classes)
158 | # loss_score = log_sum_exp(pred_scores) - pred_scores.gather(1, matched_true_scores.view(-1, 1))
159 |
160 | # # Hard negative mining
161 | # loss_score[pos] = 0
162 | # loss_score = loss_score.view(num_boxes, -1)
163 |
164 | # _, loss_index = loss_score.sort(1, descending=True)
165 | # _, rank_index = loss_index.sort(1)
166 |
167 | # num_pos = pos.long().sum(1, keepdim=True)
168 | # num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
169 | # neg = rank_index < num_neg.expand_as(rank_index)
170 |
171 | # # Confidence loss including positive and negative samples
172 | # pos_index = pos.unsqueeze(2).expand_as(pred_scores)
173 | # neg_index = neg.unsqueeze(2).expand_as(pred_scores)
174 |
175 | # pred_scores = pred_boxes[(pos_index + neg_index).gt(0)].view(-1, self.num_classes)
176 | # weighted_targets = matched_true_scores[(pos+neg).gt(0)]
177 | # loss_score = F.cross_entropy(pred_scores, weighted_targets, size_average=False)
178 |
179 | # losses = {
180 | # 'loss_bbox': None,
181 | # 'loss_conf': None,
182 | # }
183 |
184 | # return losses
185 |
--------------------------------------------------------------------------------
/boda/models/yolact/README.md:
--------------------------------------------------------------------------------
1 | # YOLACT (You Only Look At CoefficienTs)
2 |
3 | ```
4 | ██╗ ██╗ ██████╗ ██╗ █████╗ ██████╗████████╗
5 | ╚██╗ ██╔╝██╔═══██╗██║ ██╔══██╗██╔════╝╚══██╔══╝
6 | ╚████╔╝ ██║ ██║██║ ███████║██║ ██║
7 | ╚██╔╝ ██║ ██║██║ ██╔══██║██║ ██║
8 | ██║ ╚██████╔╝███████╗██║ ██║╚██████╗ ██║
9 | ╚═╝ ╚═════╝ ╚══════╝╚═╝ ╚═╝ ╚═════╝ ╚═╝
10 | ```
11 |
12 | ## YOLACT Architecture
13 |
14 | ```{bash}
15 | ==============================================================================
16 | Layer (type:depth-idx) Output Shape Param #
17 | ==============================================================================
18 | ├─ResNet: 1-1 [-1, 256, 138, 138] --
19 | | └─Conv2d: 2-1 [-1, 64, 275, 275] 9,408
20 | | └─BatchNorm2d: 2-2 [-1, 64, 275, 275] 128
21 | | └─ReLU: 2-3 [-1, 64, 275, 275] --
22 | | └─MaxPool2d: 2-4 [-1, 64, 138, 138] --
23 | | └─ModuleList: 2 -- --
24 | | | └─Sequential: 3-1 [-1, 256, 138, 138] 215,808
25 | | | └─Sequential: 3-2 [-1, 512, 69, 69] 1,219,584
26 | | | └─Sequential: 3-3 [-1, 1024, 35, 35] 26,090,496
27 | | | └─Sequential: 3-4 [-1, 2048, 18, 18] 14,964,736
28 | ├─YolactPredictNeck: 1-2 [-1, 256, 69, 69] --
29 | | └─ModuleList: 2 -- --
30 | | | └─Conv2d: 3-5 [-1, 256, 18, 18] 524,544
31 | | | └─Conv2d: 3-6 [-1, 256, 35, 35] 262,400
32 | | | └─Conv2d: 3-7 [-1, 256, 69, 69] 131,328
33 | | └─ModuleList: 2 -- --
34 | | | └─Conv2d: 3-8 [-1, 256, 18, 18] 590,080
35 | | | └─Conv2d: 3-9 [-1, 256, 35, 35] 590,080
36 | | | └─Conv2d: 3-10 [-1, 256, 69, 69] 590,080
37 | | └─ModuleList: 2 -- --
38 | | | └─Conv2d: 3-11 [-1, 256, 9, 9] 590,080
39 | | | └─Conv2d: 3-12 [-1, 256, 5, 5] 590,080
40 | ├─YolactPredictHead: 1 -- --
41 | | └─HeadBranch: 2-5 [[-1, 4]] --
42 | | | └─Conv2d: 3-13 [-1, 256, 69, 69] 590,080
43 | | | └─Sequential: 3-14 [-1, 12, 69, 69] 27,660
44 | | | └─Sequential: 3-15 [-1, 96, 69, 69] 221,280
45 | | | └─Sequential: 3-16 [-1, 243, 69, 69] 560,115
46 | | └─HeadBranch: 2-6 [[-1, 4]] --
47 | | └─HeadBranch: 2 -- --
48 | | | └─Conv2d: 3-17 [-1, 256, 35, 35] (recursive)
49 | | | └─Sequential: 3-18 [-1, 12, 35, 35] (recursive)
50 | | | └─Sequential: 3-19 [-1, 96, 35, 35] (recursive)
51 | | | └─Sequential: 3-20 [-1, 243, 35, 35] (recursive)
52 | | └─HeadBranch: 2-7 [[-1, 4]] --
53 | | └─HeadBranch: 2 -- --
54 | | | └─Conv2d: 3-21 [-1, 256, 18, 18] (recursive)
55 | | | └─Sequential: 3-22 [-1, 12, 18, 18] (recursive)
56 | | | └─Sequential: 3-23 [-1, 96, 18, 18] (recursive)
57 | | | └─Sequential: 3-24 [-1, 243, 18, 18] (recursive)
58 | ├─ProtoNet: 1-3 [-1, 32, 138, 138] --
59 | | └─Conv2d: 2-8 [-1, 256, 69, 69] 590,080
60 | | └─Conv2d: 2-9 [-1, 256, 69, 69] 590,080
61 | | └─Conv2d: 2-10 [-1, 256, 69, 69] 590,080
62 | | └─Upsample: 2-11 [-1, 256, 138, 138] --
63 | | └─Conv2d: 2-12 [-1, 256, 138, 138] 590,080
64 | | └─Conv2d: 2-13 [-1, 32, 138, 138] 8,224
65 | ├─SemanticSegmentation: 1-4 [-1, 80, 69, 69] --
66 | | └─Conv2d: 2-14 [-1, 80, 69, 69] 20,560
67 | ==============================================================================
68 | Total params: 50,157,071
69 | Trainable params: 50,157,071
70 | Non-trainable params: 0
71 | Total mult-adds (G): 34.48
72 | ==============================================================================
73 | Input size (MB): 3.46
74 | Forward/backward pass size (MB): 193.40
75 | Params size (MB): 191.33
76 | Estimated Total Size (MB): 388.20
77 | ==============================================================================
78 | ```
79 |
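The architecture table above can be reproduced with the bundled summary helper (the calls below mirror `run_test.py`; the 550×550 input size is an assumption matching YOLACT's default training resolution):

```{python}
from boda.models import YolactConfig, YolactModel
from boda.models.feature_extractor import resnet101
from boda.lib.torchsummary import summary

config = YolactConfig(num_classes=80)
model = YolactModel(config, backbone=resnet101()).to('cuda')
print(summary(model, input_data=(3, 550, 550), verbose=0))
```
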
80 | ```{python}
81 | class CocoDataset(Dataset):
82 | def __getitem__(self, index: int) -> Tuple[Tensor, Dict]:
83 | """
84 | Returns:
85 | image (Tensor[C, H, W]): Original size
86 | targets (Dict[str, Any]):
87 | """
88 |         return image, {
89 |             'boxes': FloatTensor[N, 4],         # [x1, y1, x2, y2]
90 |             'labels': LongTensor[N],
91 |             'masks': ByteTensor[N, H, W],
92 |             'keypoints': FloatTensor[N, K, 3],  # [x, y, visibility]
93 |             'area': float,
94 |             'iscrowd': 0 or 1,
95 |             'width': int,   # width of the original image
96 |             'height': int,  # height of the original image
97 |         }
98 | ```
99 |
100 | ```{python}
101 | from boda.models import YolactConfig, YolactModel, YolactLoss
102 |
103 | config = YolactConfig(num_classes=80)
104 | model = YolactModel(config).to('cuda')
105 | criterion = YolactLoss()
106 |
107 | for epoch in range(num_epochs):
108 | for images, targets in train_loader:
109 | outputs = model(images)
110 | losses = criterion(outputs, targets)
111 | loss = sum(loss for loss in losses.values())
112 | ```
113 |
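The loop above only computes the losses; a complete training step also back-propagates the summed loss and updates the parameters. A minimal sketch (the optimizer and its hyperparameters are assumptions, not settings prescribed by the project):

```{python}
import torch

optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9, weight_decay=5e-4)

for epoch in range(num_epochs):
    for images, targets in train_loader:
        outputs = model(images)
        losses = criterion(outputs, targets)
        loss = sum(loss for loss in losses.values())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
```
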
114 | ```{python}
115 | class YolactModel:
116 |     def forward(self, images):
117 |         if self.training:
118 |             # outputs after preprocessing?
119 |             return {
120 |                 'boxes': FloatTensor,
121 |                 'masks': Tensor,
122 |                 'scores': FloatTensor,
123 |                 'prior_boxes': FloatTensor,   # or should this key be called 'anchors'?
124 |                 'proposals': ...,             # include?
125 |                 'proto_masks': ...,           # include?
126 |                 'semantic_masks': ...,        # include?
127 |             }
128 |         else:
129 |             # outputs after preprocessing
130 |             return {
131 |                 'boxes': Tensor,
132 |                 'masks': Tensor,
133 |                 'scores': Tensor,
134 |                 'labels': Tensor,
135 |                 'keypoints': Tensor,
136 |             }
137 | ```
138 |
139 |
140 | ```{python}
141 | outputs = model(images)
142 | outputs
143 |
144 | # SSD
145 | {'boxes', 'scores', 'prior_boxes'}
146 |
147 | # Faster R-CNN
148 | {'boxes', 'proposals', 'scores', 'anchors'}
149 |
150 | # Keypoint R-CNN
151 | {'boxes', 'proposals', 'scores', 'keypoints'}
152 |
153 | # YOLACT
154 | {'boxes', 'masks', 'scores', 'prior_boxes', 'proto_masks', 'semantic_masks'}
155 |
156 | # SOLO
157 | {'category', 'masks'}
158 |
159 | # CenterMask
160 | ```
161 |
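At inference time the raw outputs are turned into per-image detections by the post-processor; a minimal sketch following `run_test.py` (the image path is a placeholder):

```{python}
from PIL import Image
from torchvision import transforms

from boda.models import YolactModel, PostprocessYolact

model = YolactModel.from_pretrained('yolact-base').cuda()
model.eval()

aug = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

image = Image.open('example.jpg')
outputs = model([aug(image).cuda()])

post = PostprocessYolact()
results = post(outputs, outputs['image_sizes'])
print(results[0]['boxes'], results[0]['labels'], results[0]['scores'])
```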
162 |
--------------------------------------------------------------------------------
/boda/lib/torchsummary/model_statistics.py:
--------------------------------------------------------------------------------
1 | """ model_statistics.py """
2 | from typing import Any, Dict, Iterable, List, Union
3 |
4 | import numpy as np
5 | import torch
6 |
7 | from .formatting import FormattingOptions, Verbosity
8 | from .layer_info import LayerInfo
9 |
10 | HEADER_TITLES = {
11 | "kernel_size": "Kernel Shape",
12 | "input_size": "Input Shape",
13 | "output_size": "Output Shape",
14 | "num_params": "Param #",
15 | "mult_adds": "Mult-Adds",
16 | }
17 | CORRECTED_INPUT_SIZE_TYPE = List[Union[Iterable[Any], torch.Size]]
18 |
19 |
20 | class ModelStatistics:
21 | """Class for storing results of the summary."""
22 |
23 | def __init__(
24 | self,
25 | summary_list: List[LayerInfo],
26 | input_size: CORRECTED_INPUT_SIZE_TYPE,
27 | formatting: FormattingOptions,
28 | ):
29 | self.summary_list = summary_list
30 | self.input_size = input_size
31 | self.total_input = (
32 | sum(abs(np.prod(sz)) for sz in input_size) if input_size else 0
33 | )
34 | self.formatting = formatting
35 | self.total_params, self.trainable_params = 0, 0
36 | self.total_output, self.total_mult_adds = 0, 0
37 | for layer_info in summary_list:
38 | self.total_mult_adds += layer_info.macs
39 | if not layer_info.is_recursive:
40 | if layer_info.depth == formatting.max_depth or (
41 | not any(layer_info.module.children())
42 | and layer_info.depth < formatting.max_depth
43 | ):
44 | self.total_params += layer_info.num_params
45 | if layer_info.trainable:
46 | self.trainable_params += layer_info.num_params
47 | if layer_info.num_params > 0 and not any(layer_info.module.children()):
48 | # x2 for gradients
49 | self.total_output += 2.0 * abs(np.prod(layer_info.output_size))
50 |
51 | def __repr__(self) -> str:
52 | """Print results of the summary."""
53 | header_row = self.formatting.format_row("Layer (type:depth-idx)", HEADER_TITLES)
54 | layer_rows = self.layers_to_str()
55 | divider = "=" * self.formatting.get_total_width()
56 | summary_str = (
57 | "{0}\n{1}{0}\n{2}{0}"
58 | "\nTotal params: {3:,}\n"
59 | "Trainable params: {4:,}\n"
60 | "Non-trainable params: {5:,}\n".format(
61 | divider,
62 | header_row,
63 | layer_rows,
64 | self.total_params,
65 | self.trainable_params,
66 | self.total_params - self.trainable_params,
67 | )
68 | )
69 | if self.input_size:
70 | summary_str += (
71 | "Total mult-adds ({}): {:0.2f}\n"
72 | "{}\n"
73 | "Input size (MB): {:0.2f}\n"
74 | "Forward/backward pass size (MB): {:0.2f}\n"
75 | "Params size (MB): {:0.2f}\n"
76 | "Estimated Total Size (MB): {:0.2f}\n".format(
77 | "G" if self.total_mult_adds >= 1e9 else "M",
78 | self.to_readable(self.total_mult_adds),
79 | divider,
80 | self.to_bytes(self.total_input),
81 | self.to_bytes(self.total_output),
82 | self.to_bytes(self.total_params),
83 | self.to_bytes(
84 | self.total_input + self.total_output + self.total_params
85 | ),
86 | )
87 | )
88 | summary_str += divider
89 | return summary_str
90 |
91 | @staticmethod
92 | def to_bytes(num: int) -> float:
93 | """Converts a number (assume floats, 4 bytes each) to megabytes."""
94 | return num * 4 / (1024 ** 2)
95 |
96 | @staticmethod
97 | def to_readable(num: int) -> float:
98 | """Converts a number to millions or billions."""
99 | if num >= 1e9:
100 | return num / 1e9
101 | return num / 1e6
102 |
103 | def layer_info_to_row(
104 | self, layer_info: LayerInfo, reached_max_depth: bool = False
105 | ) -> str:
106 | """Convert layer_info to string representation of a row."""
107 |
108 | def get_start_str(depth: int) -> str:
109 | return "├─" if depth == 1 else "| " * (depth - 1) + "└─"
110 |
111 | row_values = {
112 | "kernel_size": str(layer_info.kernel_size)
113 | if layer_info.kernel_size
114 | else "--",
115 | "input_size": str(layer_info.input_size),
116 | "output_size": str(layer_info.output_size),
117 | "num_params": layer_info.num_params_to_str(reached_max_depth),
118 | "mult_adds": layer_info.macs_to_str(reached_max_depth),
119 | }
120 | depth = layer_info.depth
121 | name = (get_start_str(depth) if self.formatting.use_branching else "") + str(
122 | layer_info
123 | )
124 | new_line = self.formatting.format_row(name, row_values)
125 | if self.formatting.verbose == Verbosity.VERBOSE.value:
126 | for inner_name, inner_shape in layer_info.inner_layers.items():
127 | prefix = (
128 | get_start_str(depth + 1) if self.formatting.use_branching else " "
129 | )
130 | extra_row_values = {"kernel_size": str(inner_shape)}
131 | new_line += self.formatting.format_row(
132 | prefix + inner_name, extra_row_values
133 | )
134 | return new_line
135 |
136 | def layers_to_str(self) -> str:
137 |         """Render each layer of the model as a tree or as a flat list."""
138 | if self.formatting.use_branching:
139 | return self._layer_tree_to_str()
140 |
141 | layer_rows = ""
142 | for layer_info in self.summary_list:
143 | layer_rows += self.layer_info_to_row(layer_info)
144 | return layer_rows
145 |
146 | def _layer_tree_to_str(self) -> str:
147 | """Print each layer of the model using a fancy branching diagram."""
148 | new_str = ""
149 | current_hierarchy: Dict[int, LayerInfo] = {}
150 |
151 | for layer_info in self.summary_list:
152 | if layer_info.depth > self.formatting.max_depth:
153 | continue
154 |
155 | # create full hierarchy of current layer
156 | hierarchy = {}
157 | parent = layer_info.parent_info
158 | while parent is not None and parent.depth > 0:
159 | hierarchy[parent.depth] = parent
160 | parent = parent.parent_info
161 |
162 | # show hierarchy if it is not there already
163 | for d in range(1, layer_info.depth):
164 | if (
165 | d not in current_hierarchy
166 | or current_hierarchy[d].module is not hierarchy[d].module
167 | ):
168 | new_str += self.layer_info_to_row(hierarchy[d])
169 | current_hierarchy[d] = hierarchy[d]
170 |
171 | reached_max_depth = layer_info.depth == self.formatting.max_depth
172 | new_str += self.layer_info_to_row(layer_info, reached_max_depth)
173 | current_hierarchy[layer_info.depth] = layer_info
174 |
175 | # remove deeper hierarchy
176 | d = layer_info.depth + 1
177 | while d in current_hierarchy:
178 | current_hierarchy.pop(d)
179 | d += 1
180 |
181 | return new_str
182 |
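183 | # For reference, the size estimates above assume 4-byte (float32) elements:
184 | # a model with 1,000,000 parameters is reported by `to_bytes` as
185 | # 1_000_000 * 4 / 1024**2 ≈ 3.81 MB, and `to_readable` switches from "M"
186 | # (divide by 1e6) to "G" (divide by 1e9) once the total mult-adds reach 1e9.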
--------------------------------------------------------------------------------
/boda/custom_modules.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import math
3 | import re
4 | from functools import partial
5 |
6 | import torch
7 | from torch import nn
8 | from torch.nn import functional as F
9 |
10 |
11 | class Conv2dDynamicSamePadding(nn.Conv2d):
12 | """
13 | Adapted from:
14 | https://github.com/lukemelas/EfficientNet-PyTorch
15 | https://github.com/rwightman/pytorch-image-models
16 |
17 |     2D convolution with TensorFlow-style 'SAME' padding for a dynamic image size.
18 |     The padding is computed dynamically in the forward pass.
19 |
20 | Tips for 'SAME' mode padding.
21 | Given the following:
22 | i: width or height
23 | s: stride
24 | k: kernel size
25 | d: dilation
26 | p: padding
27 | Output after Conv2d:
28 | o = floor((i+p-((k-1)*d+1))/s+1)
29 | If o equals i, i = floor((i+p-((k-1)*d+1))/s+1),
30 | => p = (i-1)*s+((k-1)*d+1)-i
31 | """
32 |
33 | def __init__(
34 | self,
35 | in_channels: int,
36 | out_channels: int,
37 | kernel_size: int,
38 | stride: int = 1,
39 | dilation: int = 1,
40 | groups: int = 1,
41 | bias: bool = True,
42 | ) -> None:
43 | super().__init__(
44 | in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias
45 | )
46 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2
47 |
48 | def forward(self, x):
49 | ih, iw = x.size()[-2:]
50 | kh, kw = self.weight.size()[-2:]
51 | sh, sw = self.stride
52 | oh, ow = math.ceil(ih / sh), math.ceil(
53 | iw / sw
54 | ) # change the output size according to stride
55 |
56 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
57 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
58 | if pad_h > 0 or pad_w > 0:
59 | x = F.pad(
60 | x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]
61 | )
62 |
63 | return F.conv2d(
64 | x,
65 | self.weight,
66 | self.bias,
67 | self.stride,
68 | self.padding,
69 | self.dilation,
70 | self.groups,
71 | )
72 |
73 |
74 | class Conv2dStaticSamePadding(nn.Conv2d):
75 | """
76 |     2D convolution with TensorFlow-style 'SAME' padding for a fixed, known input image size.
77 |     The padding module is built in the constructor and then applied in forward.
78 | """
79 |
80 | def __init__(
81 | self,
82 | in_channels,
83 | out_channels,
84 | kernel_size,
85 | stride=1,
86 | image_size=None,
87 | **kwargs
88 | ) -> None:
89 | super().__init__(in_channels, out_channels, kernel_size, stride, **kwargs)
90 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2
91 | assert image_size is not None
92 |
93 | ih, iw = (image_size, image_size) if isinstance(image_size, int) else image_size
94 | kh, kw = self.weight.size()[-2:]
95 | sh, sw = self.stride
96 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
97 |
98 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
99 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
100 | if pad_h > 0 or pad_w > 0:
101 | self.static_padding = nn.ZeroPad2d(
102 | (pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2)
103 | )
104 | else:
105 | self.static_padding = nn.Identity()
106 |
107 | def forward(self, x):
108 | x = self.static_padding(x)
109 | x = F.conv2d(
110 | x,
111 | self.weight,
112 | self.bias,
113 | self.stride,
114 | self.padding,
115 | self.dilation,
116 | self.groups,
117 | )
118 | return x
119 |
120 |
121 | class MaxPool2dDynamicSamePadding(nn.MaxPool2d):
122 | """
123 |     2D max pooling with TensorFlow-style 'SAME' padding for a dynamic image size.
124 |     The padding is computed dynamically in the forward pass.
125 | """
126 |
127 | def __init__(
128 | self,
129 | kernel_size,
130 | stride,
131 | padding=0,
132 | dilation=1,
133 | return_indices=False,
134 | ceil_mode=False,
135 | ) -> None:
136 | super().__init__(
137 | kernel_size, stride, padding, dilation, return_indices, ceil_mode
138 | )
139 | self.stride = [self.stride] * 2 if isinstance(self.stride, int) else self.stride
140 | self.kernel_size = (
141 | [self.kernel_size] * 2
142 | if isinstance(self.kernel_size, int)
143 | else self.kernel_size
144 | )
145 | self.dilation = (
146 | [self.dilation] * 2 if isinstance(self.dilation, int) else self.dilation
147 | )
148 |
149 | def forward(self, x):
150 | ih, iw = x.size()[-2:]
151 | kh, kw = self.kernel_size
152 | sh, sw = self.stride
153 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
154 |
155 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
156 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
157 | if pad_h > 0 or pad_w > 0:
158 | x = F.pad(
159 | x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]
160 | )
161 |
162 | return F.max_pool2d(
163 | x,
164 | self.kernel_size,
165 | self.stride,
166 | self.padding,
167 | self.dilation,
168 | self.ceil_mode,
169 | self.return_indices,
170 | )
171 |
172 |
173 | class MaxPool2dStaticSamePadding(nn.MaxPool2d):
174 | """
175 |     2D max pooling with TensorFlow-style 'SAME' padding for a fixed, known input image size.
176 |     The padding module is built in the constructor and then applied in forward.
177 | """
178 |
179 | def __init__(self, kernel_size, stride, image_size=None, **kwargs) -> None:
180 | super().__init__(kernel_size, stride, **kwargs)
181 | self.stride = [self.stride] * 2 if isinstance(self.stride, int) else self.stride
182 | self.kernel_size = (
183 | [self.kernel_size] * 2
184 | if isinstance(self.kernel_size, int)
185 | else self.kernel_size
186 | )
187 | self.dilation = (
188 | [self.dilation] * 2 if isinstance(self.dilation, int) else self.dilation
189 | )
190 | assert image_size is not None
191 |
192 | ih, iw = (image_size, image_size) if isinstance(image_size, int) else image_size
193 | kh, kw = self.kernel_size
194 | sh, sw = self.stride
195 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
196 |
197 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
198 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
199 | if pad_h > 0 or pad_w > 0:
200 | self.static_padding = nn.ZeroPad2d(
201 | (pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2)
202 | )
203 | else:
204 | self.static_padding = nn.Identity()
205 |
206 | def forward(self, x):
207 | x = self.static_padding(x)
208 | x = F.max_pool2d(
209 | x,
210 | self.kernel_size,
211 | self.stride,
212 | self.padding,
213 | self.dilation,
214 | self.ceil_mode,
215 | self.return_indices,
216 | )
217 | return x
218 |
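219 | # Worked example of the 'SAME' padding used by the modules above: for an input
220 | # side i = 7 with kernel k = 3, stride s = 2 and dilation d = 1, the target
221 | # output is o = ceil(7 / 2) = 4, so pad = max((4 - 1) * 2 + (3 - 1) * 1 + 1 - 7, 0) = 2,
222 | # split evenly as 1 pixel per side. For i = 224 the same formula gives pad = 1,
223 | # placed on the right/bottom only (pad // 2 = 0, pad - pad // 2 = 1), matching TensorFlow.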
--------------------------------------------------------------------------------
/boda/models/feature_extractor/resnet.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple, List, Optional, Callable
2 |
3 | import torch
4 | import torch.nn.functional as F
5 | from torch import nn, Tensor
6 |
7 |
8 | # TODO: BACKBONE_ARCHIVE_MAP or _MAPS? or ARCHIVES?
9 | BACKBONE_ARCHIVE_MAP = {
10 | "resnet18": "https://download.pytorch.org/models/resnet18-5c106cde.pth",
11 | "resnet34": "https://download.pytorch.org/models/resnet34-333f7ec4.pth",
12 | "resnet50": "https://download.pytorch.org/models/resnet50-19c8e357.pth",
13 | "resnet101": "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth",
14 | }
15 |
16 |
17 | class Conv2d1x1(nn.Sequential):
18 | """1x1 convolution"""
19 |
20 | def __init__(
21 | self,
22 | in_planes: int,
23 | out_planes: int,
24 | stride: int = 1,
25 | ) -> None:
26 | super().__init__(
27 | nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
28 | )
29 |
30 |
31 | class Conv2d3x3(nn.Sequential):
32 | """3x3 convolution with padding"""
33 |
34 | def __init__(
35 | self,
36 | in_planes: int,
37 | out_planes: int,
38 | stride: int = 1,
39 | groups: int = 1,
40 | dilation: int = 1,
41 | ) -> None:
42 | super().__init__(
43 | nn.Conv2d(
44 | in_planes,
45 | out_planes,
46 | kernel_size=3,
47 | stride=stride,
48 | padding=dilation,
49 | groups=groups,
50 | bias=False,
51 | dilation=dilation,
52 | )
53 | )
54 |
55 |
56 | class BasicBlock(nn.Module):
57 | expansion: int = 1
58 |
59 | def __init__(
60 | self,
61 | inplanes: int,
62 | planes: int,
63 | stride: int = 1,
64 | downsample: Optional[nn.Module] = None,
65 | groups: int = 1,
66 | base_width: int = 64,
67 | dilation: int = 1,
68 | norm_layer: Optional[Callable[..., nn.Module]] = None,
69 | ) -> None:
70 | super().__init__()
71 | if norm_layer is None:
72 | norm_layer = nn.BatchNorm2d # (track_running_stats=False)
73 |
74 | if groups != 1 or base_width != 64:
75 | raise ValueError("BasicBlock only supports groups=1 and base_width=64")
76 | if dilation > 1:
77 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
78 |
79 | self.conv1 = Conv2d3x3(inplanes, planes, stride)
80 | self.bn1 = norm_layer(planes)
81 | self.relu = nn.ReLU(inplace=True)
82 | self.conv2 = Conv2d3x3(planes, planes)
83 | self.bn2 = norm_layer(planes)
84 | self.downsample = downsample
85 | self.stride = stride
86 |
87 | def forward(self, x: Tensor) -> Tensor:
88 | identity = x
89 |
90 | out = self.conv1(x)
91 | out = self.bn1(out)
92 | out = self.relu(out)
93 |
94 | out = self.conv2(out)
95 | out = self.bn2(out)
96 |
97 | if self.downsample is not None:
98 | identity = self.downsample(x)
99 |
100 | out += identity
101 | out = self.relu(out)
102 |
103 | return out
104 |
105 |
106 | class Bottleneck(nn.Module):
107 | expansion = 4
108 |
109 | def __init__(
110 | self,
111 | in_planes: int,
112 | planes: int,
113 | stride: int = 1,
114 | downsample: Optional[nn.Module] = None,
115 | norm_layer: Optional[Callable[..., nn.Module]] = None,
116 | ) -> None:
117 | super().__init__()
118 | if norm_layer is None:
119 | norm_layer = nn.BatchNorm2d
120 |
121 | self.conv1 = Conv2d1x1(in_planes, planes)
122 | self.bn1 = norm_layer(planes)
123 |
124 | self.conv2 = Conv2d3x3(planes, planes, stride=stride)
125 | self.bn2 = norm_layer(planes)
126 |
127 | self.conv3 = Conv2d1x1(planes, planes * self.expansion)
128 | self.bn3 = norm_layer(planes * self.expansion)
129 |
130 | self.downsample = downsample
131 | self.stride = stride
132 |
133 | def forward(self, inputs) -> Tensor:
134 | residual = inputs
135 |
136 | outputs = F.relu(self.bn1(self.conv1(inputs)), inplace=True)
137 | outputs = F.relu(self.bn2(self.conv2(outputs)), inplace=True)
138 | outputs = self.bn3(self.conv3(outputs))
139 |
140 | if self.downsample is not None:
141 | residual = self.downsample(inputs)
142 |
143 | outputs += residual
144 | outputs = F.relu(outputs, inplace=True)
145 |
146 | return outputs
147 |
148 |
149 | class ResNet(nn.Module):
150 | def __init__(self, layers, block=Bottleneck):
151 | super().__init__()
152 | self.num_base_layers = len(layers)
153 | self.layers = nn.ModuleList()
154 | self.channels = []
155 |
156 | self.inplanes = 64
157 |
158 | # TODO self.stem = nn.Sequential() ??
159 | self.conv = nn.Conv2d(
160 | 3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False
161 | )
162 | self.bn = nn.BatchNorm2d(self.inplanes)
163 | self.relu = nn.ReLU(inplace=True)
164 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
165 |
166 | # TODO self._make_stage ???
167 | self._make_layer(block, 64, layers[0])
168 | self._make_layer(block, 128, layers[1], stride=2)
169 | self._make_layer(block, 256, layers[2], stride=2)
170 | self._make_layer(block, 512, layers[3], stride=2)
171 |
172 | # self.backbone_modules = [m for m in self.modules() if isinstance(m, nn.Conv2d)]
173 | # self.backbone_modules = [m for m in self.modules()]
174 |
175 | def _make_layer(self, block, planes, blocks, stride=1):
176 | downsample = None
177 | if stride != 1 or self.inplanes != planes * block.expansion:
178 | downsample = nn.Sequential(
179 | Conv2d1x1(
180 | self.inplanes,
181 | planes * block.expansion,
182 | stride=stride,
183 | ),
184 | nn.BatchNorm2d(planes * block.expansion),
185 | )
186 |
187 | layers = [block(self.inplanes, planes, stride, downsample)]
188 | self.inplanes = planes * block.expansion
189 |
190 | # Add identity block
191 | for _ in range(1, blocks):
192 | layers.append(block(self.inplanes, planes))
193 |
194 | # layer = nn.Sequential(*layers)
195 |
196 | self.channels.append(planes * block.expansion)
197 | self.layers.append(nn.Sequential(*layers))
198 |
199 | def forward(self, inputs):
200 | inputs = self.conv(inputs)
201 | inputs = self.bn(inputs)
202 | inputs = self.relu(inputs)
203 | inputs = self.maxpool(inputs)
204 |
205 | outputs = []
206 | for layer in self.layers:
207 | inputs = layer(inputs)
208 | outputs.append(inputs)
209 |
210 | return outputs
211 |
212 | def add_layer(self, conv_channels=1024, downsample=2, depth=1, block=Bottleneck):
213 | self._make_layer(
214 | block, conv_channels // block.expansion, blocks=depth, stride=downsample
215 | )
216 |
217 | def from_pretrained(self, path):
218 | state_dict = torch.load(path)
219 |
220 | try:
221 | state_dict.pop("fc.weight")
222 | state_dict.pop("fc.bias")
223 | except KeyError:
224 | pass
225 |
226 | keys = list(state_dict)
227 | for key in keys:
228 | if key.startswith("layer"):
229 | idx = int(key[5])
230 | new_key = "layers." + str(idx - 1) + key[6:]
231 | state_dict[new_key] = state_dict.pop(key)
232 |
233 | self.load_state_dict(state_dict, strict=False)
234 |
235 |
236 | def resnet18():
237 | backbone = ResNet([2, 2, 2, 2], BasicBlock)
238 | return backbone
239 |
240 |
241 | def resnet34():
242 | backbone = ResNet([3, 4, 6, 3], BasicBlock)
243 | print(backbone.channels)
244 | return backbone
245 |
246 |
247 | def resnet50(pretrained: bool = False):
248 | backbone = ResNet([3, 4, 6, 3], Bottleneck)
249 | return backbone
250 |
251 |
252 | def resnet101(pretrained: bool = False):
253 | backbone = ResNet([3, 4, 23, 3], Bottleneck)
254 | return backbone
255 |
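256 | # Minimal usage sketch (illustrative): the backbone returns one feature map per
257 | # stage instead of classification logits.
258 | #
259 | #   backbone = resnet50()
260 | #   features = backbone(torch.randn(1, 3, 512, 512))
261 | #   # len(features) == 4, backbone.channels == [256, 512, 1024, 2048],
262 | #   # with spatial strides of 4, 8, 16 and 32 relative to the input.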
--------------------------------------------------------------------------------
/boda/models/solov2/inference_solov1.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 |
4 |
5 | def matrix_nms(
6 | seg_masks, cate_labels, cate_scores, kernel="gaussian", sigma=2.0, sum_masks=None
7 | ):
8 | """Matrix NMS for multi-class masks.
9 |
10 | Args:
11 | seg_masks (Tensor): shape (n, h, w)
12 | cate_labels (Tensor): shape (n), mask labels in descending order
13 | cate_scores (Tensor): shape (n), mask scores in descending order
14 |         kernel (str): 'linear' or 'gaussian'
15 | sigma (float): std in gaussian method
16 | sum_masks (Tensor): The sum of seg_masks
17 |
18 | Returns:
19 | Tensor: cate_scores_update, tensors of shape (n)
20 | """
21 | n_samples = len(cate_labels)
22 | if n_samples == 0:
23 | return []
24 | if sum_masks is None:
25 | sum_masks = seg_masks.sum((1, 2)).float()
26 | seg_masks = seg_masks.reshape(n_samples, -1).float()
27 | # inter.
28 | inter_matrix = torch.mm(seg_masks, seg_masks.transpose(1, 0))
29 | # union.
30 | sum_masks_x = sum_masks.expand(n_samples, n_samples)
31 | # iou.
32 | iou_matrix = (
33 | inter_matrix / (sum_masks_x + sum_masks_x.transpose(1, 0) - inter_matrix)
34 | ).triu(diagonal=1)
35 | # label_specific matrix.
36 | cate_labels_x = cate_labels.expand(n_samples, n_samples)
37 | label_matrix = (
38 | (cate_labels_x == cate_labels_x.transpose(1, 0)).float().triu(diagonal=1)
39 | )
40 |
41 | # IoU compensation
42 | compensate_iou, _ = (iou_matrix * label_matrix).max(0)
43 | compensate_iou = compensate_iou.expand(n_samples, n_samples).transpose(1, 0)
44 |
45 | # IoU decay
46 | decay_iou = iou_matrix * label_matrix
47 |
48 | # matrix nms
49 | if kernel == "gaussian":
50 | decay_matrix = torch.exp(-1 * sigma * (decay_iou ** 2))
51 | compensate_matrix = torch.exp(-1 * sigma * (compensate_iou ** 2))
52 | decay_coefficient, _ = (decay_matrix / compensate_matrix).min(0)
53 | elif kernel == "linear":
54 | decay_matrix = (1 - decay_iou) / (1 - compensate_iou)
55 | decay_coefficient, _ = decay_matrix.min(0)
56 | else:
57 | raise NotImplementedError
58 |
59 | # update the score.
60 | cate_scores_update = cate_scores * decay_coefficient
61 | return cate_scores_update
62 |
63 |
64 | def get_seg(seg_preds, cate_preds, img_metas=[1]):
65 | assert len(seg_preds) == len(cate_preds)
66 |
67 | num_levels = len(cate_preds)
68 | featmap_size = seg_preds[0].size()[-2:]
69 |
70 | result_list = []
71 | for img_id in range(len(img_metas)):
72 | cate_pred_list = [
73 | cate_preds[i][img_id].view(-1, 80).detach()
74 | for i in range(num_levels)
75 | # cate_preds[i][img_id].view(-1, self.cate_out_channels).detach() for i in range(num_levels)
76 | ]
77 | seg_pred_list = [seg_preds[i][img_id].detach() for i in range(num_levels)]
78 |
79 | # img_shape = img_metas[img_id]['img_shape']
80 | # scale_factor = img_metas[img_id]['scale_factor']
81 | # ori_shape = img_metas[img_id]['ori_shape']
82 | size = (1333, 800, 3)
83 | # size = (800, 1333, 3)
84 | img_shape = size
85 | ori_shape = size
86 |
87 | cate_pred_list = torch.cat(cate_pred_list, dim=0)
88 | seg_pred_list = torch.cat(seg_pred_list, dim=0)
89 |
90 | result = get_seg_single(
91 | cate_pred_list, seg_pred_list, featmap_size, img_shape, ori_shape
92 | )
93 |
94 | result_list.append(result)
95 |
96 | return result_list
97 |
98 |
99 | def get_seg_single(cate_preds, seg_preds, featmap_size, img_shape, ori_shape):
100 | assert len(cate_preds) == len(seg_preds)
101 |
102 | # test_seg_masks = seg_preds > 0.5 # cfg.mask_thr
103 | # test_masks = test_seg_masks.detach().cpu().numpy()[0] * 255
104 | # print(test_masks.shape)
105 | # import cv2
106 | # cv2.imwrite('solo-test12.jpg', test_masks)
107 |
108 | # overall info.
109 | h, w, _ = img_shape
110 | upsampled_size_out = (featmap_size[0] * 4, featmap_size[1] * 4)
111 |
112 | # process.
113 | inds = cate_preds > 0.1 # cfg.score_thr
114 | # category scores.
115 | cate_scores = cate_preds[inds]
116 | if len(cate_scores) == 0:
117 | return None
118 | # category labels.
119 | # inds = inds.nonzero()
120 | inds = inds.nonzero()
121 | # print(inds.nonzero())
122 | cate_labels = inds[:, 1]
123 |
124 | # strides.
125 | # size_trans = cate_labels.new_tensor(self.seg_num_grids).pow(2).cumsum(0)
126 | size_trans = cate_labels.new_tensor([40, 36, 24, 16, 12]).pow(2).cumsum(0)
127 | strides = cate_scores.new_ones(size_trans[-1])
128 | n_stage = len([40, 36, 24, 16, 12]) # len(self.seg_num_grids)
129 | strides[: size_trans[0]] *= (4, 8, 16, 32, 64)[0] # self.strides[0]
130 | for ind_ in range(1, n_stage):
131 | # strides[size_trans[ind_ - 1]:size_trans[ind_]] *= self.strides[ind_]
132 | strides[size_trans[ind_ - 1] : size_trans[ind_]] *= (4, 8, 16, 32, 64)[ind_]
133 | strides = strides[inds[:, 0]]
134 |
135 | # masks.
136 | seg_preds = seg_preds[inds[:, 0]]
137 | seg_masks = seg_preds > 0.5 # cfg.mask_thr
138 | sum_masks = seg_masks.sum((1, 2)).float()
139 |
140 | # filter.
141 | keep = sum_masks > strides
142 | if keep.sum() == 0:
143 | return None
144 |
145 | seg_masks = seg_masks[keep, ...]
146 | seg_preds = seg_preds[keep, ...]
147 | sum_masks = sum_masks[keep]
148 | cate_scores = cate_scores[keep]
149 | cate_labels = cate_labels[keep]
150 |
151 | # print('#'*50)
152 | # print(seg_masks.size())
153 | test_seg_masks = seg_masks > 0.5 # cfg.mask_thr
154 | test_masks = test_seg_masks.detach().cpu().numpy()[0] * 255
155 | print(test_masks.shape)
156 | # test_masks = test_masks.transpose(1, 2, 0)
157 | import cv2
158 |
159 | cv2.imwrite("solo-test11.jpg", test_masks)
160 |
161 | # maskness.
162 | seg_scores = (seg_preds * seg_masks.float()).sum((1, 2)) / sum_masks
163 | cate_scores *= seg_scores
164 |
165 | # sort and keep top nms_pre
166 | sort_inds = torch.argsort(cate_scores, descending=True)
167 | if len(sort_inds) > 500: # cfg.nms_pre
168 | sort_inds = sort_inds[:500] # [:cfg.nms_pre]
169 | seg_masks = seg_masks[sort_inds, :, :]
170 | seg_preds = seg_preds[sort_inds, :, :]
171 | sum_masks = sum_masks[sort_inds]
172 | cate_scores = cate_scores[sort_inds]
173 | cate_labels = cate_labels[sort_inds]
174 |
175 | # Matrix NMS
176 | cate_scores = matrix_nms(
177 | seg_masks,
178 | cate_labels,
179 | cate_scores,
180 | kernel="gaussian",
181 | sigma=2.0,
182 | sum_masks=sum_masks,
183 | )
184 |
185 | # filter.
186 | keep = cate_scores >= 0.05 # cfg.update_thr
187 | if keep.sum() == 0:
188 | return None
189 | seg_preds = seg_preds[keep, :, :]
190 | cate_scores = cate_scores[keep]
191 | cate_labels = cate_labels[keep]
192 |
193 | # sort and keep top_k
194 | sort_inds = torch.argsort(cate_scores, descending=True)
195 | if len(sort_inds) > 100: # cfg.max_per_img:
196 | sort_inds = sort_inds[:100] # [:cfg.max_per_img]
197 | seg_preds = seg_preds[sort_inds, :, :]
198 | cate_scores = cate_scores[sort_inds]
199 | cate_labels = cate_labels[sort_inds]
200 |
201 | print(seg_preds.size())
202 | print(upsampled_size_out)
203 | seg_preds = F.interpolate(
204 | seg_preds.unsqueeze(0), size=upsampled_size_out, mode="bilinear"
205 | ) # [:, :, :h, :w]
206 |
207 | # seg_masks = F.interpolate(
208 | # seg_preds, size=ori_shape[:2], mode='bilinear').squeeze(0)
209 | size = (1333, 800)
210 | # size = (800, 1333)
211 | seg_masks = F.interpolate(seg_preds, size=size, mode="bilinear").squeeze(0)
212 |
213 | print("#" * 50)
214 | print(seg_masks.size())
215 | seg_masks = seg_masks > 0.5 # cfg.mask_thr
216 |
217 | test_masks = seg_masks.detach().cpu().numpy()[0] * 255
218 | print(test_masks.shape)
219 | # test_masks = test_masks.transpose(1, 2, 0)
220 | print(test_masks.shape)
221 | import cv2
222 |
223 | # test_masks = cv2.flip(test_masks, 1)
224 | # test_masks = cv2.rotate(test_masks, cv2.ROTATE_90_COUNTERCLOCKWISE)
225 | # print(test_masks.shape)
226 | # test_masks = cv2.resize(test_masks, (1333, 800), cv2.INTER_AREA)
227 | # print(test_masks.shape)
228 | cv2.imwrite("solo-test1.jpg", test_masks)
229 |
230 | return seg_masks, cate_labels, cate_scores
231 |
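232 | # Recap of the Matrix NMS update above: each candidate mask j keeps
233 | # cate_scores[j] * decay_j, where for the "gaussian" kernel
234 | #   decay_j = min_i exp(-sigma * iou_ij ** 2) / exp(-sigma * compensate_i ** 2)
235 | # and for the "linear" kernel decay_j = min_i (1 - iou_ij) / (1 - compensate_i),
236 | # with compensate_i the largest same-class IoU that mask i receives from any
237 | # higher-scoring mask.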
--------------------------------------------------------------------------------
/boda/postprocessing.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | from typing import Tuple, List, Dict
3 |
4 | import torch
5 | import torch.nn.functional as F
6 | from torch import Tensor
7 | from torchvision.ops import batched_nms
8 |
9 |
10 | def decode(boxes: Tensor, prior_boxes: Tensor, variances: List[float] = [0.1, 0.2]):
11 | """Decode locations from predictions using priors to undo
12 | the encoding we did for offset regression at train time.
13 |
14 | https://github.com/Hakuyume/chainer-ssd
15 |
16 | Args:
17 |         boxes (Tensor): location predictions from the loc layers,
18 |             Shape: [num_priors, 4].
19 |         prior_boxes (Tensor): prior boxes in center-offset form,
20 |             Shape: [num_priors, 4].
21 |         variances (List[float]): variances of the prior boxes.
22 | Return:
23 | decoded bounding box predictions
24 | """
25 | boxes = torch.cat(
26 | (
27 | prior_boxes[:, :2] + boxes[:, :2] * variances[0] * prior_boxes[:, 2:],
28 | prior_boxes[:, 2:] * torch.exp(boxes[:, 2:] * variances[1]),
29 | ),
30 | dim=1,
31 | )
32 | boxes[:, :2] -= boxes[:, 2:] / 2
33 | boxes[:, 2:] += boxes[:, :2]
34 |
35 | return boxes
36 |
37 |
38 | def sanitize_coordinates(
39 | _x1, _x2, img_size: int, padding: int = 0, cast: bool = True
40 | ) -> Tuple[Tensor, Tensor]:
41 | """
42 | Sanitizes the input coordinates so that x1 < x2, x1 != x2, x1 >= 0, and x2 <= image_size.
43 | Also converts from relative to absolute coordinates and casts the results to long tensors.
44 | If cast is false, the result won't be cast to longs.
45 | Warning: this does things in-place behind the scenes so copy if necessary.
46 | """
47 | _x1 = _x1 * img_size
48 | _x2 = _x2 * img_size
49 | if cast:
50 | _x1 = _x1.long()
51 | _x2 = _x2.long()
52 | x1 = torch.min(_x1, _x2)
53 | x2 = torch.max(_x1, _x2)
54 | x1 = torch.clamp(x1 - padding, min=0)
55 | x2 = torch.clamp(x2 + padding, max=img_size)
56 |
57 | return x1, x2
58 |
59 |
60 | def crop(masks, boxes, padding: int = 1) -> Tensor:
61 | """
62 | "Crop" predicted masks by zeroing out everything not in the predicted bbox.
63 | Vectorized by Chong (thanks Chong).
64 | Args:
65 | # TODO: torchvision mask rcnn masks UInt8Tensor[N, H, W]
66 | # TODO: torchvision boxes FloatTensor[N, 4]
67 | - masks should be a size [h, w, n] tensor of masks
68 | - boxes should be a size [n, 4] tensor of bbox coords in relative point form
69 | """
70 | h, w, n = masks.size()
71 | x1, x2 = sanitize_coordinates(boxes[:, 0], boxes[:, 2], w, padding, cast=False)
72 | y1, y2 = sanitize_coordinates(boxes[:, 1], boxes[:, 3], h, padding, cast=False)
73 |
74 | rows = (
75 | torch.arange(w, device=masks.device, dtype=x1.dtype)
76 | .view(1, -1, 1)
77 | .expand(h, w, n)
78 | )
79 | cols = (
80 | torch.arange(h, device=masks.device, dtype=x1.dtype)
81 | .view(-1, 1, 1)
82 | .expand(h, w, n)
83 | )
84 |
85 | masks_left = rows >= x1.view(1, 1, -1)
86 | masks_right = rows < x2.view(1, 1, -1)
87 | masks_up = cols >= y1.view(1, 1, -1)
88 | masks_down = cols < y2.view(1, 1, -1)
89 |
90 | crop_mask = masks_left * masks_right * masks_up * masks_down
91 |
92 | return masks * crop_mask.float()
93 |
94 |
95 | class PostprocessOutputs:
96 | def __init__(
97 | self,
98 | num_classes: int = 80,
99 | top_k: int = 10,
100 | nms_threshold: float = 0.3,
101 | score_threshold: float = 0.2,
102 | ) -> None:
103 | """
104 | Args:
105 |             num_classes (int): number of foreground classes; background is added internally.
106 |             top_k (int): maximum number of detections kept per image.
107 |             nms_threshold (float): IoU threshold used for non-maximum suppression.
108 |             score_threshold (float): minimum class score for keeping a detection.
109 |             nms (Callable): NMS implementation; torchvision's batched_nms is used.
110 | """
111 | self.config = None
112 | self.num_classes = num_classes + 1
113 | self.background_label = 0
114 | self.top_k = top_k
115 |         self.nms_threshold = nms_threshold
116 |         self.score_threshold = score_threshold
117 |
118 | self.nms = batched_nms
119 | # if self.nms is None:
120 | # self.nms = fast_nms
121 |
122 | def __call__(
123 | self, preds: Dict[str, Tensor], image_sizes: List[Tuple[int]]
124 | ) -> List[Dict[str, Tensor]]:
125 |         """Post-process raw head outputs (boxes, scores, mask_coefs, default_boxes, proto_masks) into per-image detections."""
126 | pred_boxes = None
127 | pred_scores = None
128 | default_boxes = None
129 | pred_masks = None
130 | proto_masks = None
131 | if "boxes" in preds:
132 | pred_boxes = preds["boxes"]
133 | if "scores" in preds:
134 | pred_scores = preds["scores"]
135 | if "default_boxes" in preds:
136 | default_boxes = preds["default_boxes"]
137 | if "mask_coefs" in preds:
138 | pred_masks = preds["mask_coefs"]
139 | if "proto_masks" in preds:
140 | proto_masks = preds["proto_masks"]
141 |
142 | batch_size = pred_boxes.size(0)
143 | num_prior_boxes = default_boxes.size(0)
144 | pred_scores = (
145 | preds["scores"]
146 | .view(batch_size, num_prior_boxes, self.num_classes)
147 | .transpose(2, 1)
148 | .contiguous()
149 | )
150 |
151 | # test_scores, test_index = torch.max(preds['scores'], dim=1)
152 |
153 | return_list = []
154 | # print(image_sizes)
155 | for i, image_size in enumerate(image_sizes):
156 | print(i, proto_masks.size())
157 | decoded_boxes = decode(pred_boxes[i], default_boxes)
158 | results = self._filter_overlaps(i, decoded_boxes, pred_masks, pred_scores)
159 | print(proto_masks[i].dtype)
160 | results["proto_masks"] = proto_masks[i]
161 |
162 | return_list.append(_convert_boxes_and_masks(results, image_size))
163 | # return_list.append(results)
164 |
165 | for result in return_list:
166 | scores = result["scores"].detach().cpu()
167 | sorted_index = range(len(scores))[: self.top_k]
168 | # sorted_index = scores.argsort(0, descending=True)[:5]
169 |
170 | boxes = result["boxes"][sorted_index]
171 | labels = result["labels"][sorted_index]
172 | scores = scores[sorted_index]
173 | masks = result["masks"][sorted_index]
174 |
175 | result["boxes"] = boxes
176 | result["scores"] = scores
177 | result["labels"] = labels
178 | result["masks"] = masks
179 |
180 | return return_list
181 |
182 | def _filter_overlaps(
183 | self,
184 | batch_index,
185 | decoded_boxes,
186 | pred_masks,
187 | pred_scores,
188 | ) -> Dict[str, Tensor]:
189 | scores = pred_scores[batch_index, 1:, :]
190 | max_scores, labels = torch.max(scores, dim=0)
191 |
192 | keep = max_scores > self.score_threshold # 0.05
193 | scores = scores[:, keep]
194 | boxes = decoded_boxes[keep, :]
195 | labels = labels[keep]
196 | masks = pred_masks[batch_index, keep, :]
197 |
198 | if scores.size(1) == 0:
199 | return None
200 |
201 | # print(max_scores[0], max_class[0])
202 | print(boxes.size(), scores.size(), keep.size(), labels.size())
203 | # boxes, masks, labels, scores = self.nms(boxes, scores, keep, iou_threshold=0.3)
204 |
205 | return_dict = defaultdict()
206 | for _class in range(scores.size(0)):
207 | _scores = scores[_class, :]
208 | indices = self.nms(boxes, _scores, labels, iou_threshold=0.3)
209 |
210 | return_dict["boxes"] = boxes[indices]
211 | return_dict["scores"] = _scores[indices]
212 | return_dict["mask_coefs"] = masks[indices]
213 | return_dict["labels"] = labels[indices]
214 |
215 | return dict(return_dict)
216 |
217 |
218 | def _convert_boxes_and_masks(preds, size):
219 | """
220 | Args:
221 | preds
222 | size (): (h, w)
223 |
224 | """
225 | h, w = size
226 | boxes = preds["boxes"]
227 | mask_coefs = preds["mask_coefs"]
228 | proto_masks = preds["proto_masks"]
229 |
230 | masks = proto_masks @ mask_coefs.t()
231 | masks = torch.sigmoid(masks)
232 |
233 | masks = crop(masks, boxes)
234 | masks = F.interpolate(
235 | masks.unsqueeze(0), (h, w), mode="bilinear", align_corners=False
236 | ).squeeze(0)
237 | masks.gt_(0.5) # Binarize the masks
238 |
239 | boxes[:, 0], boxes[:, 2] = sanitize_coordinates(
240 | boxes[:, 0], boxes[:, 2], w, cast=False
241 | )
242 | boxes[:, 1], boxes[:, 3] = sanitize_coordinates(
243 | boxes[:, 1], boxes[:, 3], h, cast=False
244 | )
245 | boxes = boxes.long()
246 |
247 | preds["boxes"] = boxes
248 | preds["masks"] = masks
249 |
250 | del preds["proto_masks"]
251 | del preds["mask_coefs"]
252 |
253 | return preds
254 |
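255 | # Reference for the decode() transform above (SSD-style box regression with
256 | # variances 0.1 and 0.2):
257 | #   cx = prior_cx + loc_x * 0.1 * prior_w        w = prior_w * exp(loc_w * 0.2)
258 | #   cy = prior_cy + loc_y * 0.1 * prior_h        h = prior_h * exp(loc_h * 0.2)
259 | # followed by the center-size -> corner (x1, y1, x2, y2) conversion used by NMS
260 | # and crop().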
--------------------------------------------------------------------------------
/boda/base_configuration.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import json
3 | import os
4 | import sys
5 | import time
6 | from typing import Tuple, List, Dict, Any, Union, Sequence
7 | from urllib.parse import urlparse
8 | from urllib.request import urlretrieve
9 |
10 | from .file_utils import DataEncoder
11 |
12 |
13 | class BaseConfig:
14 | """
15 | Class attributes:
16 | model_type (:obj:`str`):
17 | Args:
18 | name_or_path (:obj:`str`):
19 | """
20 |
21 | model_name: str = ""
22 | cache_dir = "cache"
23 |
24 | def __init__(self, **kwargs):
25 | self.use_torchscript = kwargs.pop("use_torchscript", False)
26 | # self.use_fp16 = kwargs.pop('use_fp16', False)
27 | self.label_map = kwargs.pop("label_map", {})
28 | self.num_classes = kwargs.pop("num_classes", 0)
29 | self.min_size = kwargs.pop("min_size", None)
30 | self.max_size = kwargs.pop("max_size", None)
31 | self.preserve_aspect_ratio = kwargs.pop("preserve_aspect_ratio", False)
32 | if not isinstance(self.max_size, Sequence):
33 | if not self.preserve_aspect_ratio:
34 | self.max_size = (self.max_size, self.max_size)
35 | else:
36 | self.max_size = (self.min_size, self.max_size)
37 |
38 | self.num_grids = kwargs.pop("num_grids", 0)
39 | self.top_k = kwargs.pop("top_k", 5)
40 | self.score_thresh = kwargs.pop("score_thresh", 0.15)
41 |
42 | # backbone
43 | self.backbone_name = kwargs.pop("backbone_name", "resnet101")
44 | self.backbone_structure = kwargs.pop("backbone_structure", None)
45 |
46 | # neck
47 | self.neck_name = kwargs.pop("neck_name", "fpn")
48 | self.selected_layers = kwargs.pop("selected_layers", [1, 2, 3])
49 | self.aspect_ratios = kwargs.pop("aspect_ratios", [1, 1 / 2, 2])
50 | self.scales = kwargs.pop("scales", [24, 48, 96, 192, 384])
51 | self.fpn_channels = kwargs.pop("fpn_channels", 256)
52 |
53 | # head
54 | self.anchors = kwargs.pop("anchors", None)
55 |
56 | for k, v in kwargs.items():
57 | print(k, v)
58 | try:
59 |                 setattr(self, k, v)
60 | except AttributeError as e:
61 | print(k, v, e)
62 |
63 | def __repr__(self):
64 | return f"{self.__class__.__name__} {self.to_dict()}"
65 |
66 | def to_json(self):
67 | config_dict = self.to_dict()
68 | return json.dumps(config_dict, indent=4, cls=DataEncoder)
69 |
70 | def to_dict(self):
71 | output = copy.deepcopy(self.__dict__)
72 | if hasattr(self.__class__, "model_name"):
73 | output["model_name"] = self.__class__.model_name
74 | return output
75 |
76 | def save_json(self, path: str):
77 | # if os.path.isfile(path):
78 | # raise AssertionError
79 |
80 | # os.makedirs(path, exist_ok=True)
81 | # config_file = os.path.join(path, CONFIG_NAME)
82 | with open(path, "w", encoding="utf-8") as writer:
83 | writer.write(self.to_json())
84 |
85 | def update(self, config_dict: Dict[str, Any]):
86 | for key, value in config_dict.items():
87 | setattr(self, key, value)
88 |
89 | @classmethod
90 | def from_pretrained(cls, name_or_path: str, **kwargs):
91 | config_dict = cls._get_config_dict(name_or_path)
92 | return cls(**config_dict)
93 |
94 | @classmethod
95 |     def _dict_from_json_file(cls, path):
96 | with open(path, "r", encoding="utf-8") as json_file:
97 | config_dict = json.load(json_file)
98 | return config_dict
99 |
100 | @classmethod
101 | def from_json(cls, json_file: str):
102 | config_dict = cls._dict_from_json_file(json_file)
103 | print(config_dict)
104 | return cls(**config_dict)
105 |
106 | @classmethod
107 | def _get_config_dict(cls, name_or_path, **kwargs):
108 | if os.path.isdir(name_or_path):
109 | # TODO: Thinking idea!!
110 | config_file = os.path.join(name_or_path, "config.json")
111 | elif os.path.isfile(name_or_path):
112 | config_file = name_or_path
113 | else:
114 | url = "https://unerue.synology.me/boda/models/"
115 | config_dir = os.path.join(cls.cache_dir, cls.model_name)
116 | config_file = os.path.join(config_dir, f"{name_or_path}.json")
117 | if not os.path.isfile(config_file):
118 | from urllib import request
119 |
120 | from .file_utils import reporthook
121 |
122 | if not os.path.isdir(config_dir):
123 | os.mkdir(config_dir)
124 |
125 | # file_name = f'{config_file}.json'
126 | # print(f'Downloading {name_or_path}.{extension}...', end=' ')
127 | request.urlretrieve(
128 | f"{url}{cls.model_name}/{name_or_path}.json",
129 | config_file,
130 | reporthook,
131 | )
132 | print()
133 |
134 | return cls._dict_from_json_file(config_file)
135 |
136 | # if not os.path.isfile(os.path.join(config_dir, f'{name_or_path}.pth')):
137 | # from urllib import request
138 | # from .models.yolact.configuration_yolact import yolact_pretrained_models
139 |
140 | # dd = urlparse(yolact_pretrained_models[name_or_path])
141 | # request.urlretrieve(
142 | # yolact_pretrained_models[name_or_path].replace('json', 'pth'),
143 | # 'cache/yolact/yolact-base.pth', reporthook)
144 |
145 | # if os.path.isdir(config_dir):
146 | # config_file = os.path.join(config_dir, f'{name_or_path}.json')
147 | # if os.path.isfile(config_file):
148 | # return cls._dict_from_json_file(config_file)
149 | # else:
150 | # config_file = urlparse()
151 | # else:
152 | # os.mkdir(config_dir)
153 | # return
154 |
155 | # config_dict = cls._dict_from_json_file(config_file)
156 |
157 | # return config_dict, kwargs
158 |
159 | # @classmethod
160 | # def from_json(cls, json_file: str):
161 | # with open(path, 'r') as json_file:
162 | # config_dict = json.loads(json_file)
163 | # config_dict = cls.dict_from_json_fiel(json_file)
164 | # return cls(**config_dict)
165 |
166 | # @classmethod
167 | # def from_pretrained(cls, pretrained_model_or_path: str, **kwargs):
168 | # raise NotImplementedError
169 |
170 | # @classmethod
171 | # def get_config_dict(cls, pretrained_model_name_or_path: str, **kwargs) -> Tuple[Dict[str, Any], Dict[str, Any]]:
172 | # cache_dir = kwargs.pop('cache_dir', None)
173 | # force_download = kwargs.pop('force_download', False)
174 | # resume_download = kwargs.pop('resume_download', False)
175 | # proxies = kwargs.pop("proxies", None)
176 | # local_files_only = kwargs.pop("local_files_only", False)
177 | # revision = kwargs.pop("revision", None)
178 |
179 | # if os.path.isdir(pretrained_model_name_or_path):
180 | # config_file = os.path.join(pretrained_model_name_or_path, CONFIG_NAME)
181 | # elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path):
182 | # config_file = pretrained_model_name_or_path
183 | # else:
184 | # config_file = hf_bucket_url(
185 | # pretrained_model_name_or_path, filename=CONFIG_NAME, revision=revision, mirror=None
186 | # )
187 |
188 | # try:
189 | # # Load from URL or cache if already cached
190 | # resolved_config_file = cached_path(
191 | # config_file,
192 | # cache_dir=cache_dir,
193 | # force_download=force_download,
194 | # proxies=proxies,
195 | # resume_download=resume_download,
196 | # local_files_only=local_files_only,
197 | # )
198 | # # Load config dict
199 | # config_dict = cls._dict_from_json_file(resolved_config_file)
200 |
201 | # except EnvironmentError as err:
202 | # logger.error(err)
203 | # msg = (
204 | # f"Can't load config for '{pretrained_model_name_or_path}'. Make sure that:\n\n"
205 | # f"- '{pretrained_model_name_or_path}' is a correct model identifier listed on 'https://huggingface.co/models'\n\n"
206 | # f"- or '{pretrained_model_name_or_path}' is the correct path to a directory containing a {CONFIG_NAME} file\n\n"
207 | # )
208 | # raise EnvironmentError(msg)
209 |
210 | # except json.JSONDecodeError:
211 | # msg = (
212 | # "Couldn't reach server at '{}' to download configuration file or "
213 | # "configuration file is not a valid JSON file. "
214 | # "Please check network or file content here: {}.".format(config_file, resolved_config_file)
215 | # )
216 | # raise EnvironmentError(msg)
217 |
218 | # if resolved_config_file == config_file:
219 | # logger.info("loading configuration file {}".format(config_file))
220 | # else:
221 | # logger.info("loading configuration file {} from cache at {}".format(config_file, resolved_config_file))
222 |
223 | # return config_dict, kwargs
224 |
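225 | # Minimal usage sketch for a config subclass (the `ExampleConfig` name below is
226 | # illustrative only):
227 | #
228 | #   class ExampleConfig(BaseConfig):
229 | #       model_name = "example"
230 | #
231 | #   config = ExampleConfig(num_classes=80, max_size=550)
232 | #   config.save_json("example.json")       # serialize via to_json()/DataEncoder
233 | #   restored = ExampleConfig.from_json("example.json")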
--------------------------------------------------------------------------------
/boda/models/yolact/inference_yolact.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | from typing import Tuple, List, Dict
3 |
4 | import torch
5 | import torch.nn.functional as F
6 | from torch import Tensor
7 | from torchvision.ops import batched_nms
8 |
9 |
10 | def decode(boxes: Tensor, default_boxes: Tensor, variances: List[float] = [0.1, 0.2]):
11 | """Decode locations from predictions using priors to undo
12 | the encoding we did for offset regression at train time.
13 |
14 | https://github.com/Hakuyume/chainer-ssd
15 |
16 | Args:
17 |         boxes (FloatTensor[num_priors, 4]): location predictions from
18 |             the loc layers.
19 |         default_boxes (FloatTensor[num_priors, 4]): default (prior) boxes
20 |             in center-offset form.
21 |         variances (List[float]): variances of the default boxes.
22 | Return:
23 | decoded bounding box predictions
24 | """
25 | boxes = torch.cat(
26 | (
27 | default_boxes[:, :2] + boxes[:, :2] * variances[0] * default_boxes[:, 2:],
28 | default_boxes[:, 2:] * torch.exp(boxes[:, 2:] * variances[1]),
29 | ),
30 | dim=1,
31 | )
32 | boxes[:, :2] -= boxes[:, 2:] / 2
33 | boxes[:, 2:] += boxes[:, :2]
34 |
35 | return boxes
36 |
37 |
38 | def sanitize_coordinates(
39 | _x1, _x2, img_size: int, padding: int = 0, cast: bool = True
40 | ) -> Tuple[Tensor, Tensor]:
41 | """
42 | Sanitizes the input coordinates so that x1 < x2, x1 != x2, x1 >= 0, and x2 <= image_size.
43 | Also converts from relative to absolute coordinates and casts the results to long tensors.
44 | If cast is false, the result won't be cast to longs.
45 | Warning: this does things in-place behind the scenes so copy if necessary.
46 | """
47 | _x1 = _x1 * img_size
48 | _x2 = _x2 * img_size
49 | if cast:
50 | _x1 = _x1.long()
51 | _x2 = _x2.long()
52 | x1 = torch.min(_x1, _x2)
53 | x2 = torch.max(_x1, _x2)
54 | x1 = torch.clamp(x1 - padding, min=0)
55 | x2 = torch.clamp(x2 + padding, max=img_size)
56 |
57 | return x1, x2
58 |
59 |
60 | def crop(masks, boxes, padding: int = 1) -> Tensor:
61 | """
62 | "Crop" predicted masks by zeroing out everything not in the predicted bbox.
63 | Vectorized by Chong (thanks Chong).
64 | Args:
65 | # TODO: torchvision mask rcnn masks UInt8Tensor[N, H, W]
66 | # TODO: torchvision boxes FloatTensor[N, 4]
67 | - masks should be a size [h, w, n] tensor of masks
68 | - boxes should be a size [n, 4] tensor of bbox coords in relative point form
69 | """
70 | h, w, n = masks.size()
71 | x1, x2 = sanitize_coordinates(boxes[:, 0], boxes[:, 2], w, padding, cast=False)
72 | y1, y2 = sanitize_coordinates(boxes[:, 1], boxes[:, 3], h, padding, cast=False)
73 |
74 | rows = (
75 | torch.arange(w, device=masks.device, dtype=x1.dtype)
76 | .view(1, -1, 1)
77 | .expand(h, w, n)
78 | )
79 | cols = (
80 | torch.arange(h, device=masks.device, dtype=x1.dtype)
81 | .view(-1, 1, 1)
82 | .expand(h, w, n)
83 | )
84 |
85 | masks_left = rows >= x1.view(1, 1, -1)
86 | masks_right = rows < x2.view(1, 1, -1)
87 | masks_up = cols >= y1.view(1, 1, -1)
88 | masks_down = cols < y2.view(1, 1, -1)
89 |
90 | crop_mask = masks_left * masks_right * masks_up * masks_down
91 |
92 | return masks * crop_mask.float()
93 |
94 |
95 | class PostprocessYolact:
96 | def __init__(
97 | self,
98 | num_classes: int = 80,
99 | top_k: int = 10,
100 | nms_threshold: float = 0.3,
101 | score_threshold: float = 0.2,
102 | ) -> None:
103 | """
104 | Args:
105 |             num_classes (int): number of foreground classes; background is added internally.
106 |             top_k (int): maximum number of detections kept per image.
107 |             nms_threshold (float): IoU threshold used for non-maximum suppression.
108 |             score_threshold (float): minimum class score for keeping a detection.
109 |             nms (Callable): NMS implementation; torchvision's batched_nms is used.
110 | """
111 | self.config = None
112 | self.num_classes = num_classes + 1
113 | self.background_label = 0
114 | self.top_k = top_k
115 |         self.nms_threshold = nms_threshold
116 |         self.score_threshold = score_threshold
117 |
118 | self.nms = batched_nms
119 | # if self.nms is None:
120 | # self.nms = fast_nms
121 |
122 | def __call__(
123 | self, preds: Dict[str, Tensor], image_sizes: List[Tuple[int]]
124 | ) -> List[Dict[str, Tensor]]:
125 | """
126 | preds (Dict[str, Tensor]):
127 | boxes (FloatTensor[B, N, 4])
128 | scores (FloatTensor[B, N, 81])
129 |
130 | mask_coefs (FloatTensor[B, N, 32])
131 | default_boxes (FloatTensor[N, 4])
132 | proto_masks (FloatTensor[1, 138, 138, 32])
133 | """
134 | pred_boxes = None
135 | pred_scores = None
136 | default_boxes = None
137 | pred_masks = None
138 | proto_masks = None
139 | if "boxes" in preds:
140 | pred_boxes = preds["boxes"]
141 | if "scores" in preds:
142 | pred_scores = preds["scores"]
143 | print("before", pred_scores.size())
144 | if "default_boxes" in preds:
145 | default_boxes = preds["default_boxes"]
146 | if "mask_coefs" in preds:
147 | pred_masks = preds["mask_coefs"]
148 | if "proto_masks" in preds:
149 | proto_masks = preds["proto_masks"]
150 |
151 | batch_size = pred_boxes.size(0)
152 | num_prior_boxes = default_boxes.size(0)
153 | # pred_scores = preds['scores'].view(batch_size, num_prior_boxes, self.num_classes).transpose(2, 1).contiguous()
154 |
155 | pred_scores = preds["scores"].view(
156 | batch_size, num_prior_boxes, self.num_classes
157 | )
158 | pred_scores = pred_scores.transpose(2, 1).contiguous()
159 | # test_scores, test_index = torch.max(preds['scores'], dim=1)
160 |
161 | return_list = []
162 | for i, image_size in enumerate(image_sizes):
163 | decoded_boxes = decode(pred_boxes[i], default_boxes)
164 | results = self._filter_overlaps(i, decoded_boxes, pred_masks, pred_scores)
165 | results["proto_masks"] = proto_masks[i]
166 |
167 | return_list.append(_convert_boxes_and_masks(results, image_size))
168 |
169 | for result in return_list:
170 | scores = result["scores"].detach()
171 | sorted_index = range(len(scores))[: self.top_k]
172 | # sorted_index = scores.argsort(0, descending=True)[:5]
173 |
174 | boxes = result["boxes"][sorted_index]
175 | labels = result["labels"][sorted_index]
176 | scores = scores[sorted_index]
177 | masks = result["masks"][sorted_index]
178 | print(masks[0].sum())
179 |
180 | result["boxes"] = boxes
181 | result["scores"] = scores
182 | result["labels"] = labels
183 | result["masks"] = masks
184 |
185 | return return_list
186 |
187 | def _filter_overlaps(
188 | self, batch_index, decoded_boxes, pred_masks, pred_scores
189 | ) -> Dict[str, Tensor]:
190 | """
191 | batch_index (int)
192 | decoded_boxes ()
193 | pred_masks (FloatTensor[B, N, 32])
194 | pred_scores ()
195 | """
196 | scores = pred_scores[batch_index, 1:, :]
197 | max_scores, labels = torch.max(scores, dim=0)
198 |
199 | keep = max_scores > self.score_threshold # 0.05
200 | scores = scores[:, keep]
201 | boxes = decoded_boxes[keep, :]
202 | labels = labels[keep]
203 | masks = pred_masks[batch_index, keep, :]
204 |
205 | if scores.size(1) == 0:
206 | return None
207 |
208 | return_dict = defaultdict()
209 | for _class in range(scores.size(0)):
210 | _scores = scores[_class, :]
211 | indices = self.nms(boxes, _scores, labels, iou_threshold=0.3)
212 |
213 | return_dict["boxes"] = boxes[indices]
214 |             return_dict["scores"] = _scores[indices]
215 | return_dict["mask_coefs"] = masks[indices]
216 | return_dict["labels"] = labels[indices]
217 |
218 | return dict(return_dict)
219 |
220 |
221 | def _convert_boxes_and_masks(preds, size):
222 | """
223 | Args:
224 | preds
225 | boxes (FloatTensor[N, 4])
226 | mask_coefs (FloatTensor[N, 32])
227 | proto_masks (FloatTensor[138, 138, 32])
228 | size (): (h, w)
229 |
230 | """
231 | h, w = size
232 | boxes = preds["boxes"]
233 | mask_coefs = preds["mask_coefs"]
234 | proto_masks = preds["proto_masks"]
235 | print(boxes.size(), mask_coefs.size(), proto_masks.size())
236 |
237 | # masks = proto_masks @ mask_coefs.t()
238 | masks = torch.matmul(proto_masks, mask_coefs.t())
239 | print(mask_coefs)
240 | masks = torch.sigmoid(masks)
241 | print(masks.size())
242 | print(masks[0].sum().long())
243 |
244 | masks = crop(masks, boxes)
245 |
246 | masks = masks.permute(2, 0, 1).contiguous()
247 | print(masks.size())
248 |
249 | masks = F.interpolate(
250 | masks.unsqueeze(0), (h, w), mode="bilinear", align_corners=False
251 | ).squeeze(0)
252 | masks.gt_(0.5) # Binarize the masks
253 | print(masks[0].sum())
254 | boxes[:, 0], boxes[:, 2] = sanitize_coordinates(
255 | boxes[:, 0], boxes[:, 2], w, cast=False
256 | )
257 | boxes[:, 1], boxes[:, 3] = sanitize_coordinates(
258 | boxes[:, 1], boxes[:, 3], h, cast=False
259 | )
260 | boxes = boxes.long()
261 |
262 | preds["boxes"] = boxes
263 | preds["masks"] = masks
264 |
265 | del preds["proto_masks"]
266 | del preds["mask_coefs"]
267 |
268 | return preds
269 |
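270 | # Recap of the YOLACT mask assembly performed in _convert_boxes_and_masks above:
271 | #   masks = sigmoid(proto_masks @ mask_coefs.T)     # [138, 138, N]
272 | #   masks = crop(masks, boxes)                       # zero outside each predicted box
273 | #   masks = interpolate(masks, (h, w)) > 0.5         # upsample to image size, binarize
274 | # i.e. each detection's mask is a per-pixel linear combination of the prototypes.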
--------------------------------------------------------------------------------
/boda/models/feature_extractor/mobilenetv2.py:
--------------------------------------------------------------------------------
1 | from typing import Callable, Any, Optional, List
2 |
3 | import torch
4 | from torch import Tensor
5 | from torch import nn
6 | from torch.hub import load_state_dict_from_url
7 |
8 |
9 | # __all__ = ['MobileNetV2', 'mobilenet_v2']
10 |
11 |
12 | model_urls = {
13 | "mobilenet_v2": "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth",
14 | }
15 |
16 |
17 | def _make_divisible(v: float, divisor: int, min_value: Optional[int] = None) -> int:
18 | """
19 | This function is taken from the original tf repo.
20 | It ensures that all layers have a channel number that is divisible by 8
21 | It can be seen here:
22 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
23 | """
24 | if min_value is None:
25 | min_value = divisor
26 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
27 | # Make sure that round down does not go down by more than 10%.
28 | if new_v < 0.9 * v:
29 | new_v += divisor
30 | return new_v
31 |
32 |
33 | class ConvBNActivation(nn.Sequential):
34 | def __init__(
35 | self,
36 | in_planes: int,
37 | out_planes: int,
38 | kernel_size: int = 3,
39 | stride: int = 1,
40 | groups: int = 1,
41 | norm_layer: Optional[Callable[..., nn.Module]] = None,
42 | activation_layer: Optional[Callable[..., nn.Module]] = None,
43 | dilation: int = 1,
44 | ) -> None:
45 | padding = (kernel_size - 1) // 2 * dilation
46 | if norm_layer is None:
47 | norm_layer = nn.BatchNorm2d
48 | if activation_layer is None:
49 | activation_layer = nn.ReLU6
50 |         super().__init__(
51 | nn.Conv2d(
52 | in_planes,
53 | out_planes,
54 | kernel_size,
55 | stride,
56 | padding,
57 | dilation=dilation,
58 | groups=groups,
59 | bias=False,
60 | ),
61 | norm_layer(out_planes),
62 | activation_layer(inplace=True),
63 | )
64 | self.out_channels = out_planes
65 |
66 |
67 | # necessary for backwards compatibility
68 | ConvBNReLU = ConvBNActivation
69 |
70 |
71 | class InvertedResidual(nn.Module):
72 | def __init__(
73 | self,
74 | inp: int,
75 | oup: int,
76 | stride: int,
77 | expand_ratio: int,
78 | norm_layer: Optional[Callable[..., nn.Module]] = None,
79 | ) -> None:
80 | super(InvertedResidual, self).__init__()
81 | self.stride = stride
82 | assert stride in [1, 2]
83 |
84 | if norm_layer is None:
85 | norm_layer = nn.BatchNorm2d
86 |
87 | hidden_dim = int(round(inp * expand_ratio))
88 | self.use_res_connect = self.stride == 1 and inp == oup
89 |
90 | layers: List[nn.Module] = []
91 | if expand_ratio != 1:
92 | # pw
93 | layers.append(
94 | ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer)
95 | )
96 | layers.extend(
97 | [
98 | # dw
99 | ConvBNReLU(
100 | hidden_dim,
101 | hidden_dim,
102 | stride=stride,
103 | groups=hidden_dim,
104 | norm_layer=norm_layer,
105 | ),
106 | # pw-linear
107 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
108 | norm_layer(oup),
109 | ]
110 | )
111 | self.conv = nn.Sequential(*layers)
112 | self.out_channels = oup
113 | self._is_cn = stride > 1
114 |
115 | def forward(self, x: Tensor) -> Tensor:
116 | if self.use_res_connect:
117 | return x + self.conv(x)
118 | else:
119 | return self.conv(x)
120 |
121 |
122 | class MobileNetV2(nn.Module):
123 | def __init__(
124 | self,
125 | num_classes: int = 1000,
126 | width_mult: float = 1.0,
127 | inverted_residual_setting: Optional[List[List[int]]] = None,
128 | round_nearest: int = 8,
129 | block: Optional[Callable[..., nn.Module]] = None,
130 | norm_layer: Optional[Callable[..., nn.Module]] = None,
131 | ) -> None:
132 | """
133 | MobileNet V2 main class
134 | Args:
135 | num_classes (int): Number of classes
136 | width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
137 | inverted_residual_setting: Network structure
138 | round_nearest (int): Round the number of channels in each layer to be a multiple of this number
139 | Set to 1 to turn off rounding
140 | block: Module specifying inverted residual building block for mobilenet
141 | norm_layer: Module specifying the normalization layer to use
142 | """
143 | super(MobileNetV2, self).__init__()
144 |
145 | if block is None:
146 | block = InvertedResidual
147 |
148 | if norm_layer is None:
149 | norm_layer = nn.BatchNorm2d
150 |
151 | input_channel = 32
152 | last_channel = 1280
153 |
154 | if inverted_residual_setting is None:
155 | inverted_residual_setting = [
156 | # t, c, n, s
157 | [1, 16, 1, 1],
158 | [6, 24, 2, 2],
159 | [6, 32, 3, 2],
160 | [6, 64, 4, 2],
161 | [6, 96, 3, 1],
162 | [6, 160, 3, 2],
163 | [6, 320, 1, 1],
164 | ]
165 |
166 | # only check the first element, assuming user knows t,c,n,s are required
167 | if (
168 | len(inverted_residual_setting) == 0
169 | or len(inverted_residual_setting[0]) != 4
170 | ):
171 | raise ValueError(
172 |                 "inverted_residual_setting should be a non-empty list "
173 |                 "of 4-element lists, got {}".format(inverted_residual_setting)
174 | )
175 |
176 | # building first layer
177 | input_channel = _make_divisible(input_channel * width_mult, round_nearest)
178 | self.last_channel = _make_divisible(
179 | last_channel * max(1.0, width_mult), round_nearest
180 | )
181 | features: List[nn.Module] = [
182 | ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer)
183 | ]
184 | # building inverted residual blocks
185 | for t, c, n, s in inverted_residual_setting:
186 | output_channel = _make_divisible(c * width_mult, round_nearest)
187 | for i in range(n):
188 | stride = s if i == 0 else 1
189 | features.append(
190 | block(
191 | input_channel,
192 | output_channel,
193 | stride,
194 | expand_ratio=t,
195 | norm_layer=norm_layer,
196 | )
197 | )
198 | input_channel = output_channel
199 | # building last several layers
200 | features.append(
201 | ConvBNReLU(
202 | input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer
203 | )
204 | )
205 | # make it nn.Sequential
206 | self.features = nn.Sequential(*features)
207 |
208 | # building classifier
209 | self.classifier = nn.Sequential(
210 | nn.Dropout(0.2),
211 | nn.Linear(self.last_channel, num_classes),
212 | )
213 |
214 | # weight initialization
215 | for m in self.modules():
216 | if isinstance(m, nn.Conv2d):
217 | nn.init.kaiming_normal_(m.weight, mode="fan_out")
218 | if m.bias is not None:
219 | nn.init.zeros_(m.bias)
220 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
221 | nn.init.ones_(m.weight)
222 | nn.init.zeros_(m.bias)
223 | elif isinstance(m, nn.Linear):
224 | nn.init.normal_(m.weight, 0, 0.01)
225 | nn.init.zeros_(m.bias)
226 |
227 | def _forward_impl(self, x: Tensor) -> Tensor:
228 | # This exists since TorchScript doesn't support inheritance, so the superclass method
229 | # (this one) needs to have a name other than `forward` that can be accessed in a subclass
230 | x = self.features(x)
231 | # Cannot use "squeeze" as batch-size can be 1
232 | x = nn.functional.adaptive_avg_pool2d(x, (1, 1))
233 | x = torch.flatten(x, 1)
234 | x = self.classifier(x)
235 | return x
236 |
237 | def forward(self, x: Tensor) -> Tensor:
238 | return self._forward_impl(x)
239 |
240 |
241 | def mobilenet_v2(
242 | pretrained: bool = False, progress: bool = True, **kwargs: Any
243 | ) -> MobileNetV2:
244 | """
245 | Constructs a MobileNetV2 architecture from
246 |     `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_.
247 | Args:
248 | pretrained (bool): If True, returns a model pre-trained on ImageNet
249 | progress (bool): If True, displays a progress bar of the download to stderr
250 | """
251 | model = MobileNetV2(**kwargs)
252 | if pretrained:
253 | state_dict = load_state_dict_from_url(
254 | model_urls["mobilenet_v2"], progress=progress
255 | )
256 | model.load_state_dict(state_dict)
257 | return model
258 |
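259 | # Worked example of _make_divisible with divisor = 8: v = 24.0 gives
260 | # int(24 + 4) // 8 * 8 = 24, kept because it is within 10% of v; v = 27.0 also
261 | # rounds down to 24, but 24 < 0.9 * 27, so the divisor is added back and the
262 | # result is 32. This keeps every channel count a multiple of 8 after width_mult.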
--------------------------------------------------------------------------------
/boda/models/solov2/README.md:
--------------------------------------------------------------------------------
1 | # SOLO (Segmenting Objects by Locations)
2 |
3 | ```
4 | ██████╗ ██████╗ ██╗ ██████╗
5 | ██╔════╝ ██╔═══██╗██║ ██╔═══██╗
6 | ╚██████╗ ██║ ██║██║ ██║ ██║██╗ ██╗
7 | ╚════██╗██║ ██║██║ ██║ ██║ ██╗ ██╔╝
8 | ██████╔╝╚██████╔╝███████╗╚██████╔╝ ████╔╝
9 | ╚═════╝ ╚═════╝ ╚══════╝ ╚═════╝ ╚═══╝
10 | ```
11 |
12 | ## SOLO Architecture
13 |
14 | ```
15 | ==========================================================================================
16 | Layer (type:depth-idx) Output Shape Param #
17 | ==========================================================================================
18 | ├─ResNet: 1-1 [-1, 256, 334, 200] --
19 | | └─Conv2d: 2-1 [-1, 64, 667, 400] 9,408
20 | | └─BatchNorm2d: 2-2 [-1, 64, 667, 400] 128
21 | | └─ReLU: 2-3 [-1, 64, 667, 400] --
22 | | └─MaxPool2d: 2-4 [-1, 64, 334, 200] --
23 | | └─ModuleList: 2 [] --
24 | | | └─Sequential: 3-1 [-1, 256, 334, 200] 215,808
25 | | | └─Sequential: 3-2 [-1, 512, 167, 100] 1,219,584
26 | | | └─Sequential: 3-3 [-1, 1024, 84, 50] 7,098,368
27 | | | └─Sequential: 3-4 [-1, 2048, 42, 25] 14,964,736
28 | ├─Solov1PredictNeck: 1-2 [-1, 256, 334, 200] --
29 | | └─ModuleList: 2 [] --
30 | | | └─Conv2d: 3-5 [-1, 256, 42, 25] 524,544
31 | | | └─Conv2d: 3-6 [-1, 256, 84, 50] 262,400
32 | | | └─Conv2d: 3-7 [-1, 256, 167, 100] 131,328
33 | | | └─Conv2d: 3-8 [-1, 256, 334, 200] 65,792
34 | | └─ModuleList: 2 [] --
35 | | | └─Conv2d: 3-9 [-1, 256, 42, 25] 590,080
36 | | | └─Conv2d: 3-10 [-1, 256, 84, 50] 590,080
37 | | | └─Conv2d: 3-11 [-1, 256, 167, 100] 590,080
38 | | | └─Conv2d: 3-12 [-1, 256, 334, 200] 590,080
39 | | └─ModuleList: 2 [] --
40 | | | └─Conv2d: 3-13 [-1, 256, 21, 13] 590,080
41 | ├─Solov1PredictHead: 1-3 [2, 1600, 334, 200] --
42 | | └─ModuleList: 2 [] --
43 | | | └─Sequential: 3-14 [-1, 256, 167, 100] 595,200
44 | | | └─Sequential: 3-15 [-1, 256, 167, 100] 590,592
45 | | | └─Sequential: 3-16 [-1, 256, 167, 100] 590,592
46 | | | └─Sequential: 3-17 [-1, 256, 167, 100] 590,592
47 | | | └─Sequential: 3-18 [-1, 256, 167, 100] 590,592
48 | | | └─Sequential: 3-19 [-1, 256, 167, 100] 590,592
49 | | | └─Sequential: 3-20 [-1, 256, 167, 100] 590,592
50 | | └─ModuleList: 2 [] --
51 | | | └─Conv2d: 3-21 [-1, 1600, 334, 200] 411,200
52 | | └─ModuleList: 2 [] --
53 | | | └─Sequential: 3-22 [-1, 256, 40, 40] 590,592
54 | | | └─Sequential: 3-23 [-1, 256, 40, 40] 590,592
55 | | | └─Sequential: 3-24 [-1, 256, 40, 40] 590,592
56 | | | └─Sequential: 3-25 [-1, 256, 40, 40] 590,592
57 | | | └─Sequential: 3-26 [-1, 256, 40, 40] 590,592
58 | | | └─Sequential: 3-27 [-1, 256, 40, 40] 590,592
59 | | | └─Sequential: 3-28 [-1, 256, 40, 40] 590,592
60 | | └─Conv2d: 2-5 [-1, 79, 40, 40] 182,095
61 | | └─ModuleList: 2 [] --
62 | | | └─Sequential: 3-29 [-1, 256, 167, 100] (recursive)
63 | | | └─Sequential: 3-30 [-1, 256, 167, 100] (recursive)
64 | | | └─Sequential: 3-31 [-1, 256, 167, 100] (recursive)
65 | | | └─Sequential: 3-32 [-1, 256, 167, 100] (recursive)
66 | | | └─Sequential: 3-33 [-1, 256, 167, 100] (recursive)
67 | | | └─Sequential: 3-34 [-1, 256, 167, 100] (recursive)
68 | | | └─Sequential: 3-35 [-1, 256, 167, 100] (recursive)
69 | | └─ModuleList: 2 [] --
70 | | | └─Conv2d: 3-36 [-1, 1296, 334, 200] 333,072
71 | | └─ModuleList: 2 [] --
72 | | | └─Sequential: 3-37 [-1, 256, 36, 36] (recursive)
73 | | | └─Sequential: 3-38 [-1, 256, 36, 36] (recursive)
74 | | | └─Sequential: 3-39 [-1, 256, 36, 36] (recursive)
75 | | | └─Sequential: 3-40 [-1, 256, 36, 36] (recursive)
76 | | | └─Sequential: 3-41 [-1, 256, 36, 36] (recursive)
77 | | | └─Sequential: 3-42 [-1, 256, 36, 36] (recursive)
78 | | | └─Sequential: 3-43 [-1, 256, 36, 36] (recursive)
79 | | └─Conv2d: 2-6 [-1, 79, 36, 36] (recursive)
80 | | └─ModuleList: 2 [] --
81 | | | └─Sequential: 3-44 [-1, 256, 84, 50] (recursive)
82 | | | └─Sequential: 3-45 [-1, 256, 84, 50] (recursive)
83 | | | └─Sequential: 3-46 [-1, 256, 84, 50] (recursive)
84 | | | └─Sequential: 3-47 [-1, 256, 84, 50] (recursive)
85 | | | └─Sequential: 3-48 [-1, 256, 84, 50] (recursive)
86 | | | └─Sequential: 3-49 [-1, 256, 84, 50] (recursive)
87 | | | └─Sequential: 3-50 [-1, 256, 84, 50] (recursive)
88 | | └─ModuleList: 2 [] --
89 | | | └─Conv2d: 3-51 [-1, 576, 168, 100] 148,032
90 | | └─ModuleList: 2 [] --
91 | | | └─Sequential: 3-52 [-1, 256, 24, 24] (recursive)
92 | | | └─Sequential: 3-53 [-1, 256, 24, 24] (recursive)
93 | | | └─Sequential: 3-54 [-1, 256, 24, 24] (recursive)
94 | | | └─Sequential: 3-55 [-1, 256, 24, 24] (recursive)
95 | | | └─Sequential: 3-56 [-1, 256, 24, 24] (recursive)
96 | | | └─Sequential: 3-57 [-1, 256, 24, 24] (recursive)
97 | | | └─Sequential: 3-58 [-1, 256, 24, 24] (recursive)
98 | | └─Conv2d: 2-7 [-1, 79, 24, 24] (recursive)
99 | | └─ModuleList: 2 [] --
100 | | | └─Sequential: 3-59 [-1, 256, 42, 25] (recursive)
101 | | | └─Sequential: 3-60 [-1, 256, 42, 25] (recursive)
102 | | | └─Sequential: 3-61 [-1, 256, 42, 25] (recursive)
103 | | | └─Sequential: 3-62 [-1, 256, 42, 25] (recursive)
104 | | | └─Sequential: 3-63 [-1, 256, 42, 25] (recursive)
105 | | | └─Sequential: 3-64 [-1, 256, 42, 25] (recursive)
106 | | | └─Sequential: 3-65 [-1, 256, 42, 25] (recursive)
107 | | └─ModuleList: 2 [] --
108 | | | └─Conv2d: 3-66 [-1, 256, 84, 50] 65,792
109 | | └─ModuleList: 2 [] --
110 | | | └─Sequential: 3-67 [-1, 256, 16, 16] (recursive)
111 | | | └─Sequential: 3-68 [-1, 256, 16, 16] (recursive)
112 | | | └─Sequential: 3-69 [-1, 256, 16, 16] (recursive)
113 | | | └─Sequential: 3-70 [-1, 256, 16, 16] (recursive)
114 | | | └─Sequential: 3-71 [-1, 256, 16, 16] (recursive)
115 | | | └─Sequential: 3-72 [-1, 256, 16, 16] (recursive)
116 | | | └─Sequential: 3-73 [-1, 256, 16, 16] (recursive)
117 | | └─Conv2d: 2-8 [-1, 79, 16, 16] (recursive)
118 | | └─ModuleList: 2 [] --
119 | | | └─Sequential: 3-74 [-1, 256, 42, 25] (recursive)
120 | | | └─Sequential: 3-75 [-1, 256, 42, 25] (recursive)
121 | | | └─Sequential: 3-76 [-1, 256, 42, 25] (recursive)
122 | | | └─Sequential: 3-77 [-1, 256, 42, 25] (recursive)
123 | | | └─Sequential: 3-78 [-1, 256, 42, 25] (recursive)
124 | | | └─Sequential: 3-79 [-1, 256, 42, 25] (recursive)
125 | | | └─Sequential: 3-80 [-1, 256, 42, 25] (recursive)
126 | | └─ModuleList: 2 [] --
127 | | | └─Conv2d: 3-81 [-1, 144, 84, 50] 37,008
128 | | └─ModuleList: 2 [] --
129 | | | └─Sequential: 3-82 [-1, 256, 12, 12] (recursive)
130 | | | └─Sequential: 3-83 [-1, 256, 12, 12] (recursive)
131 | | | └─Sequential: 3-84 [-1, 256, 12, 12] (recursive)
132 | | | └─Sequential: 3-85 [-1, 256, 12, 12] (recursive)
133 | | | └─Sequential: 3-86 [-1, 256, 12, 12] (recursive)
134 | | | └─Sequential: 3-87 [-1, 256, 12, 12] (recursive)
135 | | | └─Sequential: 3-88 [-1, 256, 12, 12] (recursive)
136 | | └─Conv2d: 2-9 [-1, 79, 12, 12] (recursive)
137 | ==========================================================================================
138 | Total params: 36,892,591
139 | Trainable params: 36,892,591
140 | Non-trainable params: 0
141 | Total mult-adds (G): 296.58
142 | ==========================================================================================
143 | Input size (MB): 12.20
144 | Forward/backward pass size (MB): 2671.69
145 | Params size (MB): 140.73
146 | Estimated Total Size (MB): 2824.63
147 | ==========================================================================================
148 | ```
--------------------------------------------------------------------------------
/boda/models/feature_extractor/efficientnet.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import math
3 | from functools import partial
4 | from typing import Any, Callable, Optional, List
5 |
6 | import torch
7 | from torch import nn, Tensor
8 | from torchvision.ops import StochasticDepth
9 | from torchvision.ops.misc import ConvNormActivation, SqueezeExcitation
10 |
11 | # from torchvision._internally_replaced_utils import load_state_dict_from_url
12 |
13 |
14 | __all__ = [
15 | "EfficientNet",
16 | "efficientnet_b0",
17 | "efficientnet_b1",
18 | "efficientnet_b2",
19 | "efficientnet_b3",
20 | "efficientnet_b4",
21 | "efficientnet_b5",
22 | "efficientnet_b6",
23 | "efficientnet_b7",
24 | ]
25 |
26 |
27 | model_urls = {
28 | # Weights ported from https://github.com/rwightman/pytorch-image-models/
29 | "efficientnet_b0": "https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth",
30 | "efficientnet_b1": "https://download.pytorch.org/models/efficientnet_b1_rwightman-533bc792.pth",
31 | "efficientnet_b2": "https://download.pytorch.org/models/efficientnet_b2_rwightman-bcdf34b7.pth",
32 | "efficientnet_b3": "https://download.pytorch.org/models/efficientnet_b3_rwightman-cf984f9c.pth",
33 | "efficientnet_b4": "https://download.pytorch.org/models/efficientnet_b4_rwightman-7eb33cd5.pth",
34 | # Weights ported from https://github.com/lukemelas/EfficientNet-PyTorch/
35 | "efficientnet_b5": "https://download.pytorch.org/models/efficientnet_b5_lukemelas-b6417697.pth",
36 | "efficientnet_b6": "https://download.pytorch.org/models/efficientnet_b6_lukemelas-c76e70fd.pth",
37 | "efficientnet_b7": "https://download.pytorch.org/models/efficientnet_b7_lukemelas-dcc49843.pth",
38 | }
39 |
40 |
41 | def _make_divisible(v: float, divisor: int, min_value: Optional[int] = None) -> int:
42 | if min_value is None:
43 | min_value = divisor
44 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
45 | # Make sure that round down does not go down by more than 10%.
46 | if new_v < 0.9 * v:
47 | new_v += divisor
48 | return new_v
49 |
50 |
51 | class MBConvConfig:
52 | # Stores information listed at Table 1 of the EfficientNet paper
53 | def __init__(
54 | self,
55 | expand_ratio: float,
56 | kernel: int,
57 | stride: int,
58 | input_channels: int,
59 | out_channels: int,
60 | num_layers: int,
61 | width_mult: float,
62 | depth_mult: float,
63 | ) -> None:
64 | self.expand_ratio = expand_ratio
65 | self.kernel = kernel
66 | self.stride = stride
67 | self.input_channels = self.adjust_channels(input_channels, width_mult)
68 | self.out_channels = self.adjust_channels(out_channels, width_mult)
69 | self.num_layers = self.adjust_depth(num_layers, depth_mult)
70 |
71 | def __repr__(self) -> str:
72 | s = self.__class__.__name__ + "("
73 | s += "expand_ratio={expand_ratio}"
74 | s += ", kernel={kernel}"
75 | s += ", stride={stride}"
76 | s += ", input_channels={input_channels}"
77 | s += ", out_channels={out_channels}"
78 | s += ", num_layers={num_layers}"
79 | s += ")"
80 | return s.format(**self.__dict__)
81 |
82 | @staticmethod
83 | def adjust_channels(
84 | channels: int, width_mult: float, min_value: Optional[int] = None
85 | ) -> int:
86 | return _make_divisible(channels * width_mult, 8, min_value)
87 |
88 | @staticmethod
89 | def adjust_depth(num_layers: int, depth_mult: float):
90 | return int(math.ceil(num_layers * depth_mult))
91 |
92 |
93 | def _efficientnet_conf(width_mult: float, depth_mult: float) -> List[MBConvConfig]:
94 | bneck_conf = partial(MBConvConfig, width_mult=width_mult, depth_mult=depth_mult)
95 | inverted_residual_setting = [
96 | bneck_conf(1, 3, 1, 32, 16, 1),
97 | bneck_conf(6, 3, 2, 16, 24, 2),
98 | bneck_conf(6, 5, 2, 24, 40, 2),
99 | bneck_conf(6, 3, 2, 40, 80, 3),
100 | bneck_conf(6, 5, 1, 80, 112, 3),
101 | bneck_conf(6, 5, 2, 112, 192, 4),
102 | bneck_conf(6, 3, 1, 192, 320, 1),
103 | ]
104 |
105 | return inverted_residual_setting
106 |
107 |
108 | class MBConv(nn.Module):
109 | def __init__(
110 | self,
111 | cnf: MBConvConfig,
112 | stochastic_depth_prob: float,
113 | norm_layer: Callable[..., nn.Module],
114 | se_layer: Callable[..., nn.Module] = SqueezeExcitation,
115 | ) -> None:
116 | super().__init__()
117 |
118 | if not (1 <= cnf.stride <= 2):
119 | raise ValueError("illegal stride value")
120 |
121 | self.use_res_connect = (
122 | cnf.stride == 1 and cnf.input_channels == cnf.out_channels
123 | )
124 |
125 | layers: List[nn.Module] = []
126 | activation_layer = nn.SiLU
127 |
128 | # expand
129 | expanded_channels = cnf.adjust_channels(cnf.input_channels, cnf.expand_ratio)
130 | if expanded_channels != cnf.input_channels:
131 | layers.append(
132 | ConvNormActivation(
133 | cnf.input_channels,
134 | expanded_channels,
135 | kernel_size=1,
136 | norm_layer=norm_layer,
137 | activation_layer=activation_layer,
138 | )
139 | )
140 |
141 | # depthwise
142 | layers.append(
143 | ConvNormActivation(
144 | expanded_channels,
145 | expanded_channels,
146 | kernel_size=cnf.kernel,
147 | stride=cnf.stride,
148 | groups=expanded_channels,
149 | norm_layer=norm_layer,
150 | activation_layer=activation_layer,
151 | )
152 | )
153 |
154 | # squeeze and excitation
155 | squeeze_channels = max(1, cnf.input_channels // 4)
156 | layers.append(
157 | se_layer(
158 | expanded_channels,
159 | squeeze_channels,
160 | activation=partial(nn.SiLU, inplace=True),
161 | )
162 | )
163 |
164 | # project
165 | layers.append(
166 | ConvNormActivation(
167 | expanded_channels,
168 | cnf.out_channels,
169 | kernel_size=1,
170 | norm_layer=norm_layer,
171 | activation_layer=None,
172 | )
173 | )
174 |
175 | self.block = nn.Sequential(*layers)
176 | self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row")
177 | self.out_channels = cnf.out_channels
178 | self.stride = cnf.stride
179 |
180 | def forward(self, input: Tensor) -> Tensor:
181 | result = self.block(input)
182 | if self.use_res_connect:
183 | result = self.stochastic_depth(result)
184 | result += input
185 | return result
186 |
187 |
188 | class EfficientNet(nn.Module):
189 | def __init__(
190 | self,
191 | width_mult: float,
192 | depth_mult: float,
193 | stochastic_depth_prob: float = 0.2,
194 | block: Optional[Callable[..., nn.Module]] = None,
195 | norm_layer: Optional[Callable[..., nn.Module]] = None,
196 | **kwargs: Any,
197 | ) -> None:
198 | super().__init__()
199 | self.layers = nn.ModuleList()
200 | self.channels = []
201 |
202 | if block is None:
203 | block = MBConv
204 |
205 | if norm_layer is None:
206 | norm_layer = nn.BatchNorm2d
207 |
208 | self.inverted_residual_setting = _efficientnet_conf(
209 | width_mult=width_mult, depth_mult=depth_mult
210 | )
211 |
212 | # building first layer
213 | firstconv_output_channels = self.inverted_residual_setting[0].input_channels
214 | self.firstconv_layer = ConvNormActivation(
215 | 3,
216 | firstconv_output_channels,
217 | kernel_size=3,
218 | stride=2,
219 | norm_layer=norm_layer,
220 | activation_layer=nn.SiLU,
221 | )
222 |
223 | # building inverted residual blocks
224 | total_stage_blocks = sum(
225 | cnf.num_layers for cnf in self.inverted_residual_setting
226 | )
227 | stage_block_id = 0
228 | for cnf in self.inverted_residual_setting:
229 | stage: List[nn.Module] = []
230 | for _ in range(cnf.num_layers):
231 | # copy to avoid modifications. shallow copy is enough
232 | block_cnf = copy.copy(cnf)
233 |
234 | # overwrite info if not the first conv in the stage
235 | if stage:
236 | block_cnf.input_channels = block_cnf.out_channels
237 | block_cnf.stride = 1
238 |
239 | # adjust stochastic depth probability based on the depth of the stage block
240 | sd_prob = (
241 | stochastic_depth_prob * float(stage_block_id) / total_stage_blocks
242 | )
243 |
244 | stage.append(block(block_cnf, sd_prob, norm_layer))
245 | stage_block_id += 1
246 |
247 | # self.channels.append(block_cnf.out_channels)
248 | self.layers.extend(stage)
249 |
250 | for m in self.modules():
251 | if isinstance(m, nn.Conv2d):
252 | nn.init.kaiming_normal_(m.weight, mode="fan_out")
253 | if m.bias is not None:
254 | nn.init.zeros_(m.bias)
255 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
256 | nn.init.ones_(m.weight)
257 | nn.init.zeros_(m.bias)
258 |
259 | def forward(self, inputs: Tensor) -> List[Tensor]:
260 | x = self.firstconv_layer(inputs)
261 |
262 | outputs = []
263 | last_x = None
264 | for i, layer in enumerate(self.layers):
265 | x = layer(x)
266 |
267 | if layer.stride == 2:
268 | outputs.append(last_x)
269 | elif i == len(self.layers) - 1:
270 | outputs.append(x)
271 | last_x = x
272 |
273 | del last_x
274 |
275 | return outputs[1:]
276 |
277 | def from_pretrained(self, path):
278 | state_dict = torch.load(path)
279 | # state_dict = load_state_dict_from_url(model_urls[arch], progress=True)
280 |
281 | try:
282 | excepted_keys = [
283 | key
284 | for key in list(state_dict)
285 | if key.startswith("features.8") or key.startswith("classifier")
286 | ]
287 | for excepted_key in excepted_keys:
288 | state_dict.pop(excepted_key)
289 | except KeyError:
290 | pass
291 |
292 | self.load_state_dict(state_dict, strict=False)
293 |
294 |
295 | def efficientnet_b0() -> EfficientNet:
296 | backbone = EfficientNet(width_mult=1.0, depth_mult=1.0)
297 | return backbone
298 |
299 |
300 | def efficientnet_b1() -> EfficientNet:
301 | backbone = EfficientNet(width_mult=1.0, depth_mult=1.1)
302 | return backbone
303 |
304 |
305 | def efficientnet_b2() -> EfficientNet:
306 | backbone = EfficientNet(width_mult=1.1, depth_mult=1.2)
307 | return backbone
308 |
309 |
310 | def efficientnet_b3() -> EfficientNet:
311 | backbone = EfficientNet(width_mult=1.2, depth_mult=1.4)
312 | return backbone
313 |
314 |
315 | def efficientnet_b4() -> EfficientNet:
316 | backbone = EfficientNet(width_mult=1.4, depth_mult=1.8)
317 | return backbone
318 |
319 |
320 | def efficientnet_b5() -> EfficientNet:
321 | backbone = EfficientNet(
322 | width_mult=1.6,
323 | depth_mult=2.2,
324 | norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01),
325 | )
326 | return backbone
327 |
328 |
329 | def efficientnet_b6() -> EfficientNet:
330 | backbone = EfficientNet(
331 | width_mult=1.8,
332 | depth_mult=2.6,
333 | norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01),
334 | )
335 | return backbone
336 |
337 |
338 | def efficientnet_b7() -> EfficientNet:
339 | backbone = EfficientNet(
340 | width_mult=2.0,
341 | depth_mult=3.1,
342 | norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01),
343 | )
344 | return backbone
345 |
--------------------------------------------------------------------------------
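Unlike the classifier above, this `EfficientNet` is written as a backbone: `forward` records the activation produced just before every stride-2 block plus the final activation, drops the first (highest-resolution) entry, and returns the rest as a list. A minimal sketch, assuming the import path matches the repository layout:

```python
import torch

# Import path assumed from the repository layout above.
from boda.models.feature_extractor.efficientnet import efficientnet_b0

backbone = efficientnet_b0()
backbone.eval()

with torch.no_grad():
    feature_maps = backbone(torch.randn(1, 3, 512, 512))

# With the b0 configuration this should yield four maps at strides 4, 8, 16
# and 32 (24, 40, 112 and 320 channels respectively for width_mult=1.0).
for feature_map in feature_maps:
    print(feature_map.shape)
```

`from_pretrained` can then load a locally saved torchvision-style state dict, with the classifier and `features.8` weights dropped as shown in the method.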
/boda/lib/torchsummary/torchsummary.py:
--------------------------------------------------------------------------------
1 | """ torchsummary.py """
2 | from typing import (
3 | Any,
4 | Dict,
5 | Iterable,
6 | Iterator,
7 | List,
8 | Mapping,
9 | Optional,
10 | Sequence,
11 | Tuple,
12 | Union,
13 | )
14 |
15 | import torch
16 | import torch.nn as nn
17 | from torch.utils.hooks import RemovableHandle
18 |
19 | from .formatting import FormattingOptions, Verbosity
20 | from .layer_info import LayerInfo
21 | from .model_statistics import CORRECTED_INPUT_SIZE_TYPE, HEADER_TITLES, ModelStatistics
22 |
23 | # Some modules do the computation themselves using parameters
24 | # or the parameters of children. Treat these as layers.
25 | LAYER_MODULES = (torch.nn.MultiheadAttention,)
26 | INPUT_SIZE_TYPE = Sequence[Union[int, Sequence[Any], torch.Size]]
27 | INPUT_DATA_TYPE = Optional[
28 | Union[torch.Tensor, torch.Size, Sequence[torch.Tensor], INPUT_SIZE_TYPE]
29 | ]
30 | DEFAULT_COLUMN_NAMES = ("output_size", "num_params")
31 |
32 |
33 | def summary(
34 | model: nn.Module,
35 | input_data: INPUT_DATA_TYPE = None,
36 | *args: Any,
37 | batch_dim: Optional[int] = 0,
38 | branching: bool = True,
39 | col_names: Optional[Iterable[str]] = None,
40 | col_width: int = 25,
41 | depth: int = 3,
42 | device: Optional[torch.device] = None,
43 | dtypes: Optional[List[torch.dtype]] = None,
44 | verbose: int = 1,
45 | **kwargs: Any,
46 | ) -> ModelStatistics:
47 | """
48 | Summarize the given PyTorch model. Summarized information includes:
49 | 1) Layer names,
50 | 2) input/output shapes,
51 | 3) kernel shape,
52 | 4) # of parameters,
53 | 5) # of operations (Mult-Adds)
54 |
55 | Args:
56 | model (nn.Module):
57 | PyTorch model to summarize
58 |
59 | input_data (Sequence of Sizes or Tensors):
60 | Example input tensor of the model (dtypes inferred from model input).
61 | - OR -
62 | Shape of input data as a List/Tuple/torch.Size
63 | (dtypes must match model input, default is FloatTensors).
64 | You should NOT include batch size in the tuple.
65 | - OR -
66 | If input_data is not provided, no forward pass through the network is
67 | performed, and the provided model information is limited to layer names.
68 | Default: None
69 |
70 | batch_dim (int):
71 | Batch_dimension of input data. If batch_dim is None, the input data
72 | is assumed to contain the batch dimension.
73 | WARNING: in a future version, the default will change to None.
74 | Default: 0
75 |
76 | branching (bool):
77 | Whether to use the branching layout for the printed output.
78 | Default: True
79 |
80 | col_names (Iterable[str]):
81 | Specify which columns to show in the output. Currently supported:
82 | ("input_size", "output_size", "num_params", "kernel_size", "mult_adds")
83 | If input_data is not provided, only "num_params" is used.
84 | Default: ("output_size", "num_params")
85 |
86 | col_width (int):
87 | Width of each column.
88 | Default: 25
89 |
90 | depth (int):
91 | Number of nested layers to traverse (e.g. Sequentials).
92 | Default: 3
93 |
94 | device (torch.Device):
95 | Uses this torch device for model and input_data.
96 | If not specified, uses result of torch.cuda.is_available().
97 | Default: None
98 |
99 | dtypes (List[torch.dtype]):
100 | For multiple inputs, specify the size of both inputs, and
101 | also specify the types of each parameter here.
102 | Default: None
103 |
104 | verbose (int):
105 | 0 (quiet): No output
106 | 1 (default): Print model summary
107 | 2 (verbose): Show weight and bias layers in full detail
108 | Default: 1
109 |
110 | *args, **kwargs:
111 | Other arguments used in `model.forward` function.
112 |
113 | Return:
114 | ModelStatistics object
115 | See torchsummary/model_statistics.py for more information.
116 | """
117 | if col_names is None:
118 | col_names = ("num_params",) if input_data is None else DEFAULT_COLUMN_NAMES
119 |
120 | validate_user_params(input_data, col_names, verbose)
121 | input_size: CORRECTED_INPUT_SIZE_TYPE = []
122 | summary_list: List[LayerInfo] = []
123 | hooks: Optional[List[RemovableHandle]] = None if input_data is None else []
124 | idx: Dict[int, int] = {}
125 | apply_hooks(model, model, batch_dim, depth, summary_list, idx, hooks)
126 |
127 | if input_data is not None:
128 | if device is None:
129 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
130 |
131 | x, input_size = process_input_data(input_data, batch_dim, device, dtypes)
132 | args, kwargs = set_device(args, device), set_device(kwargs, device)
133 | try:
134 | with torch.no_grad():
135 | _ = model.to(device)(*x, *args, **kwargs) # type: ignore[misc]
136 | except Exception as e:
137 | executed_layers = [layer for layer in summary_list if layer.executed]
138 | raise RuntimeError(
139 | "Failed to run torchsummary. See above stack traces for more details. "
140 | "Executed layers up to: {}".format(executed_layers)
141 | ) from e
142 | finally:
143 | if hooks is not None:
144 | for hook in hooks:
145 | hook.remove()
146 |
147 | formatting = FormattingOptions(branching, depth, verbose, col_names, col_width)
148 | formatting.set_layer_name_width(summary_list)
149 | results = ModelStatistics(summary_list, input_size, formatting)
150 | if verbose > Verbosity.QUIET.value:
151 | print(results)
152 | return results
153 |
154 |
155 | def validate_user_params(
156 | input_data: INPUT_DATA_TYPE, col_names: Iterable[str], verbose: int
157 | ) -> None:
158 | """Raise exceptions if the user's input is invalid."""
159 | if verbose not in (0, 1, 2):
160 | raise ValueError(
161 | "Verbose must be either 0 (quiet), 1 (default), or 2 (verbose)."
162 | )
163 |
164 | for col in col_names:
165 | if col not in HEADER_TITLES.keys():
166 | raise ValueError(f"Column {col} is not a valid column name.")
167 | if input_data is None and col not in ("num_params", "kernel_size"):
168 | raise ValueError(f"You must pass input_data in order to use column {col}")
169 |
170 |
171 | def set_device(data: Any, device: torch.device) -> Any:
172 | """Sets device for all input types and collections of input types."""
173 | if torch.is_tensor(data):
174 | return data.to(device, non_blocking=True)
175 |
176 | # Recursively apply to collection items
177 | elem_type = type(data)
178 | if isinstance(data, Mapping):
179 | return elem_type({k: set_device(v, device) for k, v in data.items()})
180 | if isinstance(data, tuple) and hasattr(data, "_fields"): # Named tuple
181 | return elem_type(*(set_device(d, device) for d in data))
182 | if isinstance(data, Iterable) and not isinstance(data, str):
183 | return elem_type([set_device(d, device) for d in data])
184 | # Data is neither a tensor nor a collection
185 | return data
186 |
187 |
188 | def process_input_data(
189 | input_data: INPUT_DATA_TYPE,
190 | batch_dim: Optional[int],
191 | device: torch.device,
192 | dtypes: Optional[List[torch.dtype]],
193 | ) -> Tuple[INPUT_DATA_TYPE, CORRECTED_INPUT_SIZE_TYPE]:
194 | """Create sample input data and the corrected input size."""
195 | if isinstance(input_data, torch.Tensor):
196 | input_size = get_correct_input_sizes(input_data.size())
197 | x = [input_data.to(device)]
198 |
199 | elif isinstance(input_data, (list, tuple)):
200 | if all(isinstance(data, torch.Tensor) for data in input_data):
201 | input_sizes = [
202 | data.size() for data in input_data # type: ignore[union-attr]
203 | ]
204 | input_size = get_correct_input_sizes(input_sizes)
205 | x = set_device(input_data, device)
206 | else:
207 | if dtypes is None:
208 | dtypes = [torch.float] * len(input_data)
209 | input_size = get_correct_input_sizes(input_data)
210 | x = get_input_tensor(input_size, batch_dim, dtypes, device)
211 |
212 | else:
213 | raise TypeError(
214 | "Input type is not recognized. Please ensure input_data is valid.\n"
215 | "For multiple inputs to the network, ensure input_data passed in is "
216 | "a sequence of tensors or a list of tuple sizes. If you are having "
217 | "trouble here, please submit a GitHub issue."
218 | )
219 |
220 | return x, input_size
221 |
222 |
223 | def get_input_tensor(
224 | input_size: CORRECTED_INPUT_SIZE_TYPE,
225 | batch_dim: Optional[int],
226 | dtypes: List[torch.dtype],
227 | device: torch.device,
228 | ) -> List[torch.Tensor]:
229 | """Get input_tensor with batch size 2 for use in model.forward()"""
230 | x = []
231 | for size, dtype in zip(input_size, dtypes):
232 | # add batch_size of 2 for BatchNorm
233 | input_tensor = torch.rand(*size)
234 | if batch_dim is not None:
235 | input_tensor = input_tensor.unsqueeze(dim=batch_dim)
236 | input_tensor = torch.cat([input_tensor] * 2, dim=batch_dim)
237 | x.append(input_tensor.to(device).type(dtype))
238 | return x
239 |
240 |
241 | def get_correct_input_sizes(input_size: INPUT_SIZE_TYPE) -> CORRECTED_INPUT_SIZE_TYPE:
242 | """
243 | Convert input_size to the correct form, which is a list of tuples.
244 | Also handles multiple inputs to the network.
245 | """
246 |
247 | def flatten(nested_array: INPUT_SIZE_TYPE) -> Iterator[Any]:
248 | """Flattens a nested array."""
249 | for item in nested_array:
250 | if isinstance(item, (list, tuple)):
251 | yield from flatten(item)
252 | else:
253 | yield item
254 |
255 | if not input_size or any(size <= 0 for size in flatten(input_size)):
256 | raise ValueError("Input_data is invalid, or negative size found in input_data.")
257 |
258 | if isinstance(input_size, list) and isinstance(input_size[0], int):
259 | return [tuple(input_size)]
260 | if isinstance(input_size, list):
261 | return input_size
262 | if isinstance(input_size, tuple) and isinstance(input_size[0], tuple):
263 | return list(input_size)
264 | return [input_size]
265 |
266 |
267 | def apply_hooks(
268 | module: nn.Module,
269 | orig_model: nn.Module,
270 | batch_dim: Optional[int],
271 | depth: int,
272 | summary_list: List[LayerInfo],
273 | idx: Dict[int, int],
274 | hooks: Optional[List[RemovableHandle]],
275 | curr_depth: int = 0,
276 | parent_info: Optional[LayerInfo] = None,
277 | ) -> None:
278 | """
279 | If input_data is provided, recursively adds hooks to all layers of the model.
280 | Else, fills summary_list with layer info without computing a
281 | forward pass through the network.
282 | """
283 | # Fallback is used if the layer's hook is never called, in ModuleLists, for example.
284 | info = LayerInfo(module, curr_depth, None, parent_info)
285 |
286 | def pre_hook(module: nn.Module, inputs: Any) -> None:
287 | """Create a LayerInfo object to aggregate information about that layer."""
288 | del inputs
289 | nonlocal info
290 | idx[curr_depth] = idx.get(curr_depth, 0) + 1
291 | info = LayerInfo(module, curr_depth, idx[curr_depth], parent_info)
292 | info.check_recursive(summary_list)
293 | summary_list.append(info)
294 |
295 | def hook(module: nn.Module, inputs: Any, outputs: Any) -> None:
296 | """Update LayerInfo after forward pass."""
297 | del module
298 | info.input_size = info.calculate_size(inputs, batch_dim)
299 | info.output_size = info.calculate_size(outputs, batch_dim)
300 | info.calculate_macs()
301 | info.executed = True
302 |
303 | submodules = [m for m in module.modules() if m is not orig_model]
304 | if module != orig_model or isinstance(module, LAYER_MODULES) or not submodules:
305 | if hooks is None:
306 | pre_hook(module, None)
307 | else:
308 | hooks.append(module.register_forward_pre_hook(pre_hook))
309 | hooks.append(module.register_forward_hook(hook))
310 |
311 | if curr_depth <= depth:
312 | for child in module.children():
313 | apply_hooks(
314 | child,
315 | orig_model,
316 | batch_dim,
317 | depth,
318 | summary_list,
319 | idx,
320 | hooks,
321 | curr_depth + 1,
322 | info,
323 | )
324 |
--------------------------------------------------------------------------------
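The `summary` entry point documented above can be driven either by example tensors or by bare input sizes. A minimal sketch of the size-based path, assuming `summary` is re-exported from `boda.lib.torchsummary` (the package `__init__.py` is not shown here):

```python
import torch.nn as nn

# Import path assumed; if the package does not re-export it, import from
# boda.lib.torchsummary.torchsummary instead.
from boda.lib.torchsummary import summary

model = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3, padding=1),
    nn.BatchNorm2d(16),
    nn.ReLU(inplace=True),
    nn.AdaptiveAvgPool2d(1),
    nn.Flatten(),
    nn.Linear(16, 10),
)

# Sizes exclude the batch dimension (batch_dim defaults to 0), and the helper
# internally duplicates the sample to a batch of 2 so BatchNorm layers run.
stats = summary(
    model,
    (3, 64, 64),
    col_names=("output_size", "num_params", "mult_adds"),
)
```

With `verbose=1` (the default) the table is printed and a `ModelStatistics` object is returned; the layer table in the SOLO README above has this same format.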