├── mypy.ini ├── CHANGELOG.md ├── pytest.ini ├── boda ├── lib │ ├── __init__.py │ ├── torchinfo │ │ ├── __init__.py │ │ ├── formatting.py │ │ ├── layer_info.py │ │ └── model_statistics.py │ └── torchsummary │ │ ├── __init__.py │ │ ├── formatting.py │ │ ├── layer_info.py │ │ ├── model_statistics.py │ │ └── torchsummary.py ├── ops │ ├── __init__.py │ └── anchor_generators.py ├── models │ ├── centermask │ │ └── __init__.py │ ├── feature_extractor │ │ ├── __init__.py │ │ ├── vggnet.py │ │ ├── pafpn.py │ │ ├── fpn.py │ │ ├── resnet.py │ │ ├── mobilenetv2.py │ │ └── efficientnet.py │ ├── __init__.py │ ├── yolox │ │ ├── __init__.py │ │ ├── configuration_yolox.py │ │ ├── loss_yolox.py │ │ └── utils.py │ ├── ssd │ │ ├── __init__.py │ │ ├── configuration_ssd.py │ │ ├── README.md │ │ ├── inference_ssd.py │ │ └── loss_ssd.py │ ├── solov2 │ │ ├── __init__.py │ │ ├── configuration_solov1.py │ │ ├── architecture_decoupled_solov1.py │ │ ├── inference_solov1.py │ │ └── README.md │ └── yolact │ │ ├── __init__.py │ │ ├── configuration_yolact.py │ │ ├── README.md │ │ └── inference_yolact.py ├── __init__.py ├── setup.py ├── README.md ├── file_utils.py ├── custom_activation.py ├── custom_modules.py ├── postprocessing.py └── base_configuration.py ├── benchmarks └── benchmark_yolact.py ├── .flake8 ├── boda.png ├── docs ├── requirements.txt ├── source │ ├── index.rst │ └── conf.py ├── Makefile └── make.bat ├── environment.yml ├── run_test_ssd.py ├── CONTRIBUTING.md ├── .gitignore ├── calc_flops.py ├── setup.py ├── README.md └── run_test.py /mypy.ini: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boda/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boda/ops/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarks/benchmark_yolact.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boda/models/centermask/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 100 3 | -------------------------------------------------------------------------------- /boda.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/unerue/boda/HEAD/boda.png -------------------------------------------------------------------------------- /boda/models/feature_extractor/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import FeaturePyramidNetworks 2 | from .vggnet import * 3 | from .resnet import * 4 | 
-------------------------------------------------------------------------------- /boda/models/__init__.py: -------------------------------------------------------------------------------- 1 | # from .feature_extractor import * 2 | from .ssd import * 3 | from .yolact import * 4 | 5 | # from .yolox import * 6 | -------------------------------------------------------------------------------- /boda/lib/torchinfo/__init__.py: -------------------------------------------------------------------------------- 1 | """ torchinfo """ 2 | from .model_statistics import ModelStatistics 3 | from .torchinfo import summary 4 | 5 | __all__ = ("ModelStatistics", "summary") 6 | -------------------------------------------------------------------------------- /boda/lib/torchsummary/__init__.py: -------------------------------------------------------------------------------- 1 | """ torchsummary """ 2 | from .model_statistics import ModelStatistics 3 | from .torchsummary import summary 4 | 5 | __all__ = ("ModelStatistics", "summary") 6 | -------------------------------------------------------------------------------- /boda/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import * 2 | 3 | 4 | # __all__ = [ 5 | # 'SsdConfig', 'SsdModel', 'SsdLoss', 6 | # 'YolactConfig', 'YolactModel', 'YolactLoss', 7 | # 'Solov1Config', 'Solov1Model', 8 | # ] 9 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | numpy 3 | sphinx-copybutton>=0.3.1 4 | sphinx-gallery>=0.9.0 5 | sphinx==3.5.4 6 | tabulate 7 | Jinja2<3.1.* 8 | -e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme -------------------------------------------------------------------------------- /boda/models/yolox/__init__.py: -------------------------------------------------------------------------------- 1 | # from .configuration_yolox import YoloXConfig 2 | # from .architecture_yolox import YoloXModel 3 | # # from .loss_yolox import Yo 4 | 5 | 6 | # __all__ = [ 7 | # 'YoloXConfig', 'YoloXModel', 8 | # ] 9 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: boda 2 | channels: 3 | - conda-forge 4 | - pytorch 5 | - anaconda 6 | dependencies: 7 | - python=3.7 8 | - pytorch=1.7 9 | - torchvision=0.8 10 | - cudatoolkit=10.2 11 | - numpy 12 | - cython 13 | # - pip: 14 | # - pycocotools 15 | # - opencv-python -------------------------------------------------------------------------------- /boda/models/ssd/__init__.py: -------------------------------------------------------------------------------- 1 | from .architecture_ssd import SsdPredictNeck, SsdPredictHead, SsdModel 2 | from .configuration_ssd import SsdConfig 3 | 4 | # from .loss_ssd import SsdLoss 5 | 6 | 7 | __all__ = [ 8 | "SsdConfig", 9 | "SsdPredictNeck", 10 | "SsdPredictHead", 11 | "SsdModel", 12 | ] 13 | -------------------------------------------------------------------------------- /run_test_ssd.py: -------------------------------------------------------------------------------- 1 | from boda.models import SsdModel, SsdConfig 2 | 3 | 4 | from boda.lib.torchsummary import summary 5 | import torch 6 | 7 | config = SsdConfig(num_classes=80) 8 | model = SsdModel(config).to('cuda') 9 | model.eval() 10 | print(model) 11 | # 
print(summary(model, input_size=(16, 3, 550, 550), verbose=0)) 12 | print(summary(model, input_data=(3, 550, 550), verbose=0)) -------------------------------------------------------------------------------- /boda/models/solov2/__init__.py: -------------------------------------------------------------------------------- 1 | # from .configuration_solov1 import Solov1Config 2 | # from .architecture_solov1 import Solov1PredictNeck, Solov1PredictHead, Solov1Model 3 | # # from .architecture_decoupled_solov1 import DecoupledSolov1Model 4 | # from .loss_solov1 import Solov1Loss 5 | 6 | 7 | # __all__ = [ 8 | # 'Solov1Loss', 'Solov1Config', 'Solov1PredictNeck', 9 | # 'Solov1PredictHead', 'Solov1Model', 'Solov1Loss' 10 | # ] 11 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to boda 2 | --- 3 | 4 | ## Code formatting and typing 5 | 6 | ### Formatting 7 | 8 | To format your code, install `ufmt` 9 | 10 | ```bash 11 | pip install ufmt==1.3.2 black==21.9b0 usort==0.6.4 12 | ``` 13 | 14 | ```bash 15 | ufmt format boda 16 | ``` 17 | 18 | ### Type annotations 19 | 20 | ```bash 21 | mypy --config-file mypy.ini 22 | ``` 23 | 24 | ## Unit tests 25 | 26 | ```bash 27 | pytest test -vvv 28 | ``` 29 | 30 | ## Documentation 31 | ```bash 32 | cd docs 33 | make html-noplot 34 | ``` -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. boda documentation master file, created by 2 | sphinx-quickstart on Mon Jul 11 19:55:45 2022. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to boda's documentation! 7 | ================================ 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | 14 | 15 | Indices and tables 16 | ================== 17 | 18 | * :ref:`genindex` 19 | * :ref:`modindex` 20 | * :ref:`search` 21 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /boda/setup.py: -------------------------------------------------------------------------------- 1 | # from Cython.Build import cythonize 2 | # from numpy.distutils.misc_util import Configuration 3 | 4 | 5 | # def cythonize_extensions(top_path, config): 6 | # config.ext_modules = cythonize( 7 | # config.ext_modules, 8 | # compiler_directives={'language_level': '3'}) 9 | 10 | 11 | # def configuration(parent_package='', top_path=None): 12 | # config = Configuration('boda', parent_package, top_path) 13 | # config.add_subpackage('models') 14 | # config.add_subpackage('utils') 15 | # config.add_subpackage('lib') 16 | # cythonize_extensions(top_path, config) 17 | 18 | # return config 19 | 20 | 21 | # if __name__ == '__main__': 22 | # from numpy.distutils.core import setup 23 | 24 | # setup(**configuration(top_path='').todict()) 25 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | .vscode 3 | .DS_Store 4 | __pycache__ 5 | boda.egg-info 6 | build 7 | dist 8 | tests 9 | misc 10 | doc 11 | boda/models/cascade_mask_rcnn/ 12 | boda/models/efficientdet/ 13 | boda/models/faster_rcnn/ 14 | boda/models/fcos/ 15 | boda/models/keypoint_rcnn/ 16 | boda/models/mask_rcnn/ 17 | boda/models/polarmask/ 18 | # boda/models/solov1/ 19 | boda/models/yolact_edge/ 20 | boda/models/yolov4/ 21 | 22 | # boda/ops/ 23 | 24 | benchmarks/data/ 25 | benchmarks/*pth 26 | benchmarks/samples/ 27 | benchmarks/dataset/ 28 | benchmarks/benchmark_yolov1.py 29 | benchmarks/benchmark_backbone.py 30 | 31 | boda/dev/ 32 | dev/ 33 | old/ 34 | cache/ 35 | 36 | run.py 37 | test_ssd.py 38 | test_yolov1.py 39 | test_yolact.py 40 | test_solov1.py 41 | test_fcos.py 42 | test_backbone.py 43 | test_centermask.py 44 | eval_yolact.py 45 | test_faster_rcnn.py 46 | test_mask_rcnn.py 47 | test_keypoint_rcnn.py 48 | 49 | logo.pptx 50 | *.pth 51 | *.zip 52 | *.jpg -------------------------------------------------------------------------------- /boda/models/yolox/configuration_yolox.py: -------------------------------------------------------------------------------- 1 | # from typing import List 2 | 3 | # from ...base_configuration import BaseConfig 
4 | 5 | 6 | # class YoloXConfig(BaseConfig): 7 | # model_name = 'yolox' 8 | 9 | # def __init__( 10 | # self, 11 | # num_classes: int = 80, 12 | # image_size: int = 640, 13 | # depth: float = 1.0, 14 | # width: float = 1.0, 15 | # act: str = 'silu', 16 | # selected_backbone_layers: List[int] = [2, 3, 4], 17 | # depthwise: bool = False, 18 | # test_conf: float = 0.01, 19 | # nmsthre: float = 0.65, 20 | # ): 21 | # super().__init__( 22 | # num_classes=num_classes, 23 | # max_size=image_size, 24 | # ) 25 | # self.depth = depth 26 | # self.width = width 27 | # self.act = act 28 | 29 | # self.selected_backbone_layers = selected_backbone_layers 30 | 31 | # self.depthwise = depthwise 32 | 33 | # self.test_conf = test_conf 34 | # self.nmsthre = nmsthre 35 | -------------------------------------------------------------------------------- /boda/README.md: -------------------------------------------------------------------------------- 1 | # Models 2 | 3 | ## Library Structure 4 | ```{bash} 5 | . 6 | +-- models 7 | | +-- model 8 | | | +-- configuration_model.py 9 | | | +-- architecture_model.py 10 | | | +-- loss_model.py 11 | | | +-- inference_model.py 12 | | | +-- README.md 13 | | +-- backbone.py 14 | | +-- neck.py 15 | +-- utils 16 | | +-- box.py 17 | | +-- mask.py 18 | | +-- nms.py 19 | +-- lib 20 | | +-- torchsummary 21 | +-- base_architecture.py 22 | +-- base_configuration.py 23 | +-- modules.py 24 | +-- activation.py 25 | +-- setup.py 26 | ``` 27 | 28 | ## Abstract Structure 29 | 30 | ```{python} 31 | class Backbone(nn.Module): 32 | def __init__(self): 33 | super().__init__() 34 | 35 | def forward(self): 36 | return 37 | 38 | 39 | class Neck(nn.Module): 40 | def __init__(self): 41 | super().__init__() 42 | 43 | def _make_layer(self): 44 | 45 | def forward(self): 46 | return 47 | 48 | 49 | class Head(nn.Module): 50 | 51 | 52 | class Pretrained: 53 | 54 | 55 | class Model() 56 | ``` -------------------------------------------------------------------------------- /calc_flops.py: -------------------------------------------------------------------------------- 1 | from boda.models import YolactConfig, YolactModel 2 | # from boda.lib.torchsummary import summary 3 | from torchinfo import summary 4 | from boda.models.backbone_mobilenetv3 import mobilenet_v3_large, mobilenet_v3_small 5 | from boda.models.backbone_resnet import resnet101, resnet18, resnet34, resnet50 6 | # from torchvision.models import resnet50, mobilenet_v3_large, resnet101 7 | 8 | 9 | config = YolactConfig(num_classes=90) 10 | # model = YolactModel(config, backbone=mobilenet_v3_small(), selected_backbone_layers=[3, 8, 11]).to('cuda:0') 11 | # model = YolactModel(config, backbone=mobilenet_v3_large(), selected_backbone_layers=[6, 12, 15]).to('cuda:0') 12 | # model = YolactModel(config, backbone=resnet50(), selected_backbone_layers=[1, 2, 3]).to('cuda:0') 13 | # print(summary(model, (1, 3, 550, 550), verbose=0)) 14 | 15 | # from boda.resnet import resnet101 16 | model = mobilenet_v3_small().to('cuda') 17 | # model = mobilenet_v3_large().to('cuda') 18 | # print(summary(model, input_data=(3, 550, 550), depth=2, verbose=0)) 19 | 20 | # model = resnet101().to('cuda') 21 | # model = resnet50().to('cuda') 22 | print(summary(model, (1, 3, 224, 224), depth=3, verbose=0)) 23 | -------------------------------------------------------------------------------- /boda/models/ssd/configuration_ssd.py: -------------------------------------------------------------------------------- 1 | from ...base_configuration import BaseConfig 2 | 3 | 4 | 
SSD_PRETRAINED_CONFIG = { 5 | "ssd300": None, 6 | "ssd512": None, 7 | } 8 | 9 | 10 | class SsdConfig(BaseConfig): 11 | """Configuration for SSD 12 | 13 | Arguments: 14 | max_size (): 15 | 16 | """ 17 | 18 | def __init__( 19 | self, 20 | num_classes: int = 20, 21 | max_size: int = 300, 22 | preserve_aspect_ratio: bool = False, 23 | selected_layers: int = -1, 24 | num_grids: int = 7, 25 | **kwargs 26 | ) -> None: 27 | super().__init__(max_size=max_size, **kwargs) 28 | self.selected_layers = [3, 4] 29 | self.boxes = [4, 6, 6, 6, 4, 4] 30 | self.num_classes = num_classes 31 | self.backbone_name = "vgg16" 32 | self.aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]] 33 | self.variance = [0.1, 0.2] 34 | self.min_sizes = [30, 60, 111, 162, 213, 264] 35 | self.max_sizes = [60, 111, 162, 213, 264, 315] 36 | self.steps = [8, 16, 32, 64, 100, 300] 37 | self.clip = True 38 | # self.grid_sizes = [38, 19, 10, 5, 3, 1] 39 | -------------------------------------------------------------------------------- /boda/models/yolox/loss_yolox.py: -------------------------------------------------------------------------------- 1 | # import torch 2 | # from torch import nn 3 | 4 | 5 | # class IOUloss(nn.Module): 6 | # def __init__(self, reduction="none", loss_type="iou"): 7 | # super(IOUloss, self).__init__() 8 | # self.reduction = reduction 9 | # self.loss_type = loss_type 10 | 11 | # def forward(self, pred, target): 12 | # assert pred.shape[0] == target.shape[0] 13 | 14 | # pred = pred.view(-1, 4) 15 | # target = target.view(-1, 4) 16 | # tl = torch.max( 17 | # (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2) 18 | # ) 19 | # br = torch.min( 20 | # (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2) 21 | # ) 22 | 23 | # area_p = torch.prod(pred[:, 2:], 1) 24 | # area_g = torch.prod(target[:, 2:], 1) 25 | 26 | # en = (tl < br).type(tl.type()).prod(dim=1) 27 | # area_i = torch.prod(br - tl, 1) * en 28 | # area_u = area_p + area_g - area_i 29 | # iou = (area_i) / (area_u + 1e-16) 30 | 31 | # if self.loss_type == "iou": 32 | # loss = 1 - iou ** 2 33 | # elif self.loss_type == "giou": 34 | # c_tl = torch.min( 35 | # (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2) 36 | # ) 37 | # c_br = torch.max( 38 | # (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2) 39 | # ) 40 | # area_c = torch.prod(c_br - c_tl, 1) 41 | # giou = iou - (area_c - area_u) / area_c.clamp(1e-16) 42 | # loss = 1 - giou.clamp(min=-1.0, max=1.0) 43 | 44 | # if self.reduction == "mean": 45 | # loss = loss.mean() 46 | # elif self.reduction == "sum": 47 | # loss = loss.sum() 48 | 49 | # return loss 50 | -------------------------------------------------------------------------------- /boda/lib/torchinfo/formatting.py: -------------------------------------------------------------------------------- 1 | """ formatting.py """ 2 | import math 3 | from enum import Enum, unique 4 | from typing import Dict, Iterable, List 5 | 6 | from .layer_info import LayerInfo 7 | 8 | 9 | @unique 10 | class Verbosity(Enum): 11 | """Contains verbosity levels.""" 12 | 13 | QUIET, DEFAULT, VERBOSE = 0, 1, 2 14 | 15 | 16 | class FormattingOptions: 17 | """Class that holds information about formatting the table output.""" 18 | 19 | def __init__( 20 | self, 21 | max_depth: int, 22 | verbose: int, 23 | col_names: Iterable[str], 24 | col_width: int, 25 | ): 26 | self.max_depth = max_depth 27 | self.verbose = verbose 28 | self.col_names = col_names 29 | self.col_width = col_width 30 | self.layer_name_width = 
40 31 | 32 | def set_layer_name_width( 33 | self, summary_list: List[LayerInfo], align_val: int = 5 34 | ) -> None: 35 | """ 36 | Set layer name width by taking the longest line length and rounding up to 37 | the nearest multiple of align_val. 38 | """ 39 | max_length = 0 40 | for info in summary_list: 41 | depth_indent = info.depth * align_val + 1 42 | max_length = max(max_length, len(str(info)) + depth_indent) 43 | if max_length >= self.layer_name_width: 44 | self.layer_name_width = math.ceil(max_length / align_val) * align_val 45 | 46 | def get_total_width(self) -> int: 47 | """Calculate the total width of all lines in the table.""" 48 | return len(tuple(self.col_names)) * self.col_width + self.layer_name_width 49 | 50 | def format_row(self, layer_name: str, row_values: Dict[str, str]) -> str: 51 | """Get the string representation of a single layer of the model.""" 52 | info_to_use = [row_values.get(row_type, "") for row_type in self.col_names] 53 | new_line = f"{layer_name:<{self.layer_name_width}} " 54 | for info in info_to_use: 55 | new_line += f"{info:<{self.col_width}} " 56 | return new_line.rstrip() + "\n" 57 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'boda' 21 | copyright = '2022, Kyung-Su Kang' 22 | author = 'Kyung-Su Kang' 23 | 24 | # The full version, including alpha/beta/rc tags 25 | release = '0.01a' 26 | 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | ] 35 | 36 | # Add any paths that contain templates here, relative to this directory. 37 | templates_path = ['_templates'] 38 | 39 | # List of patterns, relative to source directory, that match files and 40 | # directories to ignore when looking for source files. 41 | # This pattern also affects html_static_path and html_extra_path. 42 | exclude_patterns = [] 43 | 44 | 45 | # -- Options for HTML output ------------------------------------------------- 46 | 47 | # The theme to use for HTML and HTML Help pages. See the documentation for 48 | # a list of builtin themes. 49 | # 50 | html_theme = 'alabaster' 51 | 52 | # Add any paths that contain custom static files (such as style sheets) here, 53 | # relative to this directory. They are copied after the builtin static files, 54 | # so a file named "default.css" will overwrite the builtin "default.css". 
55 | html_static_path = ['_static'] -------------------------------------------------------------------------------- /boda/lib/torchsummary/formatting.py: -------------------------------------------------------------------------------- 1 | """ formatting.py """ 2 | import math 3 | from enum import Enum, unique 4 | from typing import Dict, Iterable, List 5 | 6 | from .layer_info import LayerInfo 7 | 8 | 9 | @unique 10 | class Verbosity(Enum): 11 | """Contains verbosity levels.""" 12 | 13 | QUIET, DEFAULT, VERBOSE = 0, 1, 2 14 | 15 | 16 | class FormattingOptions: 17 | """Class that holds information about formatting the table output.""" 18 | 19 | def __init__( 20 | self, 21 | use_branching: bool, 22 | max_depth: int, 23 | verbose: int, 24 | col_names: Iterable[str], 25 | col_width: int, 26 | ): 27 | self.use_branching = use_branching 28 | self.max_depth = max_depth 29 | self.verbose = verbose 30 | self.col_names = col_names 31 | self.col_width = col_width 32 | self.layer_name_width = 40 33 | 34 | def set_layer_name_width( 35 | self, summary_list: List[LayerInfo], align_val: int = 5 36 | ) -> None: 37 | """ 38 | Set layer name width by taking the longest line length and rounding up to 39 | the nearest multiple of align_val. 40 | """ 41 | max_length = 0 42 | for info in summary_list: 43 | depth_indent = info.depth * align_val + 1 44 | max_length = max(max_length, len(str(info)) + depth_indent) 45 | if max_length >= self.layer_name_width: 46 | self.layer_name_width = math.ceil(max_length / align_val) * align_val 47 | 48 | def get_total_width(self) -> int: 49 | """Calculate the total width of all lines in the table.""" 50 | return len(tuple(self.col_names)) * self.col_width + self.layer_name_width 51 | 52 | def format_row(self, layer_name: str, row_values: Dict[str, str]) -> str: 53 | """Get the string representation of a single layer of the model.""" 54 | info_to_use = [row_values.get(row_type, "") for row_type in self.col_names] 55 | new_line = "{:<{}} ".format(layer_name, self.layer_name_width) 56 | for info in info_to_use: 57 | new_line += "{:<{}} ".format(info, self.col_width) 58 | return new_line.rstrip() + "\n" 59 | -------------------------------------------------------------------------------- /boda/file_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | from urllib.request import urlretrieve 5 | 6 | 7 | class DataEncoder(json.JSONEncoder): 8 | def default(self, obj): 9 | if isinstance(obj, list): 10 | return json.JSONEncoder().encode(obj) 11 | 12 | return json.JSONEncoder.default(self, obj) 13 | 14 | 15 | def progressbar(cur, total=100): 16 | percent = "{:.2%}".format(cur / total) 17 | sys.stdout.write("\r") 18 | # sys.stdout.write("[%-50s] %s" % ('=' * int(math.floor(cur * 50 / total)),percent)) 19 | sys.stdout.write("[%-100s] %s" % ("=" * int(cur), percent)) 20 | sys.stdout.flush() 21 | 22 | 23 | def schedule(blocknum, blocksize, totalsize): 24 | """ 25 | blocknum: currently downloaded block 26 | blocksize: block size for each transfer 27 | totalsize: total size of web page files 28 | """ 29 | if totalsize == 0: 30 | percent = 0 31 | else: 32 | percent = blocknum * blocksize / totalsize 33 | if percent > 1.0: 34 | percent = 1.0 35 | 36 | percent = percent * 100 37 | print("download : %.2f%%" % (percent)) 38 | progressbar(percent) 39 | 40 | 41 | def reporthook(count, block_size, total_size): 42 | """ 43 | https://blog.shichao.io/2012/10/04/progress_speed_indicator_for_urlretrieve_in_python.html 44 
| """ 45 | # global start_time 46 | # if count == 0: 47 | # start_time = time.time() 48 | # return 49 | # duration = time.time() - start_time 50 | progress_size = int(count * block_size) 51 | # speed = int(progress_size / (1024 * duration)) 52 | percent = int(count * block_size * 100 / total_size) 53 | # min(int(count*blockSize*100/totalSize),100) 54 | sys.stdout.write( 55 | f"\rDownload file for pretrained model: {percent:>3}% {progress_size / (1024*1204):>4.1f} MB" 56 | ) 57 | 58 | # sys.stdout.write("\rDownload pretrained model: %d%%, %d MB, %d KB/s, %d seconds passed" % 59 | # (percent, progress_size / (1024 * 1024), speed, duration)) 60 | sys.stdout.flush() 61 | 62 | 63 | def get_file_from_url( 64 | file_name: str, 65 | ): 66 | """ 67 | file_name (): model_name/file_name.json or pth 68 | """ 69 | url = "https://unerue.synology.me/boda/models/" 70 | urlretrieve(f"{url}{file_name}", config_file, reporthook) 71 | print() 72 | -------------------------------------------------------------------------------- /boda/custom_activation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | 6 | class Swish(nn.Module): 7 | """Swish https://arxiv.org/pdf/1905.02244.pdf""" 8 | 9 | @staticmethod 10 | def forward(x): 11 | return x * torch.sigmoid(x) 12 | 13 | 14 | class Hardswish(nn.Module): 15 | """export-friendly version of nn.Hardswish() 16 | 17 | Return: 18 | x * F.hardsigmoid(x) for torchscript and CoreML 19 | """ 20 | 21 | @staticmethod 22 | def forward(x): 23 | # for torchscript, CoreML and ONNX 24 | return x * F.hardtanh(x + 3, 0.0, 6.0) / 6.0 25 | 26 | 27 | class MemoryEfficientSwish(nn.Module): 28 | class F(torch.autograd.Function): 29 | @staticmethod 30 | def forward(ctx, x): 31 | ctx.save_for_backward(x) 32 | return x * torch.sigmoid(x) 33 | 34 | @staticmethod 35 | def backward(ctx, grad_output): 36 | x = ctx.saved_tensors[0] 37 | sx = torch.sigmoid(x) 38 | return grad_output * (sx * (1 + x * (1 - sx))) 39 | 40 | def forward(self, x): 41 | return self.F.apply(x) 42 | 43 | 44 | class Mish(nn.Module): 45 | """# Mish https://github.com/digantamisra98/Mish""" 46 | 47 | @staticmethod 48 | def forward(x): 49 | return x * F.softplus(x).tanh() 50 | 51 | 52 | class MemoryEfficientMish(nn.Module): 53 | class F(torch.autograd.Function): 54 | @staticmethod 55 | def forward(ctx, x): 56 | ctx.save_for_backward(x) 57 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 58 | 59 | @staticmethod 60 | def backward(ctx, grad_output): 61 | x = ctx.saved_tensors[0] 62 | sx = torch.sigmoid(x) 63 | fx = F.softplus(x).tanh() 64 | return grad_output * (fx + x * sx * (1 - fx * fx)) 65 | 66 | def forward(self, x): 67 | return self.F.apply(x) 68 | 69 | 70 | class FReLU(nn.Module): 71 | """FReLU https://arxiv.org/abs/2007.11824""" 72 | 73 | def __init__(self, in_channels, kernel_size=3): 74 | super().__init__() 75 | self.conv = nn.Conv2d( 76 | in_channels, 77 | in_channels, 78 | kernel_size, 79 | stride=1, 80 | padding=1, 81 | groups=in_channels, 82 | bias=False, 83 | ) 84 | self.bn = nn.BatchNorm2d(in_channels) 85 | 86 | def forward(self, x): 87 | return torch.max(x, self.bn(self.conv(x))) 88 | -------------------------------------------------------------------------------- /boda/models/yolact/__init__.py: -------------------------------------------------------------------------------- 1 | from .architecture_yolact import YolactPredictHead, YolactModel 2 | from .configuration_yolact 
import YolactConfig 3 | from .inference_yolact import PostprocessYolact 4 | 5 | # from .loss_yolact import YolactLoss 6 | 7 | 8 | __all__ = ["YolactConfig", "PostprocessYolact", "YolactPredictHead", "YolactModel"] 9 | 10 | # _import_structure = { 11 | # 'configuration_yolact': ['YolactConfig'], 12 | # 'architecture_yolact': ['YolactPredictNeck', 'YolactPredictHead', 'YolactModel'], 13 | # 'loss_yolact': ['YolactLoss'] 14 | # } 15 | # import importlib 16 | # import os 17 | # import sys 18 | 19 | 20 | # class _BaseLazyModule(ModuleType): 21 | # """ 22 | # Module class that surfaces all objects but only performs associated imports when the objects are requested. 23 | # """ 24 | 25 | # # Very heavily inspired by optuna.integration._IntegrationModule 26 | # # https://github.com/optuna/optuna/blob/master/optuna/integration/__init__.py 27 | # def __init__(self, name, import_structure): 28 | # super().__init__(name) 29 | # self._modules = set(import_structure.keys()) 30 | # self._class_to_module = {} 31 | # for key, values in import_structure.items(): 32 | # for value in values: 33 | # self._class_to_module[value] = key 34 | # # Needed for autocompletion in an IDE 35 | # self.__all__ = list(import_structure.keys()) + sum(import_structure.values(), []) 36 | 37 | # # Needed for autocompletion in an IDE 38 | # def __dir__(self): 39 | # return super().__dir__() + self.__all__ 40 | 41 | # def __getattr__(self, name: str) -> Any: 42 | # if name in self._modules: 43 | # value = self._get_module(name) 44 | # elif name in self._class_to_module.keys(): 45 | # module = self._get_module(self._class_to_module[name]) 46 | # value = getattr(module, name) 47 | # else: 48 | # raise AttributeError(f"module {self.__name__} has no attribute {name}") 49 | 50 | # setattr(self, name, value) 51 | # return value 52 | 53 | # def _get_module(self, module_name: str) -> ModuleType: 54 | # raise NotImplementedError 55 | 56 | 57 | # class _LazyModule(_BaseLazyModule): 58 | # """ 59 | # Module class that surfaces all objects but only performs associated imports when the objects are requested. 60 | # """ 61 | 62 | # __file__ = globals()["__file__"] 63 | # __path__ = [os.path.dirname(__file__)] 64 | 65 | # def _get_module(self, module_name: str): 66 | # return importlib.import_module("." 
+ module_name, self.__name__) 67 | -------------------------------------------------------------------------------- /boda/models/solov2/configuration_solov1.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Tuple, Sequence, Union, Any 3 | 4 | from ...base_configuration import BaseConfig 5 | 6 | 7 | solov1_pretrained_models = {"solov1-base": ""} 8 | 9 | 10 | class Solov1Config(BaseConfig): 11 | """Configuration for SOLOv1 12 | 13 | Arguments: 14 | max_size (): 15 | padding (): 16 | proto_net_structure (List): 17 | """ 18 | 19 | config_name = "solov1" 20 | 21 | def __init__( 22 | self, 23 | num_classes: int = 80, 24 | min_size: int = 800, 25 | max_size: int = 1333, 26 | preserve_aspect_ratio: bool = True, 27 | selected_layers: Sequence[int] = [0, 1, 2, 3], 28 | fpn_channels: int = 256, 29 | num_extra_fpn_layers: int = 1, 30 | scales: Sequence[int] = [[8, 32], [16, 64], [32, 128], [64, 256], [128, 512]], 31 | grids: Sequence[int] = [40, 36, 24, 16, 12], 32 | strides: Sequence[int] = [4, 8, 16, 32, 64], 33 | base_edges: Sequence[int] = [16, 32, 64, 128, 256], 34 | **kwargs 35 | ) -> None: 36 | super().__init__(max_size=max_size, **kwargs) 37 | self.num_classes = num_classes 38 | self.selected_layers = selected_layers 39 | self.fpn_channels = fpn_channels 40 | self.num_extra_fpn_layers = num_extra_fpn_layers 41 | self.scales = scales 42 | self.grids = grids 43 | self.strides = strides 44 | self.base_edges = base_edges 45 | 46 | self.cate_down_pos = 0 47 | 48 | 49 | class DecoupledSolov1Config(BaseConfig): 50 | """Configuration for SOLOv1 51 | 52 | Arguments: 53 | max_size (): 54 | padding (): 55 | proto_net_structure (List): 56 | """ 57 | 58 | config_name = "solov1" 59 | 60 | def __init__( 61 | self, 62 | num_classes: int = 80, 63 | max_size: Tuple[int] = (1333, 800), 64 | selected_layers: Sequence[int] = [0, 1, 2, 3], 65 | fpn_channels: int = 256, 66 | num_extra_fpn_layers: int = 1, 67 | scales: Sequence[int] = [[8, 32], [16, 64], [32, 128], [64, 256], [128, 512]], 68 | grids: Sequence[int] = [40, 36, 24, 16, 12], 69 | strides: Sequence[int] = [4, 8, 16, 32, 64], 70 | base_edges: Sequence[int] = [16, 32, 64, 128, 256], 71 | **kwargs 72 | ) -> None: 73 | super().__init__(max_size=max_size, **kwargs) 74 | self.num_classes = num_classes 75 | self.selected_layers = selected_layers 76 | self.fpn_channels = fpn_channels 77 | self.num_extra_fpn_layers = num_extra_fpn_layers 78 | self.scales = scales 79 | self.grids = grids 80 | self.strides = strides 81 | self.base_edges = base_edges 82 | 83 | self.cate_down_pos = 0 84 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages 2 | from numpy.distutils.core import setup 3 | from numpy.distutils.misc_util import Configuration 4 | from distutils.command.clean import clean as Clean 5 | from distutils.command.sdist import sdist 6 | import os 7 | import shutil 8 | 9 | 10 | def configuration(parent_package='', top_path=None): 11 | config = Configuration(None, parent_package, top_path) 12 | 13 | config.set_options( 14 | ignore_setup_xxx_py=True, 15 | assume_default_configuration=True, 16 | delegate_options_to_subpackages=True, 17 | quiet=True) 18 | config.add_subpackage('boda') 19 | 20 | return config 21 | 22 | 23 | class CleanCommand(Clean): 24 | description = 'Remove build artifacts from the source tree' 25 | 26 | def run(self): 27 
| Clean.run(self) 28 | # Remove c files if we are not within a sdist package 29 | cwd = os.path.abspath(os.path.dirname(__file__)) 30 | remove_c_files = not os.path.exists(os.path.join(cwd, 'PKG-INFO')) 31 | if remove_c_files: 32 | print('Will remove generated .c files') 33 | if os.path.exists('build'): 34 | shutil.rmtree('build') 35 | for dirpath, dirnames, filenames in os.walk('sklearn'): 36 | for filename in filenames: 37 | if any(filename.endswith(suffix) for suffix in 38 | (".so", ".pyd", ".dll", ".pyc")): 39 | os.unlink(os.path.join(dirpath, filename)) 40 | continue 41 | extension = os.path.splitext(filename)[1] 42 | if remove_c_files and extension in ['.c', '.cpp']: 43 | pyx_file = str.replace(filename, extension, '.pyx') 44 | if os.path.exists(os.path.join(dirpath, pyx_file)): 45 | os.unlink(os.path.join(dirpath, filename)) 46 | for dirname in dirnames: 47 | if dirname == '__pycache__': 48 | shutil.rmtree(os.path.join(dirpath, dirname)) 49 | 50 | 51 | cmdclass = {'clean': CleanCommand, 'sdist': sdist} 52 | 53 | 54 | def setup_packages(): 55 | metadata = dict( 56 | name='boda', 57 | version='0.0.1', 58 | install_requires=['torch', 'numpy', 'cython'], 59 | author='Kang, Kyung-Su', 60 | author_email='unerue@me.com', 61 | maintainer='Kang, Kyung-Su', 62 | maintainer_email='unerue@me.com', 63 | description='boda is a library for instance segmentation.', 64 | packages=find_packages(), 65 | # include_package_data=True, 66 | classifiers=[ 67 | 'Programming Language :: C', 68 | 'Programming Language :: Python', 69 | 'Programming Language :: Python :: 3.6', 70 | 'Programming Language :: Python :: 3.7', 71 | 'Programming Language :: Python :: 3.8'], 72 | cmdclass=cmdclass, 73 | configuration=configuration, 74 | python_requires='>=3.6') 75 | 76 | setup(**metadata) 77 | 78 | 79 | if __name__ == '__main__': 80 | setup_packages() 81 | -------------------------------------------------------------------------------- /boda/ops/anchor_generators.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import itertools 3 | import math 4 | from collections import defaultdict 5 | from typing import List, Tuple 6 | 7 | import torch 8 | from torch import Tensor 9 | 10 | 11 | def default_box_cache(func): 12 | cache = defaultdict() 13 | 14 | @functools.wraps(func) 15 | def wrapper(*args): 16 | k, v = func(*args) 17 | if k not in cache: 18 | cache[k] = v 19 | return k, cache[k] 20 | 21 | return wrapper 22 | 23 | 24 | class DefaultBoxGenerator: 25 | """ 26 | Args: 27 | aspect_ratios (:obj:`List[int]`): 28 | scales (:obj:): 29 | max_size (): 30 | use_preapply_sqrt (): 31 | use_pixel_scales (): 32 | use_square_anchors (:obj:`bool`): default `True` 33 | """ 34 | 35 | def __init__( 36 | self, 37 | aspect_ratios: List[int], 38 | scales: List[float], 39 | max_size: Tuple[int] = (550, 550), 40 | use_preapply_sqrt: bool = True, 41 | use_pixel_scales: bool = True, 42 | use_square_anchors: bool = True, 43 | ) -> None: 44 | self.aspect_ratios = aspect_ratios 45 | self.scales = scales 46 | self.clip = False 47 | self.max_size = max_size 48 | self.use_preapply_sqrt = use_preapply_sqrt 49 | self.use_pixel_scales = use_pixel_scales 50 | self.use_square_anchors = use_square_anchors 51 | 52 | @default_box_cache 53 | def generate( 54 | self, h: int, w: int, device: str = "cuda:0" 55 | ) -> Tuple[Tuple[int], Tensor]: 56 | """DefaultBoxGenerator is 57 | 58 | Args: 59 | h (:obj:`int`): feature map size from backbone 60 | w (:obj:`int`): feature map size from backbone 61 | 
device (:obj:`str`): default `cuda` 62 | 63 | Returns 64 | size (:obj:`Tuple[int]`): feature map size 65 | prior_boxes (:obj:`FloatTensor[N, 4]`): 66 | """ 67 | size = (h, w) 68 | default_boxes = [] 69 | for j, i in itertools.product(range(h), range(w)): 70 | cx = (i + 0.5) / w 71 | cy = (j + 0.5) / h 72 | for ratios in self.aspect_ratios: 73 | for scale in self.scales: 74 | for ratio in ratios: 75 | if not self.use_preapply_sqrt: 76 | ratio = math.sqrt(ratio) 77 | 78 | if self.use_pixel_scales: 79 | _h = scale / ratio / self.max_size[0] 80 | _w = scale * ratio / self.max_size[1] 81 | else: 82 | _h = scale / ratio / h 83 | _w = scale * ratio / w 84 | 85 | if self.use_square_anchors: 86 | _h = _w 87 | 88 | default_boxes += [cx, cy, _w, _h] 89 | 90 | default_boxes = torch.tensor( 91 | default_boxes, dtype=torch.float32, device=device, requires_grad=False 92 | ).view(-1, 4) 93 | if self.clip: 94 | default_boxes.clamp_(min=0, max=1) 95 | # prior_boxes.requires_grad = False 96 | 97 | return size, default_boxes 98 | -------------------------------------------------------------------------------- /boda/models/yolact/configuration_yolact.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional, Tuple, List, Union, Any 3 | 4 | from ...base_configuration import BaseConfig 5 | 6 | 7 | YOLACT_PRETRAINED_CONFIG_ARCHIVE_MAP = { 8 | "yolact-base": "https://unerue.synology.me/boda/models/yolact/yolact-base.json", 9 | "yolact-550-r50": "", 10 | "yolact-300-r101": "", 11 | "yolact-700-r101": "", 12 | } 13 | 14 | 15 | class YolactConfig(BaseConfig): 16 | """Configuration for YOLACT 17 | 18 | Args: 19 | num_classes (:obj:`int`): 20 | max_size (:obj:`Union[int, Tuple[int]]`): 21 | num_grids (:obj:`int`): 22 | num_grid_sizes (:obj:`int`): 23 | num_mask_dim (:obj:`int`): 24 | fpn_channels (:obj:`int`): 25 | extra_fpn_layers (:obj:`bool`): 26 | num_extra_fpn_layers (:obj:`int`): 27 | mask_dim (:obj:`int`): 28 | num_grid_sizes (:obj:`int`): 29 | num_mask_dim (:obj:`int`): 30 | """ 31 | 32 | model_name = "yolact" 33 | 34 | def __init__( 35 | self, 36 | num_classes: int = 80, 37 | max_size: Tuple[int] = (550, 550), 38 | preserve_aspect_ratio: bool = False, 39 | selected_backbone_layers: List[int] = [1, 2, 3], 40 | fpn_channels: int = 256, 41 | extra_fpn_layers: bool = True, 42 | num_extra_fpn_layers: int = 2, 43 | aspect_ratios: List = [1, 1 / 2, 2], 44 | scales: List = [24, 48, 96, 192, 384], 45 | num_extra_box_layers: int = 0, 46 | num_extra_mask_layers: int = 0, 47 | num_extra_score_layers: int = 0, 48 | use_preapply_sqrt: bool = False, 49 | use_pixel_scales: bool = True, 50 | use_square_anchors: bool = True, 51 | num_grids: int = 0, 52 | mask_size: int = 16, 53 | mask_dim: int = 0, 54 | box_weight: float = 1.0, 55 | mask_weight: float = 6.125, 56 | score_weight: float = 1.0, 57 | semantic_weight: float = 1.0, 58 | **kwargs 59 | ) -> None: 60 | super().__init__(max_size=max_size, **kwargs) 61 | self.num_classes = num_classes + 1 62 | self.preserve_aspect_ratio = preserve_aspect_ratio 63 | self.fpn_channels = fpn_channels 64 | self.extra_fpn_layers = extra_fpn_layers 65 | self.num_extra_fpn_layers = num_extra_fpn_layers 66 | self.selected_backbone_layers = selected_backbone_layers 67 | self.aspect_ratios = aspect_ratios 68 | self.scales = scales 69 | self.num_grids = num_grids 70 | self.mask_size = mask_size 71 | self.use_preapply_sqrt = use_preapply_sqrt 72 | self.use_pixel_scales = use_pixel_scales 73 | self.use_square_anchors = 
use_square_anchors 74 | 75 | self.num_extra_box_layers = num_extra_box_layers 76 | self.num_extra_mask_layers = num_extra_mask_layers 77 | self.num_extra_score_layers = num_extra_score_layers 78 | self.num_grids = num_grids 79 | self.mask_size = mask_size 80 | self.mask_dim = mask_dim 81 | 82 | self.box_weight = box_weight 83 | self.mask_weight = mask_weight 84 | self.score_weight = score_weight 85 | self.semantic_weight = semantic_weight 86 | 87 | self.label_map = kwargs.get("label_map", None) 88 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |


13 | 14 | ## Deep learning-based Computer Vision Models for PyTorch 15 | 16 | Boda (보다) means to see in Korean. This library was inspired by 🤗 Transformers. 17 | 18 | ## Get started 19 | 20 | ```bash 21 | git clone https://github.com/unerue/boda.git && cd boda 22 | conda env create -f environment.yml 23 | conda activate boda 24 | python setup.py install 25 | ``` 26 | 27 | ```python 28 | from boda.models import YolactConfig, YolactModel, YolactLoss 29 | 30 | config = YolactConfig(num_classes=80) 31 | model = YolactModel(config) 32 | criterion = YolactLoss() 33 | 34 | outputs = model(images) 35 | losses = criterion(outputs, targets) 36 | print(losses) 37 | ``` 38 | 39 | ## Comparison 40 | 41 | |Model|State|Training|Inference|Original|Ours| 42 | |:----|:---:|:------:|:-------:|-------:|---:| 43 | |Mask R-CNN|😡|❌|❌||| 44 | |[YOLACT](boda/models/yolact/)|😆|✔️|✔️||| 45 | |SOLOv2|🙂|❌|✔️||| 46 | |[CenterMask]()|😡|❌|❌||| 47 | |YOLACT EDGE|😡|❌|❌||| 48 | || 49 | 50 | ### Misc 51 | 52 | |Model|State|Training|Inference|Original|Ours| 53 | |:----|:---:|:------:|:-------:|-------:|---:| 54 | |[SSD](boda/models/ssd/)|🙂|❌|✔️||| 55 | |Faster R-CNN|🙂|❌|✔️||| 56 | |[FCOS](boda/models/fcos/)|🙂|❌|✔️||| 57 | |Keypoint R-CNN|🙂|❌|✔️||| 58 | |YOLOv4|😡|❌|❌||| 59 | || 60 | 61 | ## Pretrained Model Configurations 62 | 63 | |Model|Config name|Status|Original|Ours| 64 | |:----|:----|:------:|-------:|---:| 65 | |[SSD](boda/models/ssd/)|`ssd-base`|🙂||| 66 | | |`ssd-512`|😡||| 67 | |[Faster R-CNN]()|`faster-rcnn-base`|🙂||| 68 | | |`faster-rcnn-r101`|😡||| 69 | |Mask R-CNN|`mask-rcnn-base`|😡||| 70 | | |`mask-rcnn-r50`|😡||| 71 | |Keypoint R-CNN|`keypoint-rcnn-base`|🙂||| 72 | | |`keypoint-rcnn-mobile`|😡||| 73 | |[FCOS](boda/models/fcos/)|`fcos-base`|🙂||| 74 | |PolarMask|`polarmask-base`|😡||| 75 | |YOLOv4|`yolov4-base`|😡||| 76 | |[YOLACT](boda/models/yolact/)|`yolact-base`|😆||| 77 | | |`yolact-r101`|😡||| 78 | | |`yolact-r101-300`|😡||| 79 | | |`yolact-r101-700`|😡||| 80 | |[SOLOv1](boda/models/solov1/)|`solov1-base`|🙂||| 81 | | |`solov1-r101`|😡||| 82 | |SOLOv2|`solov2-base`|😡|||| 83 | |[CenterMask]()|`centermask-base`|😡||| 84 | |YOLACT EDGE|`yolact-edge-base`|😡||| 85 | || -------------------------------------------------------------------------------- /boda/models/feature_extractor/vggnet.py: -------------------------------------------------------------------------------- 1 | import math 2 | from collections import OrderedDict 3 | from typing import Tuple, List, Dict, Optional 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | from torch import nn, Tensor 8 | from torch.nn.modules.batchnorm import BatchNorm2d 9 | 10 | # from ..base_architecture import Backbone 11 | 12 | 13 | class VGG(nn.Module): 14 | """ 15 | This function is derived from torchvision VGG make_layers() 16 | https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py 17 | https://github.com/dbolya/yolact/blob/master/backbone.py 18 | """ 19 | 20 | def __init__( 21 | self, 22 | structure, 23 | bn: bool = False, 24 | norm_layer: Optional[nn.Module] = nn.BatchNorm2d, 25 | num_classes: int = 1000, 26 | ) -> None: 27 | super().__init__() 28 | self.bn = bn 29 | self.in_channels = 3 30 | self.channels = [] 31 | self.layers = nn.ModuleList() 32 | 33 | for layer in structure: 34 | self._make_layer(layer) 35 | 36 | def forward(self, inputs): 37 | outputs = [] 38 | for layer in self.layers: 39 | inputs = layer(inputs) 40 | outputs.append(inputs) 41 | 42 | return outputs 43 | 44 | def _make_layer(self, config): 45 | # _layers = [] 46 | _layers = 
OrderedDict() 47 | i = 0 48 | for v in config: 49 | kwargs = None 50 | if isinstance(v, tuple): 51 | kwargs = v[1] 52 | v = v[0] 53 | 54 | if v == "M": 55 | if kwargs is None: 56 | kwargs = {"kernel_size": 2, "stride": 2} 57 | 58 | # _layers.append(nn.MaxPool2d(**kwargs)) 59 | # _layers.update({'maxpool': nn.MaxPool2d(**kwargs)}) 60 | _layers.update({f"maxpool{i}": nn.MaxPool2d(**kwargs)}) 61 | else: 62 | if kwargs is None: 63 | kwargs = {"kernel_size": 3, "padding": 1} 64 | 65 | conv2d = nn.Conv2d( 66 | in_channels=self.in_channels, out_channels=v, **kwargs 67 | ) 68 | 69 | if self.bn: 70 | # _layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU()] 71 | # _layers.update({'conv': conv2d, 'bn': nn.BatchNorm2d(v), 'relu': nn.ReLU()}) 72 | _layers.update( 73 | { 74 | f"{i}": conv2d, 75 | f"bn{i}": nn.BatchNorm2d(v), 76 | f"relu{i}": nn.ReLU(), 77 | } 78 | ) 79 | else: 80 | # _layers += [conv2d, nn.ReLU()] 81 | # _layers.update({'conv': conv2d, 'relu': nn.ReLU()}) 82 | _layers.update({f"{i}": conv2d, f"relu{i}": nn.ReLU()}) 83 | 84 | self.in_channels = v 85 | i += 1 86 | 87 | self.channels.append(self.in_channels) 88 | self.layers.append(nn.Sequential(_layers)) 89 | 90 | 91 | structures = { 92 | "vgg16": [ 93 | [64, 64], 94 | ["M", 128, 128], 95 | ["M", 256, 256, 256], 96 | [("M", {"kernel_size": 2, "stride": 2, "ceil_mode": True}), 512, 512, 512], 97 | ["M", 512, 512, 512], 98 | ] 99 | } 100 | 101 | 102 | def vgg16(config: Dict = None): 103 | model = VGG(structures["vgg16"]) 104 | 105 | return model 106 | -------------------------------------------------------------------------------- /boda/models/ssd/README.md: -------------------------------------------------------------------------------- 1 | # SSD (Single Shot MultiBox Object Detector) 2 | 3 | ``` 4 | ██████╗ ██████╗ ███████╗ 5 | ██╔════╝ ██╔════╝ ██╔═══██╗ 6 | ╚██████╗ ╚██████╗ ██║ ██║ 7 | ╚════██╗ ╚════██╗██║ ██║ 8 | ██████╔╝ ██████╔╝███████╔╝ 9 | ╚═════╝ ╚═════╝ ╚══════╝ 10 | ``` 11 | 12 | ## SSD Architecture 13 | 14 | ```{bash} 15 | ========================================================================================== 16 | Layer (type:depth-idx) Output Shape Param # 17 | ========================================================================================== 18 | ├─VGG: 1-1 [-1, 64, 300, 300] -- 19 | | └─ModuleList: 2 [] -- 20 | | | └─Sequential: 3-1 [-1, 64, 300, 300] 38,720 21 | | | └─Sequential: 3-2 [-1, 128, 150, 150] 221,440 22 | | | └─Sequential: 3-3 [-1, 256, 75, 75] 1,475,328 23 | | | └─Sequential: 3-4 [-1, 512, 38, 38] 5,899,776 24 | | | └─Sequential: 3-5 [-1, 512, 19, 19] 7,079,424 25 | ├─SsdPredictNeck: 1-2 [-1, 512, 38, 38] -- 26 | | └─L2Norm: 2-1 [-1, 512, 38, 38] 512 27 | | └─ModuleList: 2 [] -- 28 | | | └─Sequential: 3-6 [-1, 1024, 19, 19] 5,769,216 29 | | | └─Sequential: 3-7 [-1, 512, 10, 10] 1,442,560 30 | | | └─Sequential: 3-8 [-1, 256, 5, 5] 360,832 31 | | | └─Sequential: 3-9 [-1, 256, 3, 3] 328,064 32 | | | └─Sequential: 3-10 [-1, 256, 1, 1] 328,064 33 | ├─ModuleList: 1 [] -- 34 | | └─SsdPredictHead: 2-2 [[-1, 4]] -- 35 | | | └─Sequential: 3-11 [-1, 16, 38, 38] 73,744 36 | | | └─Sequential: 3-12 [-1, 84, 38, 38] 387,156 37 | | └─SsdPredictHead: 2-3 [[-1, 4]] -- 38 | | | └─Sequential: 3-13 [-1, 24, 19, 19] 221,208 39 | | | └─Sequential: 3-14 [-1, 126, 19, 19] 1,161,342 40 | | └─SsdPredictHead: 2-4 [[-1, 4]] -- 41 | | | └─Sequential: 3-15 [-1, 24, 10, 10] 110,616 42 | | | └─Sequential: 3-16 [-1, 126, 10, 10] 580,734 43 | | └─SsdPredictHead: 2-5 [[-1, 4]] -- 44 | | | └─Sequential: 3-17 [-1, 24, 5, 5] 55,320 45 | | | 
└─Sequential: 3-18 [-1, 126, 5, 5] 290,430 46 | | └─SsdPredictHead: 2-6 [[-1, 4]] -- 47 | | | └─Sequential: 3-19 [-1, 16, 3, 3] 36,880 48 | | | └─Sequential: 3-20 [-1, 84, 3, 3] 193,620 49 | | └─SsdPredictHead: 2-7 [[-1, 4]] -- 50 | | | └─Sequential: 3-21 [-1, 16, 1, 1] 36,880 51 | | | └─Sequential: 3-22 [-1, 84, 1, 1] 193,620 52 | ========================================================================================== 53 | Total params: 26,285,486 54 | Trainable params: 26,285,486 55 | Non-trainable params: 0 56 | Total mult-adds (G): 31.43 57 | ========================================================================================== 58 | Input size (MB): 1.03 59 | Forward/backward pass size (MB): 200.19 60 | Params size (MB): 100.27 61 | Estimated Total Size (MB): 301.49 62 | ``` 63 | 64 | weight https://s3.amazonaws.com/amdegroot-models/ssd300_mAP_77.43_v2.pth 65 | 66 | 67 | ## References 68 | 69 | [](https://github.com/amdegroot/ssd.pytorch) 70 | [](https://github.com/open-mmlab/mmdetection) 71 | []() -------------------------------------------------------------------------------- /boda/models/feature_extractor/pafpn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from .backbone_darknet import BaseConv, CSPLayer, DWConv 5 | 6 | 7 | class YOLOPAFPN(nn.Module): 8 | """ 9 | YOLOv3 model. Darknet 53 is the default backbone of this model. 10 | """ 11 | 12 | def __init__( 13 | self, 14 | in_channels, 15 | depth=1.0, 16 | width=1.0, 17 | depthwise=False, 18 | act="silu", 19 | ): 20 | super().__init__() 21 | self.in_channels = in_channels 22 | print(self.in_channels) 23 | Conv = DWConv if depthwise else BaseConv 24 | 25 | self.upsample = nn.Upsample(scale_factor=2, mode="nearest") 26 | self.lateral_conv0 = BaseConv( 27 | int(self.in_channels[2] * width), 28 | int(self.in_channels[1] * width), 29 | 1, 30 | 1, 31 | act=act, 32 | ) 33 | self.C3_p4 = CSPLayer( 34 | int(2 * self.in_channels[1] * width), 35 | int(self.in_channels[1] * width), 36 | round(3 * depth), 37 | False, 38 | depthwise=depthwise, 39 | act=act, 40 | ) # cat 41 | 42 | self.reduce_conv1 = BaseConv( 43 | int(self.in_channels[1] * width), 44 | int(self.in_channels[0] * width), 45 | 1, 46 | 1, 47 | act=act, 48 | ) 49 | self.C3_p3 = CSPLayer( 50 | int(2 * self.in_channels[0] * width), 51 | int(self.in_channels[0] * width), 52 | round(3 * depth), 53 | False, 54 | depthwise=depthwise, 55 | act=act, 56 | ) 57 | 58 | # bottom-up conv 59 | self.bu_conv2 = Conv( 60 | int(self.in_channels[0] * width), 61 | int(self.in_channels[0] * width), 62 | 3, 63 | 2, 64 | act=act, 65 | ) 66 | self.C3_n3 = CSPLayer( 67 | int(2 * self.in_channels[0] * width), 68 | int(self.in_channels[1] * width), 69 | round(3 * depth), 70 | False, 71 | depthwise=depthwise, 72 | act=act, 73 | ) 74 | 75 | # bottom-up conv 76 | self.bu_conv1 = Conv( 77 | int(self.in_channels[1] * width), 78 | int(self.in_channels[1] * width), 79 | 3, 80 | 2, 81 | act=act, 82 | ) 83 | self.C3_n4 = CSPLayer( 84 | int(2 * self.in_channels[1] * width), 85 | int(self.in_channels[2] * width), 86 | round(3 * depth), 87 | False, 88 | depthwise=depthwise, 89 | act=act, 90 | ) 91 | 92 | def forward(self, inputs): 93 | """ 94 | Args: 95 | inputs: input images. 96 | 97 | Returns: 98 | Tuple[Tensor]: FPN feature. 
99 | """ 100 | 101 | # backbone 102 | [x2, x1, x0] = inputs 103 | 104 | fpn_out0 = self.lateral_conv0(x0) # 1024->512/32 105 | f_out0 = self.upsample(fpn_out0) # 512/16 106 | f_out0 = torch.cat([f_out0, x1], 1) # 512->1024/16 107 | f_out0 = self.C3_p4(f_out0) # 1024->512/16 108 | 109 | fpn_out1 = self.reduce_conv1(f_out0) # 512->256/16 110 | f_out1 = self.upsample(fpn_out1) # 256/8 111 | f_out1 = torch.cat([f_out1, x2], 1) # 256->512/8 112 | pan_out2 = self.C3_p3(f_out1) # 512->256/8 113 | 114 | p_out1 = self.bu_conv2(pan_out2) # 256->256/16 115 | p_out1 = torch.cat([p_out1, fpn_out1], 1) # 256->512/16 116 | pan_out1 = self.C3_n3(p_out1) # 512->512/16 117 | 118 | p_out0 = self.bu_conv1(pan_out1) # 512->512/32 119 | p_out0 = torch.cat([p_out0, fpn_out0], 1) # 512->1024/32 120 | pan_out0 = self.C3_n4(p_out0) # 1024->1024/32 121 | 122 | outputs = (pan_out2, pan_out1, pan_out0) 123 | return outputs 124 | -------------------------------------------------------------------------------- /boda/models/yolox/utils.py: -------------------------------------------------------------------------------- 1 | # import torch 2 | # import torch.nn.functional as F 3 | # import torchvision 4 | 5 | 6 | # def preproc(img, input_size): 7 | # padded_img = torch.ones(3, input_size[0], input_size[1]) * 0.48 8 | # r = min(input_size[0] / img.shape[1], input_size[1] / img.shape[2]) 9 | # resized_img = F.interpolate( 10 | # img[None], 11 | # size=(int(img.shape[1] * r), int(img.shape[2] * r)), 12 | # mode='bilinear', 13 | # align_corners=False 14 | # )[0] 15 | # print(resized_img.shape) 16 | 17 | # padded_img[:, :int(img.shape[1] * r), :int(img.shape[2] * r)] = resized_img 18 | # padded_img = padded_img.contiguous().type(torch.float32) 19 | 20 | # return padded_img, r 21 | 22 | 23 | # def bboxes_iou(bboxes_a, bboxes_b, xyxy=True): 24 | # if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4: 25 | # raise IndexError 26 | 27 | # if xyxy: 28 | # tl = torch.max(bboxes_a[:, None, :2], bboxes_b[:, :2]) 29 | # br = torch.min(bboxes_a[:, None, 2:], bboxes_b[:, 2:]) 30 | # area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1) 31 | # area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1) 32 | # else: 33 | # tl = torch.max( 34 | # (bboxes_a[:, None, :2] - bboxes_a[:, None, 2:] / 2), 35 | # (bboxes_b[:, :2] - bboxes_b[:, 2:] / 2), 36 | # ) 37 | # br = torch.min( 38 | # (bboxes_a[:, None, :2] + bboxes_a[:, None, 2:] / 2), 39 | # (bboxes_b[:, :2] + bboxes_b[:, 2:] / 2), 40 | # ) 41 | 42 | # area_a = torch.prod(bboxes_a[:, 2:], 1) 43 | # area_b = torch.prod(bboxes_b[:, 2:], 1) 44 | # en = (tl < br).type(tl.type()).prod(dim=2) 45 | # area_i = torch.prod(br - tl, 2) * en # * ((tl < br).all()) 46 | # return area_i / (area_a[:, None] + area_b - area_i) 47 | 48 | 49 | # def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agnostic=False): 50 | # box_corner = prediction.new(prediction.shape) 51 | # box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 52 | # box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 53 | # box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 54 | # box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 55 | # prediction[:, :, :4] = box_corner[:, :, :4] 56 | 57 | # output = [None for _ in range(len(prediction))] 58 | # for i, image_pred in enumerate(prediction): 59 | 60 | # # If none are remaining => process next image 61 | # if not image_pred.size(0): 62 | # continue 63 | # # Get score and class with highest confidence 64 | # 
class_conf, class_pred = torch.max(image_pred[:, 5: 5 + num_classes], 1, keepdim=True) 65 | 66 | # conf_mask = (image_pred[:, 4] * class_conf.squeeze() >= conf_thre).squeeze() 67 | # # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred) 68 | # detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1) 69 | # detections = detections[conf_mask] 70 | # if not detections.size(0): 71 | # continue 72 | 73 | # if class_agnostic: 74 | # nms_out_index = torchvision.ops.nms( 75 | # detections[:, :4], 76 | # detections[:, 4] * detections[:, 5], 77 | # nms_thre, 78 | # ) 79 | # else: 80 | # nms_out_index = torchvision.ops.batched_nms( 81 | # detections[:, :4], 82 | # detections[:, 4] * detections[:, 5], 83 | # detections[:, 6], 84 | # nms_thre, 85 | # ) 86 | 87 | # detections = detections[nms_out_index] 88 | # if output[i] is None: 89 | # output[i] = detections 90 | # else: 91 | # output[i] = torch.cat((output[i], detections)) 92 | 93 | # output = [{ 94 | # 'boxes': o[:, :4], 95 | # 'labels': o[:, 6], 96 | # 'scores': o[:, 4] * o[:, 5], 97 | # } for o in output] 98 | 99 | # return output 100 | -------------------------------------------------------------------------------- /boda/models/feature_extractor/fpn.py: -------------------------------------------------------------------------------- 1 | from typing import List, Sequence 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | from torch import nn, Tensor 6 | 7 | 8 | class FeaturePyramidNetworks(nn.Module): 9 | """Pyramid Feature Networks 10 | 11 | Example:: 12 | >>> backbone = resnet101() 13 | >>> neck = FeaturePyramidNetworks(backbone.channels, [1, 2, 3]) 14 | >>> print(neck.channels, neck.selected_layers) 15 | """ 16 | 17 | def __init__( 18 | self, 19 | in_channels: Sequence[int] = [256, 512, 1024, 2048], 20 | selected_layers: Sequence[int] = [1, 2, 3], 21 | out_channels: int = 256, 22 | extra_layers: bool = False, 23 | num_extra_predict_layers: int = 2, 24 | **kwargs 25 | ) -> None: 26 | """ 27 | Args: 28 | channels (:obj:`List[int]`): out channels from backbone 29 | selected_layers (:obj:`List[int]`): to use selected backbone layers 30 | out_channels (:obj:`int`): 31 | num_extra_predict_layers (:obj:`int`): make extra predict layers for training 32 | num_downsamples: (:obj:`int`): use predict layers does not training 33 | """ 34 | super().__init__() 35 | self.in_channels = [in_channels[i] for i in selected_layers] 36 | self.selected_layers = selected_layers 37 | self.selected_backbones = selected_layers 38 | 39 | self.extra_layers = extra_layers 40 | self.num_extra_layers = 0 41 | self.num_extra_predict_layers = num_extra_predict_layers 42 | 43 | self.selected_layers = list( 44 | range(len(self.selected_layers) + self.num_extra_predict_layers) 45 | ) 46 | 47 | self.lateral_layers = nn.ModuleList() 48 | for _in_channels in reversed(self.in_channels): 49 | self.lateral_layers.append( 50 | nn.Conv2d( 51 | _in_channels, 52 | out_channels, 53 | kernel_size=kwargs.get("lateral_kernel_size", 1), 54 | stride=kwargs.get("lateral_stride", 1), 55 | padding=kwargs.get("lateral_padding", 0), 56 | ) 57 | ) 58 | 59 | self.predict_layers = nn.ModuleList() 60 | for _ in self.in_channels: 61 | self.predict_layers.append( 62 | nn.Conv2d( 63 | out_channels, 64 | out_channels, 65 | kernel_size=kwargs.get("", 3), 66 | stride=kwargs.get("", 1), 67 | padding=kwargs.get("", 1), 68 | ) 69 | ) 70 | 71 | if self.num_extra_predict_layers > 0: 72 | self.extra_layers = nn.ModuleList( 73 | [ 74 | nn.Conv2d( 75 | out_channels, 
out_channels, kernel_size=3, stride=2, padding=1 76 | ) 77 | for _ in range(self.num_extra_predict_layers) 78 | ] 79 | ) 80 | # self.channels.append(self.out_channels) 81 | 82 | self.channels = [out_channels] * len(self.selected_layers) 83 | 84 | def forward(self, inputs: List[Tensor]) -> List[Tensor]: 85 | """ 86 | Args: 87 | inputs (:obj:`FloatTensor[B, C, H, W]`) 88 | 89 | Returns: 90 | outputs (:obj:`List[FloatTensor[B, C, H, W]]`) 91 | """ 92 | device = inputs[0].device 93 | inputs = [inputs[i] for i in self.selected_backbones] 94 | 95 | x = torch.zeros(1, device=device) 96 | outputs = [x for _ in range(len(inputs))] 97 | 98 | i = len(inputs) 99 | for lateral_layer in self.lateral_layers: 100 | i -= 1 101 | if i < len(inputs) - 1: 102 | _, _, h, w = inputs[i].size() 103 | x = F.interpolate(x, size=(h, w), mode="bilinear", align_corners=False) 104 | 105 | x = x + lateral_layer(inputs[i]) 106 | outputs[i] = x 107 | 108 | i = len(inputs) 109 | for predict_layer in self.predict_layers: 110 | i -= 1 111 | outputs[i] = F.relu(predict_layer(outputs[i])) 112 | 113 | if self.extra_layers: 114 | for extra_layer in self.extra_layers: 115 | outputs.append(extra_layer(outputs[-1])) 116 | 117 | elif self.num_extra_predict_layers > 0: 118 | for _ in range(self.num_extra_predict_layers): 119 | outputs.append(self.predict_layers[-1](outputs[-1])) 120 | 121 | return outputs 122 | -------------------------------------------------------------------------------- /run_test.py: -------------------------------------------------------------------------------- 1 | from boda.models import YolactConfig, YolactModel 2 | from boda.models.feature_extractor import resnet50, resnet101 3 | # from boda.lib.torchinfo import summary 4 | from boda.lib.torchsummary import summary 5 | import torch 6 | 7 | config = YolactConfig(num_classes=80) 8 | model = YolactModel(config, backbone=resnet101()).to('cuda') 9 | model.train() 10 | print(model) 11 | # print(summary(model, input_size=(16, 3, 550, 550), verbose=0)) 12 | print(summary(model, input_data=(3, 550, 550), verbose=0)) 13 | 14 | # model.load_weights('cache/yolact-base.pth') 15 | 16 | 17 | from boda.models import PostprocessYolact 18 | from PIL import Image 19 | from torchvision import transforms 20 | 21 | image = Image.open('test6.jpg') 22 | model = YolactModel.from_pretrained('yolact-base').cuda() 23 | model.eval() 24 | 25 | aug = transforms.Compose([ 26 | transforms.ToTensor(), 27 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 28 | # transforms.Normalize([0.406, 0.456, 0.485], [0.225, 0.224, 0.229]) 29 | ]) 30 | 31 | outputs = model([aug(image).cuda()]) 32 | 33 | print(outputs.keys()) 34 | post = PostprocessYolact() 35 | outputs = post(outputs, outputs['image_sizes']) 36 | print(outputs[0]['boxes']) 37 | import cv2 38 | import numpy as np 39 | import matplotlib.pyplot as plt 40 | import matplotlib.patches as patches 41 | from skimage.measure import find_contours 42 | import adjustText 43 | 44 | np_image = np.array(image) 45 | np_image = cv2.cvtColor(np_image, cv2.COLOR_RGB2BGR) 46 | # for box in outputs[0]['boxes']: 47 | # # box = list(map(int, boxes[j, :])) 48 | # x1, y1, x2, y2 = box.detach().cpu().numpy() 49 | # # score = scores[j] 50 | # # label = labels[j] 51 | # cv2.rectangle(np_image, (x1, y1), (x2, y2), (0, 0, 255), thickness=1) 52 | 53 | plt.imshow(image) 54 | ax = plt.gca() 55 | threshold = 0 56 | COCO_CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 57 | 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 
58 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 59 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 60 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 61 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 62 | 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 63 | 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 64 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 65 | 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 66 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 67 | 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 68 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 69 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush') 70 | 71 | COLORS = { 72 | 1: 'deepskyblue', 73 | 2: 'orangered', 74 | 3: 'yellowgreen', 75 | 4: 'darkorange', 76 | 5: 'chocolate', 77 | 6: 'slategrey', 78 | 7: 'darkgoldenrod', 79 | 8: 'purple', 80 | 9: 'saddlebrown', 81 | 10: 'olive', 82 | } 83 | 84 | for output in outputs: 85 | boxes = output['boxes'] 86 | scores = output['scores'] 87 | labels = output['labels'] 88 | masks = output['masks'] 89 | print(scores) 90 | 91 | for i, box in enumerate(boxes): 92 | x1, y1, x2, y2 = box.detach().cpu().numpy() 93 | score = scores[i].detach().cpu().numpy() 94 | label = labels[i].detach().cpu().numpy() 95 | mask = masks[i].detach().cpu().numpy().astype(np.int64) 96 | 97 | color = COLORS[(label+1) % 11] 98 | contours = find_contours(mask, 0.5) 99 | 100 | if score >= threshold: 101 | cx = x2 - x1 102 | cy = y2 - y1 103 | ax.text(x1, y1, f"{COCO_CLASSES[label]}", c='black', size=8, va='bottom', ha='left', alpha=0.5) 104 | 105 | rect = patches.Rectangle( 106 | (x1, y1), 107 | cx, cy, 108 | linewidth=1, 109 | edgecolor=color, 110 | facecolor='none' 111 | ) 112 | ax.add_patch(rect) 113 | 114 | ## contours 115 | for contour in contours: 116 | shapes = [] 117 | for point in contour: 118 | shapes.append([int(point[1]), int(point[0])]) 119 | 120 | polygon_edge = patches.Polygon( 121 | (shapes), 122 | edgecolor=color, 123 | facecolor='none', 124 | linewidth=1, 125 | fill=False, 126 | ) 127 | 128 | polygon_fill = patches.Polygon( 129 | (shapes), 130 | alpha=0.5, 131 | edgecolor='none', 132 | facecolor=color, 133 | fill=True 134 | ) 135 | 136 | ax.add_patch(polygon_edge) 137 | ax.add_patch(polygon_fill) 138 | 139 | 140 | plt.axis('off') 141 | plt.savefig('test.jpg' ,dpi=100, bbox_inches='tight', pad_inches=0) 142 | -------------------------------------------------------------------------------- /boda/lib/torchsummary/layer_info.py: -------------------------------------------------------------------------------- 1 | """ layer_info.py """ 2 | from typing import Any, Dict, List, Optional, Sequence, Union 3 | 4 | import numpy as np 5 | import torch 6 | import torch.nn as nn 7 | 8 | DETECTED_INPUT_OUTPUT_TYPES = Union[ 9 | Sequence[Any], Dict[Any, torch.Tensor], torch.Tensor 10 | ] 11 | 12 | 13 | class LayerInfo: 14 | """Class that holds information about a layer module.""" 15 | 16 | def __init__( 17 | self, 18 | module: nn.Module, 19 | depth: int, 20 | depth_index: Optional[int] = None, 21 | parent_info: Optional["LayerInfo"] = None, 22 | ): 23 | # Identifying information 24 | self.layer_id = id(module) 25 | self.module = module 26 | self.class_name = str(module.__class__).split(".")[-1].split("'")[0] 27 | self.inner_layers: Dict[str, List[int]] = {} 28 | self.depth = depth 29 | self.depth_index = depth_index 30 | self.executed = 
False 31 | self.parent_info = parent_info 32 | 33 | # Statistics 34 | self.trainable = True 35 | self.is_recursive = False 36 | self.input_size: List[int] = [] 37 | self.output_size: List[int] = [] 38 | self.kernel_size: List[int] = [] 39 | self.num_params = 0 40 | self.macs = 0 41 | self.calculate_num_params() 42 | 43 | def __repr__(self) -> str: 44 | if self.depth_index is None: 45 | return f"{self.class_name}: {self.depth}" 46 | return f"{self.class_name}: {self.depth}-{self.depth_index}" 47 | 48 | @staticmethod 49 | def calculate_size( 50 | inputs: DETECTED_INPUT_OUTPUT_TYPES, batch_dim: Optional[int] 51 | ) -> List[int]: 52 | """Set input_size or output_size using the model's inputs.""" 53 | 54 | def nested_list_size(inputs: Sequence[Any]) -> List[int]: 55 | """Flattens nested list size.""" 56 | if hasattr(inputs[0], "size") and callable(inputs[0].size): 57 | return list(inputs[0].size()) 58 | if isinstance(inputs, (list, tuple)): 59 | return nested_list_size(inputs[0]) 60 | return [] 61 | 62 | # pack_padded_seq and pad_packed_seq store feature into data attribute 63 | if isinstance(inputs, (list, tuple)) and len(inputs) == 0: 64 | size = [] 65 | elif isinstance(inputs, (list, tuple)) and hasattr(inputs[0], "data"): 66 | size = list(inputs[0].data.size()) 67 | if batch_dim is not None: 68 | size = size[:batch_dim] + [-1] + size[batch_dim + 1 :] 69 | 70 | elif isinstance(inputs, dict): 71 | # TODO avoid overwriting the previous size every time? 72 | for _, output in inputs.items(): 73 | size = list(output.size()) 74 | if batch_dim is not None: 75 | size = [size[:batch_dim] + [-1] + size[batch_dim + 1 :]] 76 | 77 | elif isinstance(inputs, torch.Tensor): 78 | size = list(inputs.size()) 79 | if batch_dim is not None: 80 | size[batch_dim] = -1 81 | 82 | elif isinstance(inputs, (list, tuple)): 83 | size = nested_list_size(inputs) 84 | 85 | else: 86 | raise TypeError( 87 | "Model contains a layer with an unsupported " 88 | "input or output type: {}".format(inputs) 89 | ) 90 | 91 | return size 92 | 93 | def calculate_num_params(self) -> None: 94 | """ 95 | Set num_params, trainable, inner_layers, and kernel_size 96 | using the module's parameters. 97 | """ 98 | for name, param in self.module.named_parameters(): 99 | self.num_params += param.nelement() 100 | self.trainable &= param.requires_grad 101 | 102 | if name == "weight": 103 | ksize = list(param.size()) 104 | # to make [in_shape, out_shape, ksize, ksize] 105 | if len(ksize) > 1: 106 | ksize[0], ksize[1] = ksize[1], ksize[0] 107 | self.kernel_size = ksize 108 | 109 | # RNN modules have inner weights such as weight_ih_l0 110 | elif "weight" in name: 111 | self.inner_layers[name] = list(param.size()) 112 | 113 | def calculate_macs(self) -> None: 114 | """ 115 | Set MACs using the module's parameters and layer's output size, which is 116 | used for computing number of operations for Conv layers. 117 | """ 118 | for name, param in self.module.named_parameters(): 119 | if name == "weight": 120 | # ignore N, C when calculate Mult-Adds in ConvNd 121 | if "Conv" in self.class_name: 122 | self.macs += int(param.nelement() * np.prod(self.output_size[2:])) 123 | else: 124 | self.macs += param.nelement() 125 | # RNN modules have inner weights such as weight_ih_l0 126 | elif "weight" in name: 127 | self.macs += param.nelement() 128 | 129 | def check_recursive(self, summary_list: List["LayerInfo"]) -> None: 130 | """ 131 | If the current module is already-used, mark as (recursive). 132 | Must check before adding line to the summary. 
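        (For example, a prediction head that is shared across several feature maps
        keeps the same module id, so its parameters are counted once and later
        occurrences are printed as "(recursive)" in the summary table.)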
133 | """ 134 | if list(self.module.named_parameters()): 135 | for other_layer in summary_list: 136 | if self.layer_id == other_layer.layer_id: 137 | self.is_recursive = True 138 | 139 | def macs_to_str(self, reached_max_depth: bool) -> str: 140 | """Convert MACs to string.""" 141 | if self.num_params > 0 and ( 142 | reached_max_depth or not any(self.module.children()) 143 | ): 144 | return f"{self.macs:,}" 145 | return "--" 146 | 147 | def num_params_to_str(self, reached_max_depth: bool = False) -> str: 148 | """Convert num_params to string.""" 149 | if self.is_recursive: 150 | return "(recursive)" 151 | if self.num_params > 0: 152 | param_count_str = f"{self.num_params:,}" 153 | if reached_max_depth or not any(self.module.children()): 154 | if not self.trainable: 155 | return f"({param_count_str})" 156 | return param_count_str 157 | return "--" 158 | -------------------------------------------------------------------------------- /boda/lib/torchinfo/layer_info.py: -------------------------------------------------------------------------------- 1 | """ layer_info.py """ 2 | from typing import Any, Dict, Iterable, List, Optional, Sequence, Union 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | DETECTED_INPUT_OUTPUT_TYPES = Union[ 8 | Sequence[Any], Dict[Any, torch.Tensor], torch.Tensor 9 | ] 10 | 11 | 12 | class LayerInfo: 13 | """Class that holds information about a layer module.""" 14 | 15 | def __init__( 16 | self, 17 | module: nn.Module, 18 | depth: int, 19 | depth_index: Optional[int] = None, 20 | parent_info: Optional["LayerInfo"] = None, 21 | ): 22 | # Identifying information 23 | self.layer_id = id(module) 24 | self.module = module 25 | self.class_name = str(module.__class__).split(".")[-1].split("'")[0] 26 | self.inner_layers: Dict[str, List[int]] = {} 27 | self.depth = depth 28 | self.depth_index = depth_index 29 | self.executed = False 30 | self.parent_info = parent_info 31 | 32 | # Statistics 33 | self.trainable = True 34 | self.is_recursive = False 35 | self.input_size: List[int] = [] 36 | self.output_size: List[int] = [] 37 | self.kernel_size: List[int] = [] 38 | self.num_params = 0 39 | self.macs = 0 40 | self.calculate_num_params() 41 | 42 | def __repr__(self) -> str: 43 | layer_name = f"{self.class_name}: {self.depth}" 44 | if self.depth_index is None: 45 | return layer_name 46 | return f"{layer_name}-{self.depth_index}" 47 | 48 | @staticmethod 49 | def calculate_size( 50 | inputs: DETECTED_INPUT_OUTPUT_TYPES, batch_dim: Optional[int] 51 | ) -> List[int]: 52 | """Set input_size or output_size using the model's inputs.""" 53 | 54 | def nested_list_size(inputs: Sequence[Any]) -> List[int]: 55 | """Flattens nested list size.""" 56 | if hasattr(inputs[0], "size") and callable(inputs[0].size): 57 | return list(inputs[0].size()) 58 | if isinstance(inputs, (list, tuple)): 59 | return nested_list_size(inputs[0]) 60 | return [] 61 | 62 | size = [] 63 | # pack_padded_seq and pad_packed_seq store feature into data attribute 64 | if isinstance(inputs, (list, tuple)) and inputs and hasattr(inputs[0], "data"): 65 | size = list(inputs[0].data.size()) 66 | if batch_dim is not None: 67 | size = size[:batch_dim] + [-1] + size[batch_dim + 1 :] 68 | 69 | elif isinstance(inputs, dict): 70 | # TODO avoid overwriting the previous size every time? 
71 | for _, output in inputs.items(): 72 | size = list(output.size()) 73 | if batch_dim is not None: 74 | size = [size[:batch_dim] + [-1] + size[batch_dim + 1 :]] 75 | 76 | elif isinstance(inputs, torch.Tensor): 77 | size = list(inputs.size()) 78 | if batch_dim is not None: 79 | size[batch_dim] = -1 80 | 81 | elif isinstance(inputs, (list, tuple)): 82 | size = nested_list_size(inputs) 83 | 84 | else: 85 | raise TypeError( 86 | "Model contains a layer with an unsupported " 87 | f"input or output type: {inputs}" 88 | ) 89 | 90 | return size 91 | 92 | def calculate_num_params(self) -> None: 93 | """ 94 | Set num_params, trainable, inner_layers, and kernel_size 95 | using the module's parameters. 96 | """ 97 | for name, param in self.module.named_parameters(): 98 | self.num_params += param.nelement() 99 | self.trainable &= param.requires_grad 100 | 101 | if name == "weight": 102 | ksize = list(param.size()) 103 | # to make [in_shape, out_shape, ksize, ksize] 104 | if len(ksize) > 1: 105 | ksize[0], ksize[1] = ksize[1], ksize[0] 106 | self.kernel_size = ksize 107 | 108 | # RNN modules have inner weights such as weight_ih_l0 109 | elif "weight" in name: 110 | self.inner_layers[name] = list(param.size()) 111 | 112 | def calculate_macs(self) -> None: 113 | """ 114 | Set MACs using the module's parameters and layer's output size, which is 115 | used for computing number of operations for Conv layers. 116 | """ 117 | for name, param in self.module.named_parameters(): 118 | if name == "weight": 119 | # ignore N, C when calculate Mult-Adds in ConvNd 120 | if "Conv" in self.class_name: 121 | self.macs += int(param.nelement() * prod(self.output_size[2:])) 122 | else: 123 | self.macs += param.nelement() 124 | # RNN modules have inner weights such as weight_ih_l0 125 | elif "weight" in name: 126 | self.macs += param.nelement() 127 | 128 | def check_recursive(self, summary_list: List["LayerInfo"]) -> None: 129 | """ 130 | If the current module is already-used, mark as (recursive). 131 | Must check before adding line to the summary. 
132 | """ 133 | if list(self.module.named_parameters()): 134 | for other_layer in summary_list: 135 | if self.layer_id == other_layer.layer_id: 136 | self.is_recursive = True 137 | 138 | def macs_to_str(self, reached_max_depth: bool) -> str: 139 | """Convert MACs to string.""" 140 | if self.num_params > 0 and ( 141 | reached_max_depth or not any(self.module.children()) 142 | ): 143 | return f"{self.macs:,}" 144 | return "--" 145 | 146 | def num_params_to_str(self, reached_max_depth: bool = False) -> str: 147 | """Convert num_params to string.""" 148 | if self.is_recursive: 149 | return "(recursive)" 150 | if self.num_params > 0: 151 | param_count_str = f"{self.num_params:,}" 152 | if reached_max_depth or not any(self.module.children()): 153 | if not self.trainable: 154 | return f"({param_count_str})" 155 | return param_count_str 156 | return "--" 157 | 158 | 159 | def prod(num_list: Union[Iterable[Any], torch.Size]) -> int: 160 | result = 1 161 | for num in num_list: 162 | result *= num 163 | return abs(result) 164 | -------------------------------------------------------------------------------- /boda/models/solov2/architecture_decoupled_solov1.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import itertools 3 | import math 4 | import os 5 | from collections import defaultdict, OrderedDict 6 | from typing import Tuple, List, Dict, Any, Callable, TypeVar, Union, Sequence 7 | 8 | import torch 9 | import torch.nn.functional as F 10 | from torch import nn, Tensor 11 | 12 | from ...base_architecture import Neck, Head, Model 13 | from ...utils.mask import points_nms 14 | from ..backbone_resnet import resnet101, resnet50 15 | from ..neck_fpn import FeaturePyramidNetworks 16 | from .architecture_solov1 import ( 17 | InstanceLayer, 18 | CategoryLayer, 19 | Solov1PredictNeck, 20 | Solov1PredictHead, 21 | Solov1Model, 22 | ) 23 | from .configuration_solov1 import Solov1Config 24 | 25 | 26 | class DecoupledSolov1PredictHead(Solov1PredictHead): 27 | def __init__( 28 | self, 29 | config: Solov1Config, 30 | in_channels: int = 256, 31 | fpn_channels: int = 256, 32 | num_head_layers: int = 7, 33 | grids: List = [40, 36, 24, 16, 12], 34 | strides: List = [4, 8, 16, 32, 64], 35 | base_edges: List = [16, 32, 64, 128, 256], 36 | scales: List = [[8, 32], [16, 64], [32, 128], [64, 256], [128, 512]], 37 | num_classes: int = 80, 38 | ) -> None: 39 | super().__init__() 40 | self.config = config 41 | self.in_channels = in_channels 42 | self.fpn_channels = fpn_channels 43 | self.num_head_layers = num_head_layers 44 | self.grids = grids 45 | self.strides = strides 46 | self.base_edges = base_edges 47 | self.scales = scales 48 | self.num_classes = num_classes 49 | 50 | self.cate_down_pos = 0 51 | 52 | delattr(self, "instance_layers") 53 | 54 | self.x_instance_layers = nn.ModuleList() 55 | self.y_instance_layers = nn.ModuleList() 56 | self.category_layers = nn.ModuleList() 57 | for i in range(self.num_head_layers): 58 | if i == 0: 59 | in_channels = self.in_channels + 1 60 | else: 61 | in_channels = self.fpn_channels 62 | 63 | self.x_instance_layers.append( 64 | InstanceLayer( 65 | in_channels, 66 | self.fpn_channels, 67 | kernel_size=3, 68 | stride=1, 69 | padding=1, 70 | bias=True, 71 | num_groups=32, 72 | ) 73 | ) 74 | 75 | self.y_instance_layers.append( 76 | InstanceLayer( 77 | in_channels, 78 | self.fpn_channels, 79 | kernel_size=3, 80 | stride=1, 81 | padding=1, 82 | bias=True, 83 | num_groups=32, 84 | ) 85 | ) 86 | 87 | if i == 0: 88 | in_channels = 
self.in_channels 89 | else: 90 | in_channels = self.fpn_channels 91 | 92 | self.category_layers.append( 93 | CategoryLayer( 94 | in_channels, 95 | self.fpn_channels, 96 | kernel_size=3, 97 | stride=1, 98 | padding=1, 99 | bias=True, 100 | num_groups=32, 101 | ) 102 | ) 103 | 104 | self.x_decoupled_instance_layers = nn.ModuleList() 105 | self.y_decoupled_instance_layers = nn.ModuleList() 106 | self.pred_instance_layers = nn.ModuleList() 107 | for grid in self.grids: 108 | self.x_decoupled_instance_layers.append( 109 | nn.Conv2d(self.fpn_channels, grid, kernel_size=3, padding=1) 110 | ) 111 | self.y_decoupled_instance_layers.append( 112 | nn.Conv2d(self.fpn_channels, grid, kernel_size=3, padding=1) 113 | ) 114 | 115 | self.pred_category_layer = nn.Conv2d( 116 | self.fpn_channels, self.num_classes - 1, kernel_size=3, padding=1 117 | ) 118 | 119 | def forward(self, inputs: List[Tensor]): 120 | inputs = self.split_feature_maps(inputs) 121 | feature_map_sizes = [feature_map.size()[-2:] for feature_map in inputs] 122 | upsampled_size = (feature_map_sizes[0][0] * 2, feature_map_sizes[0][1] * 2) 123 | 124 | pred_masks, pred_labels = self.multi_apply( 125 | self.forward_single, 126 | inputs, 127 | list(range(len(self.grids))), 128 | upsampled_size=upsampled_size, 129 | ) 130 | 131 | return pred_masks, pred_labels 132 | 133 | def split_feature_maps(self, inputs: List[Tensor]) -> Tuple[Tensor]: 134 | """ 135 | Returns: 136 | """ 137 | return ( 138 | F.interpolate( 139 | inputs[0], 140 | scale_factor=0.5, 141 | mode="bilinear", 142 | align_corners=False, 143 | recompute_scale_factor=True, 144 | ), 145 | inputs[1], 146 | inputs[2], 147 | inputs[3], 148 | F.interpolate( 149 | inputs[4], 150 | size=inputs[3].shape[-2:], 151 | mode="bilinear", 152 | align_corners=False, 153 | ), 154 | ) 155 | 156 | def forward_single(self, inputs, idx, upsampled_size: Tuple = None): 157 | instances = inputs 158 | categories = inputs 159 | 160 | x_range = torch.linspace(-1, 1, instances.shape[-1], device=instances.device) 161 | y_range = torch.linspace(-1, 1, instances.shape[-2], device=categories.device) 162 | y, x = torch.meshgrid(y_range, x_range) 163 | y = y.expand([instances.shape[0], 1, -1, -1]) 164 | x = x.expand([instances.shape[0], 1, -1, -1]) 165 | coords = torch.cat([x, y], 1) 166 | instances = torch.cat([instances, coords], 1) 167 | 168 | for i, ins_layer in enumerate(self.instance_layers): 169 | instances = ins_layer(instances) 170 | 171 | instances = F.interpolate( 172 | instances, scale_factor=2.0, mode="bilinear", align_corners=False 173 | ) 174 | pred_masks = self.pred_instance_layers[idx](instances) 175 | 176 | for i, cate_layer in enumerate(self.category_layers): 177 | if i == self.cate_down_pos: 178 | seg_num_grid = self.grids[idx] 179 | categories = F.interpolate( 180 | categories, size=seg_num_grid, mode="bilinear", align_corners=False 181 | ) 182 | categories = cate_layer(categories) 183 | 184 | pred_labels = self.pred_category_layer(categories) 185 | 186 | return pred_masks, pred_labels 187 | -------------------------------------------------------------------------------- /boda/lib/torchinfo/model_statistics.py: -------------------------------------------------------------------------------- 1 | """ model_statistics.py """ 2 | from typing import Any, Dict, Iterable, List, Tuple, Union 3 | 4 | import torch 5 | 6 | from .formatting import FormattingOptions, Verbosity 7 | from .layer_info import LayerInfo, prod 8 | 9 | HEADER_TITLES = { 10 | "kernel_size": "Kernel Shape", 11 | "input_size": "Input 
Shape", 12 | "output_size": "Output Shape", 13 | "num_params": "Param #", 14 | "mult_adds": "Mult-Adds", 15 | } 16 | CORRECTED_INPUT_SIZE_TYPE = List[Union[Iterable[Any], torch.Size]] 17 | 18 | 19 | class ModelStatistics: 20 | """Class for storing results of the summary.""" 21 | 22 | def __init__( 23 | self, 24 | summary_list: List[LayerInfo], 25 | input_size: CORRECTED_INPUT_SIZE_TYPE, 26 | formatting: FormattingOptions, 27 | ): 28 | self.summary_list = summary_list 29 | self.input_size = input_size 30 | self.total_input = sum(prod(sz) for sz in input_size) if input_size else 0 31 | self.formatting = formatting 32 | self.total_params, self.trainable_params = 0, 0 33 | self.total_output, self.total_mult_adds = 0, 0 34 | for layer_info in summary_list: 35 | self.total_mult_adds += layer_info.macs 36 | if not layer_info.is_recursive: 37 | if layer_info.depth == formatting.max_depth or ( 38 | not any(layer_info.module.children()) 39 | and layer_info.depth < formatting.max_depth 40 | ): 41 | self.total_params += layer_info.num_params 42 | if layer_info.trainable: 43 | self.trainable_params += layer_info.num_params 44 | if layer_info.num_params > 0 and not any(layer_info.module.children()): 45 | # x2 for gradients 46 | self.total_output += 2 * prod(layer_info.output_size) 47 | 48 | def __repr__(self) -> str: 49 | """Print results of the summary.""" 50 | header_row = self.formatting.format_row("Layer (type:depth-idx)", HEADER_TITLES) 51 | layer_rows = self.layers_to_str() 52 | divider = "=" * self.formatting.get_total_width() 53 | summary_str = ( 54 | "{0}\n{1}{0}\n{2}{0}" 55 | "\nTotal params: {3:,}\n" 56 | "Trainable params: {4:,}\n" 57 | "Non-trainable params: {5:,}\n".format( 58 | divider, 59 | header_row, 60 | layer_rows, 61 | self.total_params, 62 | self.trainable_params, 63 | self.total_params - self.trainable_params, 64 | ) 65 | ) 66 | if self.input_size: 67 | summary_str += ( 68 | "Total mult-adds ({}): {:0.2f}\n" 69 | "{}\n" 70 | "Input size (MB): {:0.2f}\n" 71 | "Forward/backward pass size (MB): {:0.2f}\n" 72 | "Params size (MB): {:0.2f}\n" 73 | "Estimated Total Size (MB): {:0.2f}\n".format( 74 | *self.to_readable(self.total_mult_adds), 75 | divider, 76 | self.to_bytes(self.total_input), 77 | self.to_bytes(self.total_output), 78 | self.to_bytes(self.total_params), 79 | self.to_bytes( 80 | self.total_input + self.total_output + self.total_params 81 | ), 82 | ) 83 | ) 84 | summary_str += divider 85 | return summary_str 86 | 87 | @staticmethod 88 | def to_bytes(num: int) -> float: 89 | """Converts a number (assume floats, 4 bytes each) to megabytes.""" 90 | return num * 4 / 1e6 91 | 92 | @staticmethod 93 | def to_readable(num: int) -> Tuple[str, float]: 94 | """Converts a number to millions, billions, or trillions.""" 95 | if num >= 1e12: 96 | return "T", num / 1e12 97 | if num >= 1e9: 98 | return "G", num / 1e9 99 | return "M", num / 1e6 100 | 101 | def layer_info_to_row( 102 | self, layer_info: LayerInfo, reached_max_depth: bool = False 103 | ) -> str: 104 | """Convert layer_info to string representation of a row.""" 105 | 106 | def get_start_str(depth: int) -> str: 107 | return "├─" if depth == 1 else "| " * (depth - 1) + "└─" 108 | 109 | row_values = { 110 | "kernel_size": str(layer_info.kernel_size) 111 | if layer_info.kernel_size 112 | else "--", 113 | "input_size": str(layer_info.input_size), 114 | "output_size": str(layer_info.output_size), 115 | "num_params": layer_info.num_params_to_str(reached_max_depth), 116 | "mult_adds": layer_info.macs_to_str(reached_max_depth), 117 | } 
118 | depth = layer_info.depth 119 | name = get_start_str(depth) + str(layer_info) 120 | new_line = self.formatting.format_row(name, row_values) 121 | if self.formatting.verbose == Verbosity.VERBOSE.value: 122 | for inner_name, inner_shape in layer_info.inner_layers.items(): 123 | prefix = get_start_str(depth + 1) 124 | extra_row_values = {"kernel_size": str(inner_shape)} 125 | new_line += self.formatting.format_row( 126 | prefix + inner_name, extra_row_values 127 | ) 128 | return new_line 129 | 130 | def layers_to_str(self) -> str: 131 | """Print each layer of the model using a fancy branching diagram.""" 132 | new_str = "" 133 | current_hierarchy: Dict[int, LayerInfo] = {} 134 | 135 | for layer_info in self.summary_list: 136 | if layer_info.depth > self.formatting.max_depth: 137 | continue 138 | 139 | # create full hierarchy of current layer 140 | hierarchy = {} 141 | parent = layer_info.parent_info 142 | while parent is not None and parent.depth > 0: 143 | hierarchy[parent.depth] = parent 144 | parent = parent.parent_info 145 | 146 | # show hierarchy if it is not there already 147 | for d in range(1, layer_info.depth): 148 | if ( 149 | d not in current_hierarchy 150 | or current_hierarchy[d].module is not hierarchy[d].module 151 | ): 152 | new_str += self.layer_info_to_row(hierarchy[d]) 153 | current_hierarchy[d] = hierarchy[d] 154 | 155 | reached_max_depth = layer_info.depth == self.formatting.max_depth 156 | new_str += self.layer_info_to_row(layer_info, reached_max_depth) 157 | current_hierarchy[layer_info.depth] = layer_info 158 | 159 | # remove deeper hierarchy 160 | d = layer_info.depth + 1 161 | while d in current_hierarchy: 162 | current_hierarchy.pop(d) 163 | d += 1 164 | 165 | return new_str 166 | -------------------------------------------------------------------------------- /boda/models/ssd/inference_ssd.py: -------------------------------------------------------------------------------- 1 | # import torch 2 | # from torch.autograd import Function 3 | # from ..box_utils import decode, nms 4 | # from data import voc as cfg 5 | # from torchvision.ops import nms 6 | 7 | 8 | # # Adapted from https://github.com/Hakuyume/chainer-ssd 9 | # def decode(loc, priors, variances): 10 | # """Decode locations from predictions using priors to undo 11 | # the encoding we did for offset regression at train time. 12 | # Args: 13 | # loc (tensor): location predictions for loc layers, 14 | # Shape: [num_priors,4] 15 | # priors (tensor): Prior boxes in center-offset form. 16 | # Shape: [num_priors,4]. 17 | # variances: (list[float]) Variances of priorboxes 18 | # Return: 19 | # decoded bounding box predictions 20 | # """ 21 | 22 | # boxes = torch.cat(( 23 | # priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], 24 | # priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) 25 | # boxes[:, :2] -= boxes[:, 2:] / 2 26 | # boxes[:, 2:] += boxes[:, :2] 27 | # return boxes 28 | 29 | 30 | # def nms(boxes, scores, overlap=0.5, top_k=200): 31 | # """Apply non-maximum suppression at test time to avoid detecting too many 32 | # overlapping bounding boxes for a given object. 33 | # Args: 34 | # boxes: (tensor) The location preds for the img, Shape: [num_priors,4]. 35 | # scores: (tensor) The class predscores for the img, Shape:[num_priors]. 36 | # overlap: (float) The overlap thresh for suppressing unnecessary boxes. 37 | # top_k: (int) The Maximum number of box preds to consider. 38 | # Return: 39 | # The indices of the kept boxes with respect to num_priors. 
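#     Example (a rough usage sketch for this commented-out helper; `decoded_boxes`
#     and `class_scores` are assumed [num_priors, 4] / [num_priors] tensors):
#         keep, count = nms(decoded_boxes, class_scores, overlap=0.5, top_k=200)
#         kept_boxes = decoded_boxes[keep[:count]]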
40 | # """ 41 | 42 | # keep = scores.new(scores.size(0)).zero_().long() 43 | # if boxes.numel() == 0: 44 | # return keep 45 | # x1 = boxes[:, 0] 46 | # y1 = boxes[:, 1] 47 | # x2 = boxes[:, 2] 48 | # y2 = boxes[:, 3] 49 | # area = torch.mul(x2 - x1, y2 - y1) 50 | # v, idx = scores.sort(0) # sort in ascending order 51 | # # I = I[v >= 0.01] 52 | # idx = idx[-top_k:] # indices of the top-k largest vals 53 | # xx1 = boxes.new() 54 | # yy1 = boxes.new() 55 | # xx2 = boxes.new() 56 | # yy2 = boxes.new() 57 | # w = boxes.new() 58 | # h = boxes.new() 59 | 60 | # # keep = torch.Tensor() 61 | # count = 0 62 | # while idx.numel() > 0: 63 | # i = idx[-1] # index of current largest val 64 | # # keep.append(i) 65 | # keep[count] = i 66 | # count += 1 67 | # if idx.size(0) == 1: 68 | # break 69 | # idx = idx[:-1] # remove kept element from view 70 | # # load bboxes of next highest vals 71 | # torch.index_select(x1, 0, idx, out=xx1) 72 | # torch.index_select(y1, 0, idx, out=yy1) 73 | # torch.index_select(x2, 0, idx, out=xx2) 74 | # torch.index_select(y2, 0, idx, out=yy2) 75 | # # store element-wise max with next highest score 76 | # xx1 = torch.clamp(xx1, min=x1[i]) 77 | # yy1 = torch.clamp(yy1, min=y1[i]) 78 | # xx2 = torch.clamp(xx2, max=x2[i]) 79 | # yy2 = torch.clamp(yy2, max=y2[i]) 80 | # w.resize_as_(xx2) 81 | # h.resize_as_(yy2) 82 | # w = xx2 - xx1 83 | # h = yy2 - yy1 84 | # # check sizes of xx1 and xx2.. after each iteration 85 | # w = torch.clamp(w, min=0.0) 86 | # h = torch.clamp(h, min=0.0) 87 | # inter = w*h 88 | # # IoU = i / (area(a) + area(b) - i) 89 | # rem_areas = torch.index_select(area, 0, idx) # load remaining areas) 90 | # union = (rem_areas - inter) + area[i] 91 | # IoU = inter/union # store result in iou 92 | # # keep only elements with an IoU <= overlap 93 | # idx = idx[IoU.le(overlap)] 94 | # return keep, count 95 | 96 | 97 | # class Detect(Function): 98 | # """At test time, Detect is the final layer of SSD. Decode location preds, 99 | # apply non-maximum suppression to location predictions based on conf 100 | # scores and threshold to a top_k number of output predictions for both 101 | # confidence score and locations. 102 | # """ 103 | # def __init__(self, num_classes, bkg_label, top_k, conf_thresh, nms_thresh): 104 | # self.num_classes = num_classes 105 | # self.background_label = bkg_label 106 | # self.top_k = top_k 107 | # # Parameters used in nms. 108 | # self.nms_thresh = nms_thresh 109 | # if nms_thresh <= 0: 110 | # raise ValueError('nms_threshold must be non negative.') 111 | # self.conf_thresh = conf_thresh 112 | # self.variance = cfg['variance'] 113 | 114 | # def forward(self, boxes, scores, prior_boxes): 115 | # """ 116 | # Args: 117 | # boxes (:obj:`Tensor`): [B, N, 4] 118 | # scores (:obj:`Tensor`): [N, C] 119 | # prior_boxes (:obj:`Tensor`): [N, 4] 120 | 121 | # loc_data: (tensor) Loc preds from loc layers 122 | # Shape: [batch, num_priors*4] 123 | # conf_data: (tensor) Shape: Conf preds from conf layers 124 | # Shape: [batch*num_priors,num_classes] 125 | # prior_data: (tensor) Prior boxes and variances from priorbox layers 126 | # Shape: [1,num_priors,4] 127 | # """ 128 | # num = boxes.size(0) # batch size 129 | # num_priors = prior_boxes.size(0) 130 | # output = torch.zeros(num, self.num_classes, self.top_k, 5) 131 | # conf_preds = scores.view(num, num_priors, self.num_classes).transpose(2, 1) 132 | 133 | # # Decode predictions into bboxes. 
134 | # for i in range(num): 135 | # decoded_boxes = decode(boxes[i], prior_data, self.variance) 136 | # # For each class, perform nms 137 | # conf_scores = conf_preds[i].clone() 138 | 139 | # for cl in range(1, self.num_classes): 140 | # c_mask = conf_scores[cl].gt(self.conf_thresh) 141 | # scores = conf_scores[cl][c_mask] 142 | 143 | # score_mask = scores[i].gt(0.05) 144 | 145 | # score = scores[score_mask] 146 | # index = index[score_mask] 147 | 148 | # if scores.size(0) == 0: 149 | # continue 150 | 151 | # l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes) 152 | # boxes = decoded_boxes[l_mask].view(-1, 4) 153 | # # idx of highest scoring and non-overlapping boxes per class 154 | # # ids, count = nms(boxes, scores, self.nms_thresh, self.top_k) 155 | # # boxes (Tensor[N, 4])) – boxes to perform NMS on. They are expected to be in (x1, y1, x2, y2) format 156 | # # scores (Tensor[N]) – scores for each one of the boxes 157 | # # iou_threshold (float) – discards all overlapping boxes with IoU > iou_threshold 158 | # keep = nms(boxes, scores, self.nms_thresh) 159 | # output[i, cl, :count] = \ 160 | # torch.cat((scores[ids[:count]].unsqueeze(1), 161 | # boxes[ids[:count]]), 1) 162 | 163 | # flt = output.contiguous().view(num, -1, 5) 164 | # _, idx = flt[:, :, 0].sort(1, descending=True) 165 | # _, rank = idx.sort(1) 166 | # flt[(rank < self.top_k).unsqueeze(-1).expand_as(flt)].fill_(0) 167 | # return output 168 | -------------------------------------------------------------------------------- /boda/models/ssd/loss_ssd.py: -------------------------------------------------------------------------------- 1 | # from typing import Tuple, List, Dict 2 | 3 | # import torch 4 | # from torch import nn, Tensor 5 | # import torch.nn.functional as F 6 | 7 | # from ...base_architecture import LossFunction 8 | # from ...ops.box import jaccard, cxywh_to_xyxy 9 | # from ...ops.loss import log_sum_exp 10 | 11 | 12 | # class Matcher: 13 | # """Matcher for SSD 14 | 15 | # Arguments: 16 | # threshold (float): 17 | # variances (List[float]): 18 | # """ 19 | # def __init__( 20 | # self, 21 | # threshold: float = 0.5, 22 | # variances: List[float] = [0.1, 0.2] 23 | # ) -> None: 24 | # self.threshold = threshold 25 | # self.variances = variances 26 | 27 | # def __call__( 28 | # self, 29 | # pred_boxes, 30 | # pred_scores, 31 | # pred_priors, 32 | # true_boxes, 33 | # ) -> Tuple[Tensor]: 34 | # """ 35 | # Arguments: 36 | # pred_boxes (Tensor): Size([N, ]) 37 | # pred_priors (Tensor): default boxes Size([N, 4]) 38 | # true_boxes (Tensor): ground truth of bounding boxes Size([N, 4]) 39 | 40 | # Returns: 41 | # matched_boxes (Tensor): Size([num_priors, 4]) 42 | # matched_scores (Tensor): Size([num_priors]) 43 | # """ 44 | # overlaps = jaccard( 45 | # true_boxes, cxcywh_to_xyxy(pred_priors)) 46 | 47 | # # Best prior for each ground truth 48 | # best_prior_overlaps, best_prior_indexes = overlaps.max(1, keepdim=True) 49 | # best_prior_indexes.squeeze_(1) 50 | # best_prior_overlaps.squeeze_(1) 51 | 52 | # # Best ground truth for each prior boxes (default boxes) 53 | # best_truth_overlaps, best_truth_indexes = overlaps.max(0, keepdim=True) 54 | # best_truth_indexes.squeeze_(0) 55 | # best_truth_overlaps.squeeze_(0) 56 | # best_truth_overlaps.index_fill_(0, best_prior_indexes, 2) 57 | 58 | # # TODO refactor: index best_prior_idx with long tensor 59 | # # Ensure every gt matches with its prior of max overlap 60 | # for j in range(best_prior_indexes.size(0)): 61 | # best_truth_indexes[best_prior_indexes[j]] = j 62 | 63 | # 
matched_boxes = true_boxes[best_truth_indexes] # Size([N, 4]) 64 | # matched_scores = pred_scores[best_truth_indexes] + 1 # Size([N]) 65 | # matched_scores[best_truth_overlaps < self.threshold] = 0 # Size([]) 66 | # matched_boxes = self.encode(matched_boxes, pred_priors) 67 | 68 | # return matched_boxes, matched_scores 69 | 70 | # def encode(self, matched_boxes, pred_priors): 71 | # """ 72 | # Return: 73 | # (Tensor): Size([num_priors, 4]) 74 | # """ 75 | # gcxcy = (matched_boxes[:, :2] + matched_boxes[:, 2:])/2 - pred_priors[:, :2] 76 | # gcxcy /= (self.variances[0] * pred_priors[:, 2:]) 77 | # gwh = (matched_boxes[:, 2:] - matched_boxes[:, :2]) / pred_priors[:, 2:] 78 | # gwh = torch.log(gwh) / self.variances[1] 79 | # return torch.cat([gcxcy, gwh], dim=1) 80 | 81 | # def decode(self, pred_boxes, pred_priors): 82 | # boxes = torch.cat(( 83 | # pred_priors[:, :2] + pred_boxes[:, :2] * self.variances[0] * pred_priors[:, 2:], 84 | # pred_priors[:, 2:] * torch.exp(pred_boxes[:, 2:] * self.variances[1])), dim=1) 85 | # boxes[:, :2] -= boxes[:, 2:] / 2 86 | # boxes[:, 2:] += boxes[:, :2] 87 | # return boxes 88 | 89 | 90 | # class SsdLoss(LossFunction): 91 | # def __init__( 92 | # self, 93 | # size, 94 | # overlap_thresh, 95 | # prior_for_matching, 96 | # bkg_label, 97 | # neg_mining, 98 | # neg_pos, 99 | # neg_overlap, 100 | # encode_target, 101 | # variances: List[float] = [0.1, 0.2] 102 | # ) -> None: 103 | # super().__init__() 104 | # self.num_classes = config.num_classes + 1 105 | # self.variances = variances 106 | # self.threshold = overlap_thresh 107 | # self.background_label = bkg_label 108 | # self.encode_target = encode_target 109 | # self.use_prior_for_matching = prior_for_matching 110 | # self.do_neg_mining = neg_mining 111 | # self.negpos_ratio = neg_pos 112 | # self.neg_overlap = neg_overlap 113 | 114 | # def forward(self, inputs, targets): 115 | # """ 116 | # """ 117 | # self.check_targets(targets) 118 | # targets = self.copy_targets(targets) 119 | 120 | # pred_boxes = inputs['boxes'] 121 | # num_boxes = pred_boxes.size(0) 122 | # pred_scores = inputs['scores'] 123 | # pred_priors = inputs['priors'] 124 | # pred_priors = pred_priors[:pred_boxes.size(1), :] 125 | 126 | # batch_size = len(targets) 127 | # num_priors = pred_priors.size(0) 128 | 129 | # # match priors (default boxes) and ground truth boxes 130 | # matched_true_boxes = pred_boxes.new_tensor(batch_size, num_priors, 4) 131 | # matched_true_scores = pred_boxes.new_tensor(batch_size, num_priors, dtype=torch.int64) 132 | 133 | # for i, target in enumerate(targets): 134 | # true_boxes = target['boxes'] 135 | # true_labels = target['labels'] 136 | # matched_boxes, matched_scores = Matcher(self.threshold)( 137 | # pred_boxes, pred_priors, true_boxes, true_labels) 138 | 139 | # matched_true_boxes[i] = matched_boxes 140 | # matched_true_scores[i] = matched_scores 141 | 142 | # matched_true_boxes.requires_grad = False 143 | # matched_true_scores.requires_grad = False 144 | 145 | # # TODO: positive_scores or pos_scores 146 | # pos = matched_true_scores > 0 147 | # num_pred_scores = pos.sum(dim=1, keepdim=True) 148 | 149 | # pos_indexes = pos.unsqueeze(pos.dim()).expand_as(pred_boxes) 150 | # matched_pred_boxes = pred_boxes[pos_indexes].view(-1, 4) 151 | # matched_true_boxes = matched_true_boxes[pos_indexes].view(-1, 4) 152 | 153 | # loss_box = F.smooth_l1_loss( 154 | # matched_pred_boxes, matched_true_boxes, size_average=False) 155 | 156 | # # Compute hard negative mining 157 | # pred_scores = pred_scores.view(-1, 
self.num_classes) 158 | # loss_score = log_sum_exp(pred_scores) - pred_scores.gather(1, matched_true_scores.view(-1, 1)) 159 | 160 | # # Hard negative mining 161 | # loss_score[pos] = 0 162 | # loss_score = loss_score.view(num_boxes, -1) 163 | 164 | # _, loss_index = loss_score.sort(1, descending=True) 165 | # _, rank_index = loss_index.sort(1) 166 | 167 | # num_pos = pos.long().sum(1, keepdim=True) 168 | # num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1) 169 | # neg = rank_index < num_neg.expand_as(rank_index) 170 | 171 | # # Confidence loss including positive and negative samples 172 | # pos_index = pos.unsqueeze(2).expand_as(pred_scores) 173 | # neg_index = neg.unsqueeze(2).expand_as(pred_scores) 174 | 175 | # pred_scores = pred_boxes[(pos_index + neg_index).gt(0)].view(-1, self.num_classes) 176 | # weighted_targets = matched_true_scores[(pos+neg).gt(0)] 177 | # loss_score = F.cross_entropy(pred_scores, weighted_targets, size_average=False) 178 | 179 | # losses = { 180 | # 'loss_bbox': None, 181 | # 'loss_conf': None, 182 | # } 183 | 184 | # return losses 185 | -------------------------------------------------------------------------------- /boda/models/yolact/README.md: -------------------------------------------------------------------------------- 1 | # YOLACT (You Only Look At CoefficienTs) 2 | 3 | ``` 4 | ██╗ ██╗ ██████╗ ██╗ █████╗ ██████╗████████╗ 5 | ╚██╗ ██╔╝██╔═══██╗██║ ██╔══██╗██╔════╝╚══██╔══╝ 6 | ╚████╔╝ ██║ ██║██║ ███████║██║ ██║ 7 | ╚██╔╝ ██║ ██║██║ ██╔══██║██║ ██║ 8 | ██║ ╚██████╔╝███████╗██║ ██║╚██████╗ ██║ 9 | ╚═╝ ╚═════╝ ╚══════╝╚═╝ ╚═╝ ╚═════╝ ╚═╝ 10 | ``` 11 | 12 | ## YOLACT Architecture 13 | 14 | ```{bash} 15 | ============================================================================== 16 | Layer (type:depth-idx) Output Shape Param # 17 | ============================================================================== 18 | ├─ResNet: 1-1 [-1, 256, 138, 138] -- 19 | | └─Conv2d: 2-1 [-1, 64, 275, 275] 9,408 20 | | └─BatchNorm2d: 2-2 [-1, 64, 275, 275] 128 21 | | └─ReLU: 2-3 [-1, 64, 275, 275] -- 22 | | └─MaxPool2d: 2-4 [-1, 64, 138, 138] -- 23 | | └─ModuleList: 2 -- -- 24 | | | └─Sequential: 3-1 [-1, 256, 138, 138] 215,808 25 | | | └─Sequential: 3-2 [-1, 512, 69, 69] 1,219,584 26 | | | └─Sequential: 3-3 [-1, 1024, 35, 35] 26,090,496 27 | | | └─Sequential: 3-4 [-1, 2048, 18, 18] 14,964,736 28 | ├─YolactPredictNeck: 1-2 [-1, 256, 69, 69] -- 29 | | └─ModuleList: 2 -- -- 30 | | | └─Conv2d: 3-5 [-1, 256, 18, 18] 524,544 31 | | | └─Conv2d: 3-6 [-1, 256, 35, 35] 262,400 32 | | | └─Conv2d: 3-7 [-1, 256, 69, 69] 131,328 33 | | └─ModuleList: 2 -- -- 34 | | | └─Conv2d: 3-8 [-1, 256, 18, 18] 590,080 35 | | | └─Conv2d: 3-9 [-1, 256, 35, 35] 590,080 36 | | | └─Conv2d: 3-10 [-1, 256, 69, 69] 590,080 37 | | └─ModuleList: 2 -- -- 38 | | | └─Conv2d: 3-11 [-1, 256, 9, 9] 590,080 39 | | | └─Conv2d: 3-12 [-1, 256, 5, 5] 590,080 40 | ├─YolactPredictHead: 1 -- -- 41 | | └─HeadBranch: 2-5 [[-1, 4]] -- 42 | | | └─Conv2d: 3-13 [-1, 256, 69, 69] 590,080 43 | | | └─Sequential: 3-14 [-1, 12, 69, 69] 27,660 44 | | | └─Sequential: 3-15 [-1, 96, 69, 69] 221,280 45 | | | └─Sequential: 3-16 [-1, 243, 69, 69] 560,115 46 | | └─HeadBranch: 2-6 [[-1, 4]] -- 47 | | └─HeadBranch: 2 -- -- 48 | | | └─Conv2d: 3-17 [-1, 256, 35, 35] (recursive) 49 | | | └─Sequential: 3-18 [-1, 12, 35, 35] (recursive) 50 | | | └─Sequential: 3-19 [-1, 96, 35, 35] (recursive) 51 | | | └─Sequential: 3-20 [-1, 243, 35, 35] (recursive) 52 | | └─HeadBranch: 2-7 [[-1, 4]] -- 53 | | └─HeadBranch: 2 -- -- 54 | | | 
└─Conv2d: 3-21 [-1, 256, 18, 18] (recursive) 55 | | | └─Sequential: 3-22 [-1, 12, 18, 18] (recursive) 56 | | | └─Sequential: 3-23 [-1, 96, 18, 18] (recursive) 57 | | | └─Sequential: 3-24 [-1, 243, 18, 18] (recursive) 58 | ├─ProtoNet: 1-3 [-1, 32, 138, 138] -- 59 | | └─Conv2d: 2-8 [-1, 256, 69, 69] 590,080 60 | | └─Conv2d: 2-9 [-1, 256, 69, 69] 590,080 61 | | └─Conv2d: 2-10 [-1, 256, 69, 69] 590,080 62 | | └─Upsample: 2-11 [-1, 256, 138, 138] -- 63 | | └─Conv2d: 2-12 [-1, 256, 138, 138] 590,080 64 | | └─Conv2d: 2-13 [-1, 32, 138, 138] 8,224 65 | ├─SemanticSegmentation: 1-4 [-1, 80, 69, 69] -- 66 | | └─Conv2d: 2-14 [-1, 80, 69, 69] 20,560 67 | ============================================================================== 68 | Total params: 50,157,071 69 | Trainable params: 50,157,071 70 | Non-trainable params: 0 71 | Total mult-adds (G): 34.48 72 | ============================================================================== 73 | Input size (MB): 3.46 74 | Forward/backward pass size (MB): 193.40 75 | Params size (MB): 191.33 76 | Estimated Total Size (MB): 388.20 77 | ============================================================================== 78 | ``` 79 | 80 | ```{python} 81 | class CocoDataset(Dataset): 82 | def __getitem__(self, index: int) -> Tuple[Tensor, Dict]: 83 | """ 84 | Returns: 85 | image (Tensor[C, H, W]): Original size 86 | targets (Dict[str, Any]): 87 | """ 88 | return image, { 89 | 'boxes': FloatTensor[N, 4]: [x1, y1, x2, y2], 90 | 'labels': LongTensor[N], 91 | 'masks': ByteTensor[N, H, W], 92 | 'keypoints' FloatTensor[N, K, 3]: [x, y, visibility], 93 | 'area': float, 94 | 'iscrowd': 0 or 1, 95 | 'width': int, # width of an original image 96 | 'height': int, # height of an original image 97 | } 98 | ``` 99 | 100 | ```{python} 101 | from boda.models import YolactConfig, YolactModel, YolactLoss 102 | 103 | config = YolactConfig(num_classes=80) 104 | model = YolactModel(config).to('cuda') 105 | criterion = YolactLoss() 106 | 107 | for epoch in range(num_epochs): 108 | for images, targets in train_loader: 109 | outputs = model(images) 110 | losses = criterion(outputs, targets) 111 | loss = sum(loss for loss in losses.values()) 112 | ``` 113 | 114 | ```{python} 115 | class YolacModel: 116 | def forward(self, images): 117 | if self.training: 118 | # 전처리가 끝난 outputs? 119 | return { 120 | 'boxes': FloatTensor, 121 | 'masks: Tensor 122 | 'scores': FloatTensor, 123 | 'prior_boxes': 'anchors' ??? 124 | 'proposals'?? 125 | 'proto_masks':?? 126 | 'semantic_masks':?? 
127 | } 128 | else: 129 | # 전처리가 끝난 outputs 130 | return { 131 | 'boxes': Tensor, 132 | 'masks': 133 | 'scores': Tensor, 134 | 'labels': Tensor, 135 | 'keypoints': Tensor, 136 | } 137 | ``` 138 | 139 | 140 | ```{python} 141 | outputs = model(images) 142 | outputs 143 | 144 | # SSD 145 | {'boxes', 'scores', 'prior_boxes'} 146 | 147 | # Faster R-CNN 148 | {'boxes', 'proposals', 'scores', 'anchors'} 149 | 150 | # Keypoint R-CNN 151 | {'boxes', 'proposals', 'scores', 'keypoints'} 152 | 153 | # YOLACT 154 | {'boxes', 'masks', 'scores', 'prior_boxes', 'proto_masks', 'semantic_masks'} 155 | 156 | # SOLO 157 | {'category', 'masks'} 158 | 159 | # CenterMask 160 | ``` 161 | 162 | -------------------------------------------------------------------------------- /boda/lib/torchsummary/model_statistics.py: -------------------------------------------------------------------------------- 1 | """ model_statistics.py """ 2 | from typing import Any, Dict, Iterable, List, Union 3 | 4 | import numpy as np 5 | import torch 6 | 7 | from .formatting import FormattingOptions, Verbosity 8 | from .layer_info import LayerInfo 9 | 10 | HEADER_TITLES = { 11 | "kernel_size": "Kernel Shape", 12 | "input_size": "Input Shape", 13 | "output_size": "Output Shape", 14 | "num_params": "Param #", 15 | "mult_adds": "Mult-Adds", 16 | } 17 | CORRECTED_INPUT_SIZE_TYPE = List[Union[Iterable[Any], torch.Size]] 18 | 19 | 20 | class ModelStatistics: 21 | """Class for storing results of the summary.""" 22 | 23 | def __init__( 24 | self, 25 | summary_list: List[LayerInfo], 26 | input_size: CORRECTED_INPUT_SIZE_TYPE, 27 | formatting: FormattingOptions, 28 | ): 29 | self.summary_list = summary_list 30 | self.input_size = input_size 31 | self.total_input = ( 32 | sum(abs(np.prod(sz)) for sz in input_size) if input_size else 0 33 | ) 34 | self.formatting = formatting 35 | self.total_params, self.trainable_params = 0, 0 36 | self.total_output, self.total_mult_adds = 0, 0 37 | for layer_info in summary_list: 38 | self.total_mult_adds += layer_info.macs 39 | if not layer_info.is_recursive: 40 | if layer_info.depth == formatting.max_depth or ( 41 | not any(layer_info.module.children()) 42 | and layer_info.depth < formatting.max_depth 43 | ): 44 | self.total_params += layer_info.num_params 45 | if layer_info.trainable: 46 | self.trainable_params += layer_info.num_params 47 | if layer_info.num_params > 0 and not any(layer_info.module.children()): 48 | # x2 for gradients 49 | self.total_output += 2.0 * abs(np.prod(layer_info.output_size)) 50 | 51 | def __repr__(self) -> str: 52 | """Print results of the summary.""" 53 | header_row = self.formatting.format_row("Layer (type:depth-idx)", HEADER_TITLES) 54 | layer_rows = self.layers_to_str() 55 | divider = "=" * self.formatting.get_total_width() 56 | summary_str = ( 57 | "{0}\n{1}{0}\n{2}{0}" 58 | "\nTotal params: {3:,}\n" 59 | "Trainable params: {4:,}\n" 60 | "Non-trainable params: {5:,}\n".format( 61 | divider, 62 | header_row, 63 | layer_rows, 64 | self.total_params, 65 | self.trainable_params, 66 | self.total_params - self.trainable_params, 67 | ) 68 | ) 69 | if self.input_size: 70 | summary_str += ( 71 | "Total mult-adds ({}): {:0.2f}\n" 72 | "{}\n" 73 | "Input size (MB): {:0.2f}\n" 74 | "Forward/backward pass size (MB): {:0.2f}\n" 75 | "Params size (MB): {:0.2f}\n" 76 | "Estimated Total Size (MB): {:0.2f}\n".format( 77 | "G" if self.total_mult_adds >= 1e9 else "M", 78 | self.to_readable(self.total_mult_adds), 79 | divider, 80 | self.to_bytes(self.total_input), 81 | 
self.to_bytes(self.total_output), 82 | self.to_bytes(self.total_params), 83 | self.to_bytes( 84 | self.total_input + self.total_output + self.total_params 85 | ), 86 | ) 87 | ) 88 | summary_str += divider 89 | return summary_str 90 | 91 | @staticmethod 92 | def to_bytes(num: int) -> float: 93 | """Converts a number (assume floats, 4 bytes each) to megabytes.""" 94 | return num * 4 / (1024 ** 2) 95 | 96 | @staticmethod 97 | def to_readable(num: int) -> float: 98 | """Converts a number to millions or billions.""" 99 | if num >= 1e9: 100 | return num / 1e9 101 | return num / 1e6 102 | 103 | def layer_info_to_row( 104 | self, layer_info: LayerInfo, reached_max_depth: bool = False 105 | ) -> str: 106 | """Convert layer_info to string representation of a row.""" 107 | 108 | def get_start_str(depth: int) -> str: 109 | return "├─" if depth == 1 else "| " * (depth - 1) + "└─" 110 | 111 | row_values = { 112 | "kernel_size": str(layer_info.kernel_size) 113 | if layer_info.kernel_size 114 | else "--", 115 | "input_size": str(layer_info.input_size), 116 | "output_size": str(layer_info.output_size), 117 | "num_params": layer_info.num_params_to_str(reached_max_depth), 118 | "mult_adds": layer_info.macs_to_str(reached_max_depth), 119 | } 120 | depth = layer_info.depth 121 | name = (get_start_str(depth) if self.formatting.use_branching else "") + str( 122 | layer_info 123 | ) 124 | new_line = self.formatting.format_row(name, row_values) 125 | if self.formatting.verbose == Verbosity.VERBOSE.value: 126 | for inner_name, inner_shape in layer_info.inner_layers.items(): 127 | prefix = ( 128 | get_start_str(depth + 1) if self.formatting.use_branching else " " 129 | ) 130 | extra_row_values = {"kernel_size": str(inner_shape)} 131 | new_line += self.formatting.format_row( 132 | prefix + inner_name, extra_row_values 133 | ) 134 | return new_line 135 | 136 | def layers_to_str(self) -> str: 137 | """Print each layer of the model as tree or as a list.""" 138 | if self.formatting.use_branching: 139 | return self._layer_tree_to_str() 140 | 141 | layer_rows = "" 142 | for layer_info in self.summary_list: 143 | layer_rows += self.layer_info_to_row(layer_info) 144 | return layer_rows 145 | 146 | def _layer_tree_to_str(self) -> str: 147 | """Print each layer of the model using a fancy branching diagram.""" 148 | new_str = "" 149 | current_hierarchy: Dict[int, LayerInfo] = {} 150 | 151 | for layer_info in self.summary_list: 152 | if layer_info.depth > self.formatting.max_depth: 153 | continue 154 | 155 | # create full hierarchy of current layer 156 | hierarchy = {} 157 | parent = layer_info.parent_info 158 | while parent is not None and parent.depth > 0: 159 | hierarchy[parent.depth] = parent 160 | parent = parent.parent_info 161 | 162 | # show hierarchy if it is not there already 163 | for d in range(1, layer_info.depth): 164 | if ( 165 | d not in current_hierarchy 166 | or current_hierarchy[d].module is not hierarchy[d].module 167 | ): 168 | new_str += self.layer_info_to_row(hierarchy[d]) 169 | current_hierarchy[d] = hierarchy[d] 170 | 171 | reached_max_depth = layer_info.depth == self.formatting.max_depth 172 | new_str += self.layer_info_to_row(layer_info, reached_max_depth) 173 | current_hierarchy[layer_info.depth] = layer_info 174 | 175 | # remove deeper hierarchy 176 | d = layer_info.depth + 1 177 | while d in current_hierarchy: 178 | current_hierarchy.pop(d) 179 | d += 1 180 | 181 | return new_str 182 | -------------------------------------------------------------------------------- /boda/custom_modules.py: 
-------------------------------------------------------------------------------- 1 | import collections 2 | import math 3 | import re 4 | from functools import partial 5 | 6 | import torch 7 | from torch import nn 8 | from torch.nn import functional as F 9 | 10 | 11 | class Conv2dDynamicSamePadding(nn.Conv2d): 12 | """ 13 | Adapted from: 14 | https://github.com/lukemelas/EfficientNet-PyTorch 15 | https://github.com/rwightman/pytorch-image-models 16 | 17 | 2D Convolutions like TensorFlow, for a dynamic image size. 18 | The padding is operated in forward function by calculating dynamically. 19 | 20 | Tips for 'SAME' mode padding. 21 | Given the following: 22 | i: width or height 23 | s: stride 24 | k: kernel size 25 | d: dilation 26 | p: padding 27 | Output after Conv2d: 28 | o = floor((i+p-((k-1)*d+1))/s+1) 29 | If o equals i, i = floor((i+p-((k-1)*d+1))/s+1), 30 | => p = (i-1)*s+((k-1)*d+1)-i 31 | """ 32 | 33 | def __init__( 34 | self, 35 | in_channels: int, 36 | out_channels: int, 37 | kernel_size: int, 38 | stride: int = 1, 39 | dilation: int = 1, 40 | groups: int = 1, 41 | bias: bool = True, 42 | ) -> None: 43 | super().__init__( 44 | in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias 45 | ) 46 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 47 | 48 | def forward(self, x): 49 | ih, iw = x.size()[-2:] 50 | kh, kw = self.weight.size()[-2:] 51 | sh, sw = self.stride 52 | oh, ow = math.ceil(ih / sh), math.ceil( 53 | iw / sw 54 | ) # change the output size according to stride 55 | 56 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 57 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 58 | if pad_h > 0 or pad_w > 0: 59 | x = F.pad( 60 | x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2] 61 | ) 62 | 63 | return F.conv2d( 64 | x, 65 | self.weight, 66 | self.bias, 67 | self.stride, 68 | self.padding, 69 | self.dilation, 70 | self.groups, 71 | ) 72 | 73 | 74 | class Conv2dStaticSamePadding(nn.Conv2d): 75 | """ 76 | 2D Convolutions like TensorFlow's 'SAME' mode, with the given input image size. 77 | The padding mudule is calculated in construction function, then used in forward. 
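    Example (illustrative, assumed values): with image_size=224, kernel_size=3,
    stride=2, dilation=1, the constructor computes oh = ceil(224 / 2) = 112 and
    pad_h = max((112 - 1) * 2 + (3 - 1) * 1 + 1 - 224, 0) = 1, so the stored
    padding module is ZeroPad2d((0, 1, 0, 1)) and the convolution output is
    112 x 112, matching TensorFlow's 'SAME' behaviour.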
78 | """ 79 | 80 | def __init__( 81 | self, 82 | in_channels, 83 | out_channels, 84 | kernel_size, 85 | stride=1, 86 | image_size=None, 87 | **kwargs 88 | ) -> None: 89 | super().__init__(in_channels, out_channels, kernel_size, stride, **kwargs) 90 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 91 | assert image_size is not None 92 | 93 | ih, iw = (image_size, image_size) if isinstance(image_size, int) else image_size 94 | kh, kw = self.weight.size()[-2:] 95 | sh, sw = self.stride 96 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 97 | 98 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 99 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 100 | if pad_h > 0 or pad_w > 0: 101 | self.static_padding = nn.ZeroPad2d( 102 | (pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2) 103 | ) 104 | else: 105 | self.static_padding = nn.Identity() 106 | 107 | def forward(self, x): 108 | x = self.static_padding(x) 109 | x = F.conv2d( 110 | x, 111 | self.weight, 112 | self.bias, 113 | self.stride, 114 | self.padding, 115 | self.dilation, 116 | self.groups, 117 | ) 118 | return x 119 | 120 | 121 | class MaxPool2dDynamicSamePadding(nn.MaxPool2d): 122 | """ 123 | 2D MaxPooling like TensorFlow's 'SAME' mode, with a dynamic image size. 124 | The padding is operated in forward function by calculating dynamically. 125 | """ 126 | 127 | def __init__( 128 | self, 129 | kernel_size, 130 | stride, 131 | padding=0, 132 | dilation=1, 133 | return_indices=False, 134 | ceil_mode=False, 135 | ) -> None: 136 | super().__init__( 137 | kernel_size, stride, padding, dilation, return_indices, ceil_mode 138 | ) 139 | self.stride = [self.stride] * 2 if isinstance(self.stride, int) else self.stride 140 | self.kernel_size = ( 141 | [self.kernel_size] * 2 142 | if isinstance(self.kernel_size, int) 143 | else self.kernel_size 144 | ) 145 | self.dilation = ( 146 | [self.dilation] * 2 if isinstance(self.dilation, int) else self.dilation 147 | ) 148 | 149 | def forward(self, x): 150 | ih, iw = x.size()[-2:] 151 | kh, kw = self.kernel_size 152 | sh, sw = self.stride 153 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 154 | 155 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 156 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 157 | if pad_h > 0 or pad_w > 0: 158 | x = F.pad( 159 | x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2] 160 | ) 161 | 162 | return F.max_pool2d( 163 | x, 164 | self.kernel_size, 165 | self.stride, 166 | self.padding, 167 | self.dilation, 168 | self.ceil_mode, 169 | self.return_indices, 170 | ) 171 | 172 | 173 | class MaxPool2dStaticSamePadding(nn.MaxPool2d): 174 | """ 175 | 2D MaxPooling like TensorFlow's 'SAME' mode, with the given input image size. 176 | The padding mudule is calculated in construction function, then used in forward. 
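    Example (illustrative, assumed values): with image_size=224, kernel_size=2,
    stride=2 the computed padding is max((112 - 1) * 2 + (2 - 1) * 1 + 1 - 224, 0) = 0,
    so static_padding is nn.Identity(); with kernel_size=3 the padding is 1 and
    static_padding becomes ZeroPad2d((0, 1, 0, 1)).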
177 | """ 178 | 179 | def __init__(self, kernel_size, stride, image_size=None, **kwargs) -> None: 180 | super().__init__(kernel_size, stride, **kwargs) 181 | self.stride = [self.stride] * 2 if isinstance(self.stride, int) else self.stride 182 | self.kernel_size = ( 183 | [self.kernel_size] * 2 184 | if isinstance(self.kernel_size, int) 185 | else self.kernel_size 186 | ) 187 | self.dilation = ( 188 | [self.dilation] * 2 if isinstance(self.dilation, int) else self.dilation 189 | ) 190 | assert image_size is not None 191 | 192 | ih, iw = (image_size, image_size) if isinstance(image_size, int) else image_size 193 | kh, kw = self.kernel_size 194 | sh, sw = self.stride 195 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 196 | 197 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 198 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 199 | if pad_h > 0 or pad_w > 0: 200 | self.static_padding = nn.ZeroPad2d( 201 | (pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2) 202 | ) 203 | else: 204 | self.static_padding = nn.Identity() 205 | 206 | def forward(self, x): 207 | x = self.static_padding(x) 208 | x = F.max_pool2d( 209 | x, 210 | self.kernel_size, 211 | self.stride, 212 | self.padding, 213 | self.dilation, 214 | self.ceil_mode, 215 | self.return_indices, 216 | ) 217 | return x 218 | -------------------------------------------------------------------------------- /boda/models/feature_extractor/resnet.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, List, Optional, Callable 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | from torch import nn, Tensor 6 | 7 | 8 | # TODO: BACKBONE_ARCHIVE_MAP or _MAPS? or ARCHIVES? 
9 | BACKBONE_ARCHIVE_MAP = { 10 | "resnet18": "https://download.pytorch.org/models/resnet18-5c106cde.pth", 11 | "resnet34": "https://download.pytorch.org/models/resnet34-333f7ec4.pth", 12 | "resnet50": "https://download.pytorch.org/models/resnet50-19c8e357.pth", 13 | "resnet101": "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth", 14 | } 15 | 16 | 17 | class Conv2d1x1(nn.Sequential): 18 | """1x1 convolution""" 19 | 20 | def __init__( 21 | self, 22 | in_planes: int, 23 | out_planes: int, 24 | stride: int = 1, 25 | ) -> None: 26 | super().__init__( 27 | nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 28 | ) 29 | 30 | 31 | class Conv2d3x3(nn.Sequential): 32 | """3x3 convolution with padding""" 33 | 34 | def __init__( 35 | self, 36 | in_planes: int, 37 | out_planes: int, 38 | stride: int = 1, 39 | groups: int = 1, 40 | dilation: int = 1, 41 | ) -> None: 42 | super().__init__( 43 | nn.Conv2d( 44 | in_planes, 45 | out_planes, 46 | kernel_size=3, 47 | stride=stride, 48 | padding=dilation, 49 | groups=groups, 50 | bias=False, 51 | dilation=dilation, 52 | ) 53 | ) 54 | 55 | 56 | class BasicBlock(nn.Module): 57 | expansion: int = 1 58 | 59 | def __init__( 60 | self, 61 | inplanes: int, 62 | planes: int, 63 | stride: int = 1, 64 | downsample: Optional[nn.Module] = None, 65 | groups: int = 1, 66 | base_width: int = 64, 67 | dilation: int = 1, 68 | norm_layer: Optional[Callable[..., nn.Module]] = None, 69 | ) -> None: 70 | super().__init__() 71 | if norm_layer is None: 72 | norm_layer = nn.BatchNorm2d # (track_running_stats=False) 73 | 74 | if groups != 1 or base_width != 64: 75 | raise ValueError("BasicBlock only supports groups=1 and base_width=64") 76 | if dilation > 1: 77 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock") 78 | 79 | self.conv1 = Conv2d3x3(inplanes, planes, stride) 80 | self.bn1 = norm_layer(planes) 81 | self.relu = nn.ReLU(inplace=True) 82 | self.conv2 = Conv2d3x3(planes, planes) 83 | self.bn2 = norm_layer(planes) 84 | self.downsample = downsample 85 | self.stride = stride 86 | 87 | def forward(self, x: Tensor) -> Tensor: 88 | identity = x 89 | 90 | out = self.conv1(x) 91 | out = self.bn1(out) 92 | out = self.relu(out) 93 | 94 | out = self.conv2(out) 95 | out = self.bn2(out) 96 | 97 | if self.downsample is not None: 98 | identity = self.downsample(x) 99 | 100 | out += identity 101 | out = self.relu(out) 102 | 103 | return out 104 | 105 | 106 | class Bottleneck(nn.Module): 107 | expansion = 4 108 | 109 | def __init__( 110 | self, 111 | in_planes: int, 112 | planes: int, 113 | stride: int = 1, 114 | downsample: Optional[nn.Module] = None, 115 | norm_layer: Optional[Callable[..., nn.Module]] = None, 116 | ) -> None: 117 | super().__init__() 118 | if norm_layer is None: 119 | norm_layer = nn.BatchNorm2d 120 | 121 | self.conv1 = Conv2d1x1(in_planes, planes) 122 | self.bn1 = norm_layer(planes) 123 | 124 | self.conv2 = Conv2d3x3(planes, planes, stride=stride) 125 | self.bn2 = norm_layer(planes) 126 | 127 | self.conv3 = Conv2d1x1(planes, planes * self.expansion) 128 | self.bn3 = norm_layer(planes * self.expansion) 129 | 130 | self.downsample = downsample 131 | self.stride = stride 132 | 133 | def forward(self, inputs) -> Tensor: 134 | residual = inputs 135 | 136 | outputs = F.relu(self.bn1(self.conv1(inputs)), inplace=True) 137 | outputs = F.relu(self.bn2(self.conv2(outputs)), inplace=True) 138 | outputs = self.bn3(self.conv3(outputs)) 139 | 140 | if self.downsample is not None: 141 | residual = self.downsample(inputs) 142 | 143 | 
outputs += residual 144 | outputs = F.relu(outputs, inplace=True) 145 | 146 | return outputs 147 | 148 | 149 | class ResNet(nn.Module): 150 | def __init__(self, layers, block=Bottleneck): 151 | super().__init__() 152 | self.num_base_layers = len(layers) 153 | self.layers = nn.ModuleList() 154 | self.channels = [] 155 | 156 | self.inplanes = 64 157 | 158 | # TODO self.stem = nn.Sequential() ?? 159 | self.conv = nn.Conv2d( 160 | 3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False 161 | ) 162 | self.bn = nn.BatchNorm2d(self.inplanes) 163 | self.relu = nn.ReLU(inplace=True) 164 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 165 | 166 | # TODO self._make_stage ??? 167 | self._make_layer(block, 64, layers[0]) 168 | self._make_layer(block, 128, layers[1], stride=2) 169 | self._make_layer(block, 256, layers[2], stride=2) 170 | self._make_layer(block, 512, layers[3], stride=2) 171 | 172 | # self.backbone_modules = [m for m in self.modules() if isinstance(m, nn.Conv2d)] 173 | # self.backbone_modules = [m for m in self.modules()] 174 | 175 | def _make_layer(self, block, planes, blocks, stride=1): 176 | downsample = None 177 | if stride != 1 or self.inplanes != planes * block.expansion: 178 | downsample = nn.Sequential( 179 | Conv2d1x1( 180 | self.inplanes, 181 | planes * block.expansion, 182 | stride=stride, 183 | ), 184 | nn.BatchNorm2d(planes * block.expansion), 185 | ) 186 | 187 | layers = [block(self.inplanes, planes, stride, downsample)] 188 | self.inplanes = planes * block.expansion 189 | 190 | # Add identity block 191 | for _ in range(1, blocks): 192 | layers.append(block(self.inplanes, planes)) 193 | 194 | # layer = nn.Sequential(*layers) 195 | 196 | self.channels.append(planes * block.expansion) 197 | self.layers.append(nn.Sequential(*layers)) 198 | 199 | def forward(self, inputs): 200 | inputs = self.conv(inputs) 201 | inputs = self.bn(inputs) 202 | inputs = self.relu(inputs) 203 | inputs = self.maxpool(inputs) 204 | 205 | outputs = [] 206 | for layer in self.layers: 207 | inputs = layer(inputs) 208 | outputs.append(inputs) 209 | 210 | return outputs 211 | 212 | def add_layer(self, conv_channels=1024, downsample=2, depth=1, block=Bottleneck): 213 | self._make_layer( 214 | block, conv_channels // block.expansion, blocks=depth, stride=downsample 215 | ) 216 | 217 | def from_pretrained(self, path): 218 | state_dict = torch.load(path) 219 | 220 | try: 221 | state_dict.pop("fc.weight") 222 | state_dict.pop("fc.bias") 223 | except KeyError: 224 | pass 225 | 226 | keys = list(state_dict) 227 | for key in keys: 228 | if key.startswith("layer"): 229 | idx = int(key[5]) 230 | new_key = "layers." 
+ str(idx - 1) + key[6:] 231 | state_dict[new_key] = state_dict.pop(key) 232 | 233 | self.load_state_dict(state_dict, strict=False) 234 | 235 | 236 | def resnet18(): 237 | backbone = ResNet([2, 2, 2, 2], BasicBlock) 238 | return backbone 239 | 240 | 241 | def resnet34(): 242 | backbone = ResNet([3, 4, 6, 3], BasicBlock) 243 | print(backbone.channels) 244 | return backbone 245 | 246 | 247 | def resnet50(pretrained: bool = False): 248 | backbone = ResNet([3, 4, 6, 3], Bottleneck) 249 | return backbone 250 | 251 | 252 | def resnet101(pretrained: bool = False): 253 | backbone = ResNet([3, 4, 23, 3], Bottleneck) 254 | return backbone 255 | -------------------------------------------------------------------------------- /boda/models/solov2/inference_solov1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def matrix_nms( 6 | seg_masks, cate_labels, cate_scores, kernel="gaussian", sigma=2.0, sum_masks=None 7 | ): 8 | """Matrix NMS for multi-class masks. 9 | 10 | Args: 11 | seg_masks (Tensor): shape (n, h, w) 12 | cate_labels (Tensor): shape (n), mask labels in descending order 13 | cate_scores (Tensor): shape (n), mask scores in descending order 14 | kernel (str): 'linear' or 'gauss' 15 | sigma (float): std in gaussian method 16 | sum_masks (Tensor): The sum of seg_masks 17 | 18 | Returns: 19 | Tensor: cate_scores_update, tensors of shape (n) 20 | """ 21 | n_samples = len(cate_labels) 22 | if n_samples == 0: 23 | return [] 24 | if sum_masks is None: 25 | sum_masks = seg_masks.sum((1, 2)).float() 26 | seg_masks = seg_masks.reshape(n_samples, -1).float() 27 | # inter. 28 | inter_matrix = torch.mm(seg_masks, seg_masks.transpose(1, 0)) 29 | # union. 30 | sum_masks_x = sum_masks.expand(n_samples, n_samples) 31 | # iou. 32 | iou_matrix = ( 33 | inter_matrix / (sum_masks_x + sum_masks_x.transpose(1, 0) - inter_matrix) 34 | ).triu(diagonal=1) 35 | # label_specific matrix. 36 | cate_labels_x = cate_labels.expand(n_samples, n_samples) 37 | label_matrix = ( 38 | (cate_labels_x == cate_labels_x.transpose(1, 0)).float().triu(diagonal=1) 39 | ) 40 | 41 | # IoU compensation 42 | compensate_iou, _ = (iou_matrix * label_matrix).max(0) 43 | compensate_iou = compensate_iou.expand(n_samples, n_samples).transpose(1, 0) 44 | 45 | # IoU decay 46 | decay_iou = iou_matrix * label_matrix 47 | 48 | # matrix nms 49 | if kernel == "gaussian": 50 | decay_matrix = torch.exp(-1 * sigma * (decay_iou ** 2)) 51 | compensate_matrix = torch.exp(-1 * sigma * (compensate_iou ** 2)) 52 | decay_coefficient, _ = (decay_matrix / compensate_matrix).min(0) 53 | elif kernel == "linear": 54 | decay_matrix = (1 - decay_iou) / (1 - compensate_iou) 55 | decay_coefficient, _ = decay_matrix.min(0) 56 | else: 57 | raise NotImplementedError 58 | 59 | # update the score. 
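# With the gaussian kernel the decay applied to a mask is the minimum over
# higher-scoring, same-class masks i of exp(-sigma * (iou_i**2 - compensate_iou_i**2)).
# Illustrative numbers (assumed): iou=0.8, no IoU compensation, sigma=2.0 gives
# exp(-2 * 0.64) ~= 0.28, so heavily overlapped masks are softly down-weighted
# rather than removed outright.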
60 | cate_scores_update = cate_scores * decay_coefficient 61 | return cate_scores_update 62 | 63 | 64 | def get_seg(seg_preds, cate_preds, img_metas=[1]): 65 | assert len(seg_preds) == len(cate_preds) 66 | 67 | num_levels = len(cate_preds) 68 | featmap_size = seg_preds[0].size()[-2:] 69 | 70 | result_list = [] 71 | for img_id in range(len(img_metas)): 72 | cate_pred_list = [ 73 | cate_preds[i][img_id].view(-1, 80).detach() 74 | for i in range(num_levels) 75 | # cate_preds[i][img_id].view(-1, self.cate_out_channels).detach() for i in range(num_levels) 76 | ] 77 | seg_pred_list = [seg_preds[i][img_id].detach() for i in range(num_levels)] 78 | 79 | # img_shape = img_metas[img_id]['img_shape'] 80 | # scale_factor = img_metas[img_id]['scale_factor'] 81 | # ori_shape = img_metas[img_id]['ori_shape'] 82 | size = (1333, 800, 3) 83 | # size = (800, 1333, 3) 84 | img_shape = size 85 | ori_shape = size 86 | 87 | cate_pred_list = torch.cat(cate_pred_list, dim=0) 88 | seg_pred_list = torch.cat(seg_pred_list, dim=0) 89 | 90 | result = get_seg_single( 91 | cate_pred_list, seg_pred_list, featmap_size, img_shape, ori_shape 92 | ) 93 | 94 | result_list.append(result) 95 | 96 | return result_list 97 | 98 | 99 | def get_seg_single(cate_preds, seg_preds, featmap_size, img_shape, ori_shape): 100 | assert len(cate_preds) == len(seg_preds) 101 | 102 | # test_seg_masks = seg_preds > 0.5 # cfg.mask_thr 103 | # test_masks = test_seg_masks.detach().cpu().numpy()[0] * 255 104 | # print(test_masks.shape) 105 | # import cv2 106 | # cv2.imwrite('solo-test12.jpg', test_masks) 107 | 108 | # overall info. 109 | h, w, _ = img_shape 110 | upsampled_size_out = (featmap_size[0] * 4, featmap_size[1] * 4) 111 | 112 | # process. 113 | inds = cate_preds > 0.1 # cfg.score_thr 114 | # category scores. 115 | cate_scores = cate_preds[inds] 116 | if len(cate_scores) == 0: 117 | return None 118 | # category labels. 119 | # inds = inds.nonzero() 120 | inds = inds.nonzero() 121 | # print(inds.nonzero()) 122 | cate_labels = inds[:, 1] 123 | 124 | # strides. 125 | # size_trans = cate_labels.new_tensor(self.seg_num_grids).pow(2).cumsum(0) 126 | size_trans = cate_labels.new_tensor([40, 36, 24, 16, 12]).pow(2).cumsum(0) 127 | strides = cate_scores.new_ones(size_trans[-1]) 128 | n_stage = len([40, 36, 24, 16, 12]) # len(self.seg_num_grids) 129 | strides[: size_trans[0]] *= (4, 8, 16, 32, 64)[0] # self.strides[0] 130 | for ind_ in range(1, n_stage): 131 | # strides[size_trans[ind_ - 1]:size_trans[ind_]] *= self.strides[ind_] 132 | strides[size_trans[ind_ - 1] : size_trans[ind_]] *= (4, 8, 16, 32, 64)[ind_] 133 | strides = strides[inds[:, 0]] 134 | 135 | # masks. 136 | seg_preds = seg_preds[inds[:, 0]] 137 | seg_masks = seg_preds > 0.5 # cfg.mask_thr 138 | sum_masks = seg_masks.sum((1, 2)).float() 139 | 140 | # filter. 141 | keep = sum_masks > strides 142 | if keep.sum() == 0: 143 | return None 144 | 145 | seg_masks = seg_masks[keep, ...] 146 | seg_preds = seg_preds[keep, ...] 147 | sum_masks = sum_masks[keep] 148 | cate_scores = cate_scores[keep] 149 | cate_labels = cate_labels[keep] 150 | 151 | # print('#'*50) 152 | # print(seg_masks.size()) 153 | test_seg_masks = seg_masks > 0.5 # cfg.mask_thr 154 | test_masks = test_seg_masks.detach().cpu().numpy()[0] * 255 155 | print(test_masks.shape) 156 | # test_masks = test_masks.transpose(1, 2, 0) 157 | import cv2 158 | 159 | cv2.imwrite("solo-test11.jpg", test_masks) 160 | 161 | # maskness. 
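# Maskness = mean soft mask score inside the binarized foreground region
# (sum of seg_preds over the kept pixels divided by their count); multiplying it
# into the category score down-weights fuzzy, low-confidence masks.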
162 | seg_scores = (seg_preds * seg_masks.float()).sum((1, 2)) / sum_masks 163 | cate_scores *= seg_scores 164 | 165 | # sort and keep top nms_pre 166 | sort_inds = torch.argsort(cate_scores, descending=True) 167 | if len(sort_inds) > 500: # cfg.nms_pre 168 | sort_inds = sort_inds[:500] # [:cfg.nms_pre] 169 | seg_masks = seg_masks[sort_inds, :, :] 170 | seg_preds = seg_preds[sort_inds, :, :] 171 | sum_masks = sum_masks[sort_inds] 172 | cate_scores = cate_scores[sort_inds] 173 | cate_labels = cate_labels[sort_inds] 174 | 175 | # Matrix NMS 176 | cate_scores = matrix_nms( 177 | seg_masks, 178 | cate_labels, 179 | cate_scores, 180 | kernel="gaussian", 181 | sigma=2.0, 182 | sum_masks=sum_masks, 183 | ) 184 | 185 | # filter. 186 | keep = cate_scores >= 0.05 # cfg.update_thr 187 | if keep.sum() == 0: 188 | return None 189 | seg_preds = seg_preds[keep, :, :] 190 | cate_scores = cate_scores[keep] 191 | cate_labels = cate_labels[keep] 192 | 193 | # sort and keep top_k 194 | sort_inds = torch.argsort(cate_scores, descending=True) 195 | if len(sort_inds) > 100: # cfg.max_per_img: 196 | sort_inds = sort_inds[:100] # [:cfg.max_per_img] 197 | seg_preds = seg_preds[sort_inds, :, :] 198 | cate_scores = cate_scores[sort_inds] 199 | cate_labels = cate_labels[sort_inds] 200 | 201 | print(seg_preds.size()) 202 | print(upsampled_size_out) 203 | seg_preds = F.interpolate( 204 | seg_preds.unsqueeze(0), size=upsampled_size_out, mode="bilinear" 205 | ) # [:, :, :h, :w] 206 | 207 | # seg_masks = F.interpolate( 208 | # seg_preds, size=ori_shape[:2], mode='bilinear').squeeze(0) 209 | size = (1333, 800) 210 | # size = (800, 1333) 211 | seg_masks = F.interpolate(seg_preds, size=size, mode="bilinear").squeeze(0) 212 | 213 | print("#" * 50) 214 | print(seg_masks.size()) 215 | seg_masks = seg_masks > 0.5 # cfg.mask_thr 216 | 217 | test_masks = seg_masks.detach().cpu().numpy()[0] * 255 218 | print(test_masks.shape) 219 | # test_masks = test_masks.transpose(1, 2, 0) 220 | print(test_masks.shape) 221 | import cv2 222 | 223 | # test_masks = cv2.flip(test_masks, 1) 224 | # test_masks = cv2.rotate(test_masks, cv2.ROTATE_90_COUNTERCLOCKWISE) 225 | # print(test_masks.shape) 226 | # test_masks = cv2.resize(test_masks, (1333, 800), cv2.INTER_AREA) 227 | # print(test_masks.shape) 228 | cv2.imwrite("solo-test1.jpg", test_masks) 229 | 230 | return seg_masks, cate_labels, cate_scores 231 | -------------------------------------------------------------------------------- /boda/postprocessing.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from typing import Tuple, List, Dict 3 | 4 | import torch 5 | import torch.nn.functional as F 6 | from torch import Tensor 7 | from torchvision.ops import batched_nms 8 | 9 | 10 | def decode(boxes: Tensor, prior_boxes: Tensor, variances: List[float] = [0.1, 0.2]): 11 | """Decode locations from predictions using priors to undo 12 | the encoding we did for offset regression at train time. 13 | 14 | https://github.com/Hakuyume/chainer-ssd 15 | 16 | Args: 17 | loc (tensor): location predictions for loc layers, 18 | Shape: [num_priors, 4] 19 | priors (tensor): Prior boxes in center-offset form. 20 | Shape: [num_priors, 4]. 
21 | variances: (`List[float]`) Variances of priorboxes 22 | Return: 23 | decoded bounding box predictions 24 | """ 25 | boxes = torch.cat( 26 | ( 27 | prior_boxes[:, :2] + boxes[:, :2] * variances[0] * prior_boxes[:, 2:], 28 | prior_boxes[:, 2:] * torch.exp(boxes[:, 2:] * variances[1]), 29 | ), 30 | dim=1, 31 | ) 32 | boxes[:, :2] -= boxes[:, 2:] / 2 33 | boxes[:, 2:] += boxes[:, :2] 34 | 35 | return boxes 36 | 37 | 38 | def sanitize_coordinates( 39 | _x1, _x2, img_size: int, padding: int = 0, cast: bool = True 40 | ) -> Tuple[Tensor, Tensor]: 41 | """ 42 | Sanitizes the input coordinates so that x1 < x2, x1 != x2, x1 >= 0, and x2 <= image_size. 43 | Also converts from relative to absolute coordinates and casts the results to long tensors. 44 | If cast is false, the result won't be cast to longs. 45 | Warning: this does things in-place behind the scenes so copy if necessary. 46 | """ 47 | _x1 = _x1 * img_size 48 | _x2 = _x2 * img_size 49 | if cast: 50 | _x1 = _x1.long() 51 | _x2 = _x2.long() 52 | x1 = torch.min(_x1, _x2) 53 | x2 = torch.max(_x1, _x2) 54 | x1 = torch.clamp(x1 - padding, min=0) 55 | x2 = torch.clamp(x2 + padding, max=img_size) 56 | 57 | return x1, x2 58 | 59 | 60 | def crop(masks, boxes, padding: int = 1) -> Tensor: 61 | """ 62 | "Crop" predicted masks by zeroing out everything not in the predicted bbox. 63 | Vectorized by Chong (thanks Chong). 64 | Args: 65 | # TODO: torchvision mask rcnn masks UInt8Tensor[N, H, W] 66 | # TODO: torchvision boxes FloatTensor[N, 4] 67 | - masks should be a size [h, w, n] tensor of masks 68 | - boxes should be a size [n, 4] tensor of bbox coords in relative point form 69 | """ 70 | h, w, n = masks.size() 71 | x1, x2 = sanitize_coordinates(boxes[:, 0], boxes[:, 2], w, padding, cast=False) 72 | y1, y2 = sanitize_coordinates(boxes[:, 1], boxes[:, 3], h, padding, cast=False) 73 | 74 | rows = ( 75 | torch.arange(w, device=masks.device, dtype=x1.dtype) 76 | .view(1, -1, 1) 77 | .expand(h, w, n) 78 | ) 79 | cols = ( 80 | torch.arange(h, device=masks.device, dtype=x1.dtype) 81 | .view(-1, 1, 1) 82 | .expand(h, w, n) 83 | ) 84 | 85 | masks_left = rows >= x1.view(1, 1, -1) 86 | masks_right = rows < x2.view(1, 1, -1) 87 | masks_up = cols >= y1.view(1, 1, -1) 88 | masks_down = cols < y2.view(1, 1, -1) 89 | 90 | crop_mask = masks_left * masks_right * masks_up * masks_down 91 | 92 | return masks * crop_mask.float() 93 | 94 | 95 | class PostprocessOutputs: 96 | def __init__( 97 | self, 98 | num_classes: int = 80, 99 | top_k: int = 10, 100 | nms_threshold: float = 0.3, 101 | score_threshold: float = 0.2, 102 | ) -> None: 103 | """ 104 | Args: 105 | num_classes (int) 106 | top_k 107 | nms_threshold 108 | score_threshold 109 | nms () 110 | """ 111 | self.config = None 112 | self.num_classes = num_classes + 1 113 | self.background_label = 0 114 | self.top_k = top_k 115 | self.nms_threshold = 0.5 116 | self.score_threshold = 0.2 117 | 118 | self.nms = batched_nms 119 | # if self.nms is None: 120 | # self.nms = fast_nms 121 | 122 | def __call__( 123 | self, preds: Dict[str, Tensor], image_sizes: List[Tuple[int]] 124 | ) -> List[Dict[str, Tensor]]: 125 | """ """ 126 | pred_boxes = None 127 | pred_scores = None 128 | default_boxes = None 129 | pred_masks = None 130 | proto_masks = None 131 | if "boxes" in preds: 132 | pred_boxes = preds["boxes"] 133 | if "scores" in preds: 134 | pred_scores = preds["scores"] 135 | if "default_boxes" in preds: 136 | default_boxes = preds["default_boxes"] 137 | if "mask_coefs" in preds: 138 | pred_masks = preds["mask_coefs"] 139 | if 
"proto_masks" in preds: 140 | proto_masks = preds["proto_masks"] 141 | 142 | batch_size = pred_boxes.size(0) 143 | num_prior_boxes = default_boxes.size(0) 144 | pred_scores = ( 145 | preds["scores"] 146 | .view(batch_size, num_prior_boxes, self.num_classes) 147 | .transpose(2, 1) 148 | .contiguous() 149 | ) 150 | 151 | # test_scores, test_index = torch.max(preds['scores'], dim=1) 152 | 153 | return_list = [] 154 | # print(image_sizes) 155 | for i, image_size in enumerate(image_sizes): 156 | print(i, proto_masks.size()) 157 | decoded_boxes = decode(pred_boxes[i], default_boxes) 158 | results = self._filter_overlaps(i, decoded_boxes, pred_masks, pred_scores) 159 | print(proto_masks[i].dtype) 160 | results["proto_masks"] = proto_masks[i] 161 | 162 | return_list.append(_convert_boxes_and_masks(results, image_size)) 163 | # return_list.append(results) 164 | 165 | for result in return_list: 166 | scores = result["scores"].detach().cpu() 167 | sorted_index = range(len(scores))[: self.top_k] 168 | # sorted_index = scores.argsort(0, descending=True)[:5] 169 | 170 | boxes = result["boxes"][sorted_index] 171 | labels = result["labels"][sorted_index] 172 | scores = scores[sorted_index] 173 | masks = result["masks"][sorted_index] 174 | 175 | result["boxes"] = boxes 176 | result["scores"] = scores 177 | result["labels"] = labels 178 | result["masks"] = masks 179 | 180 | return return_list 181 | 182 | def _filter_overlaps( 183 | self, 184 | batch_index, 185 | decoded_boxes, 186 | pred_masks, 187 | pred_scores, 188 | ) -> Dict[str, Tensor]: 189 | scores = pred_scores[batch_index, 1:, :] 190 | max_scores, labels = torch.max(scores, dim=0) 191 | 192 | keep = max_scores > self.score_threshold # 0.05 193 | scores = scores[:, keep] 194 | boxes = decoded_boxes[keep, :] 195 | labels = labels[keep] 196 | masks = pred_masks[batch_index, keep, :] 197 | 198 | if scores.size(1) == 0: 199 | return None 200 | 201 | # print(max_scores[0], max_class[0]) 202 | print(boxes.size(), scores.size(), keep.size(), labels.size()) 203 | # boxes, masks, labels, scores = self.nms(boxes, scores, keep, iou_threshold=0.3) 204 | 205 | return_dict = defaultdict() 206 | for _class in range(scores.size(0)): 207 | _scores = scores[_class, :] 208 | indices = self.nms(boxes, _scores, labels, iou_threshold=0.3) 209 | 210 | return_dict["boxes"] = boxes[indices] 211 | return_dict["scores"] = _scores[indices] 212 | return_dict["mask_coefs"] = masks[indices] 213 | return_dict["labels"] = labels[indices] 214 | 215 | return dict(return_dict) 216 | 217 | 218 | def _convert_boxes_and_masks(preds, size): 219 | """ 220 | Args: 221 | preds 222 | size (): (h, w) 223 | 224 | """ 225 | h, w = size 226 | boxes = preds["boxes"] 227 | mask_coefs = preds["mask_coefs"] 228 | proto_masks = preds["proto_masks"] 229 | 230 | masks = proto_masks @ mask_coefs.t() 231 | masks = torch.sigmoid(masks) 232 | 233 | masks = crop(masks, boxes) 234 | masks = F.interpolate( 235 | masks.unsqueeze(0), (h, w), mode="bilinear", align_corners=False 236 | ).squeeze(0) 237 | masks.gt_(0.5) # Binarize the masks 238 | 239 | boxes[:, 0], boxes[:, 2] = sanitize_coordinates( 240 | boxes[:, 0], boxes[:, 2], w, cast=False 241 | ) 242 | boxes[:, 1], boxes[:, 3] = sanitize_coordinates( 243 | boxes[:, 1], boxes[:, 3], h, cast=False 244 | ) 245 | boxes = boxes.long() 246 | 247 | preds["boxes"] = boxes 248 | preds["masks"] = masks 249 | 250 | del preds["proto_masks"] 251 | del preds["mask_coefs"] 252 | 253 | return preds 254 | 
-------------------------------------------------------------------------------- /boda/base_configuration.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import json 3 | import os 4 | import sys 5 | import time 6 | from typing import Tuple, List, Dict, Any, Union, Sequence 7 | from urllib.parse import urlparse 8 | from urllib.request import urlretrieve 9 | 10 | from .file_utils import DataEncoder 11 | 12 | 13 | class BaseConfig: 14 | """ 15 | Class attributes: 16 | model_type (:obj:`str`): 17 | Args: 18 | name_or_path (:obj:`str`): 19 | """ 20 | 21 | model_name: str = "" 22 | cache_dir = "cache" 23 | 24 | def __init__(self, **kwargs): 25 | self.use_torchscript = kwargs.pop("use_torchscript", False) 26 | # self.use_fp16 = kwargs.pop('use_fp16', False) 27 | self.label_map = kwargs.pop("label_map", {}) 28 | self.num_classes = kwargs.pop("num_classes", 0) 29 | self.min_size = kwargs.pop("min_size", None) 30 | self.max_size = kwargs.pop("max_size", None) 31 | self.preserve_aspect_ratio = kwargs.pop("preserve_aspect_ratio", False) 32 | if not isinstance(self.max_size, Sequence): 33 | if not self.preserve_aspect_ratio: 34 | self.max_size = (self.max_size, self.max_size) 35 | else: 36 | self.max_size = (self.min_size, self.max_size) 37 | 38 | self.num_grids = kwargs.pop("num_grids", 0) 39 | self.top_k = kwargs.pop("top_k", 5) 40 | self.score_thresh = kwargs.pop("score_thresh", 0.15) 41 | 42 | # backbone 43 | self.backbone_name = kwargs.pop("backbone_name", "resnet101") 44 | self.backbone_structure = kwargs.pop("backbone_structure", None) 45 | 46 | # neck 47 | self.neck_name = kwargs.pop("neck_name", "fpn") 48 | self.selected_layers = kwargs.pop("selected_layers", [1, 2, 3]) 49 | self.aspect_ratios = kwargs.pop("aspect_ratios", [1, 1 / 2, 2]) 50 | self.scales = kwargs.pop("scales", [24, 48, 96, 192, 384]) 51 | self.fpn_channels = kwargs.pop("fpn_channels", 256) 52 | 53 | # head 54 | self.anchors = kwargs.pop("anchors", None) 55 | 56 | for k, v in kwargs.items(): 57 | print(k, v) 58 | try: 59 | setattr(self, k, v) 60 | except AttributeError as e: 61 | print(k, v, e) 62 | 63 | def __repr__(self): 64 | return f"{self.__class__.__name__} {self.to_dict()}" 65 | 66 | def to_json(self): 67 | config_dict = self.to_dict() 68 | return json.dumps(config_dict, indent=4, cls=DataEncoder) 69 | 70 | def to_dict(self): 71 | output = copy.deepcopy(self.__dict__) 72 | if hasattr(self.__class__, "model_name"): 73 | output["model_name"] = self.__class__.model_name 74 | return output 75 | 76 | def save_json(self, path: str): 77 | # if os.path.isfile(path): 78 | # raise AssertionError 79 | 80 | # os.makedirs(path, exist_ok=True) 81 | # config_file = os.path.join(path, CONFIG_NAME) 82 | with open(path, "w", encoding="utf-8") as writer: 83 | writer.write(self.to_json()) 84 | 85 | def update(self, config_dict: Dict[str, Any]): 86 | for key, value in config_dict.items(): 87 | setattr(self, key, value) 88 | 89 | @classmethod 90 | def from_pretrained(cls, name_or_path: str, **kwargs): 91 | config_dict = cls._get_config_dict(name_or_path) 92 | return cls(**config_dict) 93 | 94 | @classmethod 95 | def _dict_from_json_file(cls, path): 96 | with open(path, "r", encoding="utf-8") as json_file: 97 | config_dict = json.load(json_file) 98 | return config_dict 99 | 100 | @classmethod 101 | def from_json(cls, json_file: str): 102 | config_dict = cls._dict_from_json_file(json_file) 103 | print(config_dict) 104 | return cls(**config_dict) 105 | 106 | @classmethod 107 | def
_get_config_dict(cls, name_or_path, **kwargs): 108 | if os.path.isdir(name_or_path): 109 | # TODO: Thinking idea!! 110 | config_file = os.path.join(name_or_path, "config.json") 111 | elif os.path.isfile(name_or_path): 112 | config_file = name_or_path 113 | else: 114 | url = "https://unerue.synology.me/boda/models/" 115 | config_dir = os.path.join(cls.cache_dir, cls.model_name) 116 | config_file = os.path.join(config_dir, f"{name_or_path}.json") 117 | if not os.path.isfile(config_file): 118 | from urllib import request 119 | 120 | from .file_utils import reporthook 121 | 122 | if not os.path.isdir(config_dir): 123 | os.mkdir(config_dir) 124 | 125 | # file_name = f'{config_file}.json' 126 | # print(f'Downloading {name_or_path}.{extension}...', end=' ') 127 | request.urlretrieve( 128 | f"{url}{cls.model_name}/{name_or_path}.json", 129 | config_file, 130 | reporthook, 131 | ) 132 | print() 133 | 134 | return cls._dict_from_json_file(config_file) 135 | 136 | # if not os.path.isfile(os.path.join(config_dir, f'{name_or_path}.pth')): 137 | # from urllib import request 138 | # from .models.yolact.configuration_yolact import yolact_pretrained_models 139 | 140 | # dd = urlparse(yolact_pretrained_models[name_or_path]) 141 | # request.urlretrieve( 142 | # yolact_pretrained_models[name_or_path].replace('json', 'pth'), 143 | # 'cache/yolact/yolact-base.pth', reporthook) 144 | 145 | # if os.path.isdir(config_dir): 146 | # config_file = os.path.join(config_dir, f'{name_or_path}.json') 147 | # if os.path.isfile(config_file): 148 | # return cls._dict_from_json_file(config_file) 149 | # else: 150 | # config_file = urlparse() 151 | # else: 152 | # os.mkdir(config_dir) 153 | # return 154 | 155 | # config_dict = cls._dict_from_json_file(config_file) 156 | 157 | # return config_dict, kwargs 158 | 159 | # @classmethod 160 | # def from_json(cls, json_file: str): 161 | # with open(path, 'r') as json_file: 162 | # config_dict = json.loads(json_file) 163 | # config_dict = cls.dict_from_json_fiel(json_file) 164 | # return cls(**config_dict) 165 | 166 | # @classmethod 167 | # def from_pretrained(cls, pretrained_model_or_path: str, **kwargs): 168 | # raise NotImplementedError 169 | 170 | # @classmethod 171 | # def get_config_dict(cls, pretrained_model_name_or_path: str, **kwargs) -> Tuple[Dict[str, Any], Dict[str, Any]]: 172 | # cache_dir = kwargs.pop('cache_dir', None) 173 | # force_download = kwargs.pop('force_download', False) 174 | # resume_download = kwargs.pop('resume_download', False) 175 | # proxies = kwargs.pop("proxies", None) 176 | # local_files_only = kwargs.pop("local_files_only", False) 177 | # revision = kwargs.pop("revision", None) 178 | 179 | # if os.path.isdir(pretrained_model_name_or_path): 180 | # config_file = os.path.join(pretrained_model_name_or_path, CONFIG_NAME) 181 | # elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path): 182 | # config_file = pretrained_model_name_or_path 183 | # else: 184 | # config_file = hf_bucket_url( 185 | # pretrained_model_name_or_path, filename=CONFIG_NAME, revision=revision, mirror=None 186 | # ) 187 | 188 | # try: 189 | # # Load from URL or cache if already cached 190 | # resolved_config_file = cached_path( 191 | # config_file, 192 | # cache_dir=cache_dir, 193 | # force_download=force_download, 194 | # proxies=proxies, 195 | # resume_download=resume_download, 196 | # local_files_only=local_files_only, 197 | # ) 198 | # # Load config dict 199 | # config_dict = cls._dict_from_json_file(resolved_config_file) 200 | 201 | # 
except EnvironmentError as err: 202 | # logger.error(err) 203 | # msg = ( 204 | # f"Can't load config for '{pretrained_model_name_or_path}'. Make sure that:\n\n" 205 | # f"- '{pretrained_model_name_or_path}' is a correct model identifier listed on 'https://huggingface.co/models'\n\n" 206 | # f"- or '{pretrained_model_name_or_path}' is the correct path to a directory containing a {CONFIG_NAME} file\n\n" 207 | # ) 208 | # raise EnvironmentError(msg) 209 | 210 | # except json.JSONDecodeError: 211 | # msg = ( 212 | # "Couldn't reach server at '{}' to download configuration file or " 213 | # "configuration file is not a valid JSON file. " 214 | # "Please check network or file content here: {}.".format(config_file, resolved_config_file) 215 | # ) 216 | # raise EnvironmentError(msg) 217 | 218 | # if resolved_config_file == config_file: 219 | # logger.info("loading configuration file {}".format(config_file)) 220 | # else: 221 | # logger.info("loading configuration file {} from cache at {}".format(config_file, resolved_config_file)) 222 | 223 | # return config_dict, kwargs 224 | -------------------------------------------------------------------------------- /boda/models/yolact/inference_yolact.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from typing import Tuple, List, Dict 3 | 4 | import torch 5 | import torch.nn.functional as F 6 | from torch import Tensor 7 | from torchvision.ops import batched_nms 8 | 9 | 10 | def decode(boxes: Tensor, default_boxes: Tensor, variances: List[float] = [0.1, 0.2]): 11 | """Decode locations from predictions using priors to undo 12 | the encoding we did for offset regression at train time. 13 | 14 | https://github.com/Hakuyume/chainer-ssd 15 | 16 | Args: 17 | loc (FloatTensor[N, 4]): location predictions for loc layers, 18 | Shape: [num_priors, 4] 19 | priors (tensor): Prior boxes in center-offset form. 20 | Shape: [num_priors, 4]. 21 | variances: (`List[float]`) Variances of priorboxes 22 | Return: 23 | decoded bounding box predictions 24 | """ 25 | boxes = torch.cat( 26 | ( 27 | default_boxes[:, :2] + boxes[:, :2] * variances[0] * default_boxes[:, 2:], 28 | default_boxes[:, 2:] * torch.exp(boxes[:, 2:] * variances[1]), 29 | ), 30 | dim=1, 31 | ) 32 | boxes[:, :2] -= boxes[:, 2:] / 2 33 | boxes[:, 2:] += boxes[:, :2] 34 | 35 | return boxes 36 | 37 | 38 | def sanitize_coordinates( 39 | _x1, _x2, img_size: int, padding: int = 0, cast: bool = True 40 | ) -> Tuple[Tensor, Tensor]: 41 | """ 42 | Sanitizes the input coordinates so that x1 < x2, x1 != x2, x1 >= 0, and x2 <= image_size. 43 | Also converts from relative to absolute coordinates and casts the results to long tensors. 44 | If cast is false, the result won't be cast to longs. 45 | Warning: this does things in-place behind the scenes so copy if necessary. 46 | """ 47 | _x1 = _x1 * img_size 48 | _x2 = _x2 * img_size 49 | if cast: 50 | _x1 = _x1.long() 51 | _x2 = _x2.long() 52 | x1 = torch.min(_x1, _x2) 53 | x2 = torch.max(_x1, _x2) 54 | x1 = torch.clamp(x1 - padding, min=0) 55 | x2 = torch.clamp(x2 + padding, max=img_size) 56 | 57 | return x1, x2 58 | 59 | 60 | def crop(masks, boxes, padding: int = 1) -> Tensor: 61 | """ 62 | "Crop" predicted masks by zeroing out everything not in the predicted bbox. 63 | Vectorized by Chong (thanks Chong). 
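    Example (illustrative, assumed shapes): for masks of shape [138, 138, n] and a box
    covering the left half of the image in relative coordinates (0.0, 0.0, 0.5, 1.0),
    sanitize_coordinates gives x2 = 70 (69 plus the default padding of 1), so every
    pixel with width index >= 70 is zeroed for that mask.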
64 | Args: 65 | # TODO: torchvision mask rcnn masks UInt8Tensor[N, H, W] 66 | # TODO: torchvision boxes FloatTensor[N, 4] 67 | - masks should be a size [h, w, n] tensor of masks 68 | - boxes should be a size [n, 4] tensor of bbox coords in relative point form 69 | """ 70 | h, w, n = masks.size() 71 | x1, x2 = sanitize_coordinates(boxes[:, 0], boxes[:, 2], w, padding, cast=False) 72 | y1, y2 = sanitize_coordinates(boxes[:, 1], boxes[:, 3], h, padding, cast=False) 73 | 74 | rows = ( 75 | torch.arange(w, device=masks.device, dtype=x1.dtype) 76 | .view(1, -1, 1) 77 | .expand(h, w, n) 78 | ) 79 | cols = ( 80 | torch.arange(h, device=masks.device, dtype=x1.dtype) 81 | .view(-1, 1, 1) 82 | .expand(h, w, n) 83 | ) 84 | 85 | masks_left = rows >= x1.view(1, 1, -1) 86 | masks_right = rows < x2.view(1, 1, -1) 87 | masks_up = cols >= y1.view(1, 1, -1) 88 | masks_down = cols < y2.view(1, 1, -1) 89 | 90 | crop_mask = masks_left * masks_right * masks_up * masks_down 91 | 92 | return masks * crop_mask.float() 93 | 94 | 95 | class PostprocessYolact: 96 | def __init__( 97 | self, 98 | num_classes: int = 80, 99 | top_k: int = 10, 100 | nms_threshold: float = 0.3, 101 | score_threshold: float = 0.2, 102 | ) -> None: 103 | """ 104 | Args: 105 | num_classes (int) 106 | top_k 107 | nms_threshold 108 | score_threshold 109 | nms () 110 | """ 111 | self.config = None 112 | self.num_classes = num_classes + 1 113 | self.background_label = 0 114 | self.top_k = top_k 115 | self.nms_threshold = 0.5 116 | self.score_threshold = 0.2 117 | 118 | self.nms = batched_nms 119 | # if self.nms is None: 120 | # self.nms = fast_nms 121 | 122 | def __call__( 123 | self, preds: Dict[str, Tensor], image_sizes: List[Tuple[int]] 124 | ) -> List[Dict[str, Tensor]]: 125 | """ 126 | preds (Dict[str, Tensor]): 127 | boxes (FloatTensor[B, N, 4]) 128 | scores (FloatTensor[B, N, 81]) 129 | 130 | mask_coefs (FloatTensor[B, N, 32]) 131 | default_boxes (FloatTensor[N, 4]) 132 | proto_masks (FloatTensor[1, 138, 138, 32]) 133 | """ 134 | pred_boxes = None 135 | pred_scores = None 136 | default_boxes = None 137 | pred_masks = None 138 | proto_masks = None 139 | if "boxes" in preds: 140 | pred_boxes = preds["boxes"] 141 | if "scores" in preds: 142 | pred_scores = preds["scores"] 143 | print("before", pred_scores.size()) 144 | if "default_boxes" in preds: 145 | default_boxes = preds["default_boxes"] 146 | if "mask_coefs" in preds: 147 | pred_masks = preds["mask_coefs"] 148 | if "proto_masks" in preds: 149 | proto_masks = preds["proto_masks"] 150 | 151 | batch_size = pred_boxes.size(0) 152 | num_prior_boxes = default_boxes.size(0) 153 | # pred_scores = preds['scores'].view(batch_size, num_prior_boxes, self.num_classes).transpose(2, 1).contiguous() 154 | 155 | pred_scores = preds["scores"].view( 156 | batch_size, num_prior_boxes, self.num_classes 157 | ) 158 | pred_scores = pred_scores.transpose(2, 1).contiguous() 159 | # test_scores, test_index = torch.max(preds['scores'], dim=1) 160 | 161 | return_list = [] 162 | for i, image_size in enumerate(image_sizes): 163 | decoded_boxes = decode(pred_boxes[i], default_boxes) 164 | results = self._filter_overlaps(i, decoded_boxes, pred_masks, pred_scores) 165 | results["proto_masks"] = proto_masks[i] 166 | 167 | return_list.append(_convert_boxes_and_masks(results, image_size)) 168 | 169 | for result in return_list: 170 | scores = result["scores"].detach() 171 | sorted_index = range(len(scores))[: self.top_k] 172 | # sorted_index = scores.argsort(0, descending=True)[:5] 173 | 174 | boxes = 
result["boxes"][sorted_index] 175 | labels = result["labels"][sorted_index] 176 | scores = scores[sorted_index] 177 | masks = result["masks"][sorted_index] 178 | print(masks[0].sum()) 179 | 180 | result["boxes"] = boxes 181 | result["scores"] = scores 182 | result["labels"] = labels 183 | result["masks"] = masks 184 | 185 | return return_list 186 | 187 | def _filter_overlaps( 188 | self, batch_index, decoded_boxes, pred_masks, pred_scores 189 | ) -> Dict[str, Tensor]: 190 | """ 191 | batch_index (int) 192 | decoded_boxes () 193 | pred_masks (FloatTensor[B, N, 32]) 194 | pred_scores () 195 | """ 196 | scores = pred_scores[batch_index, 1:, :] 197 | max_scores, labels = torch.max(scores, dim=0) 198 | 199 | keep = max_scores > self.score_threshold # 0.05 200 | scores = scores[:, keep] 201 | boxes = decoded_boxes[keep, :] 202 | labels = labels[keep] 203 | masks = pred_masks[batch_index, keep, :] 204 | 205 | if scores.size(1) == 0: 206 | return None 207 | 208 | return_dict = defaultdict() 209 | for _class in range(scores.size(0)): 210 | _scores = scores[_class, :] 211 | indices = self.nms(boxes, _scores, labels, iou_threshold=0.3) 212 | 213 | return_dict["boxes"] = boxes[indices] 214 | return_dict["scores"] = scores[indices] 215 | return_dict["mask_coefs"] = masks[indices] 216 | return_dict["labels"] = labels[indices] 217 | 218 | return dict(return_dict) 219 | 220 | 221 | def _convert_boxes_and_masks(preds, size): 222 | """ 223 | Args: 224 | preds 225 | boxes (FloatTensor[N, 4]) 226 | mask_coefs (FloatTensor[N, 32]) 227 | proto_masks (FloatTensor[138, 138, 32]) 228 | size (): (h, w) 229 | 230 | """ 231 | h, w = size 232 | boxes = preds["boxes"] 233 | mask_coefs = preds["mask_coefs"] 234 | proto_masks = preds["proto_masks"] 235 | print(boxes.size(), mask_coefs.size(), proto_masks.size()) 236 | 237 | # masks = proto_masks @ mask_coefs.t() 238 | masks = torch.matmul(proto_masks, mask_coefs.t()) 239 | print(mask_coefs) 240 | masks = torch.sigmoid(masks) 241 | print(masks.size()) 242 | print(masks[0].sum().long()) 243 | 244 | masks = crop(masks, boxes) 245 | 246 | masks = masks.permute(2, 0, 1).contiguous() 247 | print(masks.size()) 248 | 249 | masks = F.interpolate( 250 | masks.unsqueeze(0), (h, w), mode="bilinear", align_corners=False 251 | ).squeeze(0) 252 | masks.gt_(0.5) # Binarize the masks 253 | print(masks[0].sum()) 254 | boxes[:, 0], boxes[:, 2] = sanitize_coordinates( 255 | boxes[:, 0], boxes[:, 2], w, cast=False 256 | ) 257 | boxes[:, 1], boxes[:, 3] = sanitize_coordinates( 258 | boxes[:, 1], boxes[:, 3], h, cast=False 259 | ) 260 | boxes = boxes.long() 261 | 262 | preds["boxes"] = boxes 263 | preds["masks"] = masks 264 | 265 | del preds["proto_masks"] 266 | del preds["mask_coefs"] 267 | 268 | return preds 269 | -------------------------------------------------------------------------------- /boda/models/feature_extractor/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Any, Optional, List 2 | 3 | import torch 4 | from torch import Tensor 5 | from torch import nn 6 | from torch.hub import load_state_dict_from_url 7 | 8 | 9 | # __all__ = ['MobileNetV2', 'mobilenet_v2'] 10 | 11 | 12 | model_urls = { 13 | "mobilenet_v2": "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth", 14 | } 15 | 16 | 17 | def _make_divisible(v: float, divisor: int, min_value: Optional[int] = None) -> int: 18 | """ 19 | This function is taken from the original tf repo. 
20 | It ensures that all layers have a channel number that is divisible by 8 21 | It can be seen here: 22 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 23 | """ 24 | if min_value is None: 25 | min_value = divisor 26 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 27 | # Make sure that round down does not go down by more than 10%. 28 | if new_v < 0.9 * v: 29 | new_v += divisor 30 | return new_v 31 | 32 | 33 | class ConvBNActivation(nn.Sequential): 34 | def __init__( 35 | self, 36 | in_planes: int, 37 | out_planes: int, 38 | kernel_size: int = 3, 39 | stride: int = 1, 40 | groups: int = 1, 41 | norm_layer: Optional[Callable[..., nn.Module]] = None, 42 | activation_layer: Optional[Callable[..., nn.Module]] = None, 43 | dilation: int = 1, 44 | ) -> None: 45 | padding = (kernel_size - 1) // 2 * dilation 46 | if norm_layer is None: 47 | norm_layer = nn.BatchNorm2d 48 | if activation_layer is None: 49 | activation_layer = nn.ReLU6 50 | super(ConvBNReLU, self).__init__( 51 | nn.Conv2d( 52 | in_planes, 53 | out_planes, 54 | kernel_size, 55 | stride, 56 | padding, 57 | dilation=dilation, 58 | groups=groups, 59 | bias=False, 60 | ), 61 | norm_layer(out_planes), 62 | activation_layer(inplace=True), 63 | ) 64 | self.out_channels = out_planes 65 | 66 | 67 | # necessary for backwards compatibility 68 | ConvBNReLU = ConvBNActivation 69 | 70 | 71 | class InvertedResidual(nn.Module): 72 | def __init__( 73 | self, 74 | inp: int, 75 | oup: int, 76 | stride: int, 77 | expand_ratio: int, 78 | norm_layer: Optional[Callable[..., nn.Module]] = None, 79 | ) -> None: 80 | super(InvertedResidual, self).__init__() 81 | self.stride = stride 82 | assert stride in [1, 2] 83 | 84 | if norm_layer is None: 85 | norm_layer = nn.BatchNorm2d 86 | 87 | hidden_dim = int(round(inp * expand_ratio)) 88 | self.use_res_connect = self.stride == 1 and inp == oup 89 | 90 | layers: List[nn.Module] = [] 91 | if expand_ratio != 1: 92 | # pw 93 | layers.append( 94 | ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer) 95 | ) 96 | layers.extend( 97 | [ 98 | # dw 99 | ConvBNReLU( 100 | hidden_dim, 101 | hidden_dim, 102 | stride=stride, 103 | groups=hidden_dim, 104 | norm_layer=norm_layer, 105 | ), 106 | # pw-linear 107 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 108 | norm_layer(oup), 109 | ] 110 | ) 111 | self.conv = nn.Sequential(*layers) 112 | self.out_channels = oup 113 | self._is_cn = stride > 1 114 | 115 | def forward(self, x: Tensor) -> Tensor: 116 | if self.use_res_connect: 117 | return x + self.conv(x) 118 | else: 119 | return self.conv(x) 120 | 121 | 122 | class MobileNetV2(nn.Module): 123 | def __init__( 124 | self, 125 | num_classes: int = 1000, 126 | width_mult: float = 1.0, 127 | inverted_residual_setting: Optional[List[List[int]]] = None, 128 | round_nearest: int = 8, 129 | block: Optional[Callable[..., nn.Module]] = None, 130 | norm_layer: Optional[Callable[..., nn.Module]] = None, 131 | ) -> None: 132 | """ 133 | MobileNet V2 main class 134 | Args: 135 | num_classes (int): Number of classes 136 | width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount 137 | inverted_residual_setting: Network structure 138 | round_nearest (int): Round the number of channels in each layer to be a multiple of this number 139 | Set to 1 to turn off rounding 140 | block: Module specifying inverted residual building block for mobilenet 141 | norm_layer: Module specifying the normalization layer to use 142 | """ 143 | 
super(MobileNetV2, self).__init__() 144 | 145 | if block is None: 146 | block = InvertedResidual 147 | 148 | if norm_layer is None: 149 | norm_layer = nn.BatchNorm2d 150 | 151 | input_channel = 32 152 | last_channel = 1280 153 | 154 | if inverted_residual_setting is None: 155 | inverted_residual_setting = [ 156 | # t, c, n, s 157 | [1, 16, 1, 1], 158 | [6, 24, 2, 2], 159 | [6, 32, 3, 2], 160 | [6, 64, 4, 2], 161 | [6, 96, 3, 1], 162 | [6, 160, 3, 2], 163 | [6, 320, 1, 1], 164 | ] 165 | 166 | # only check the first element, assuming user knows t,c,n,s are required 167 | if ( 168 | len(inverted_residual_setting) == 0 169 | or len(inverted_residual_setting[0]) != 4 170 | ): 171 | raise ValueError( 172 | "inverted_residual_setting should be non-empty " 173 | "or a 4-element list, got {}".format(inverted_residual_setting) 174 | ) 175 | 176 | # building first layer 177 | input_channel = _make_divisible(input_channel * width_mult, round_nearest) 178 | self.last_channel = _make_divisible( 179 | last_channel * max(1.0, width_mult), round_nearest 180 | ) 181 | features: List[nn.Module] = [ 182 | ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer) 183 | ] 184 | # building inverted residual blocks 185 | for t, c, n, s in inverted_residual_setting: 186 | output_channel = _make_divisible(c * width_mult, round_nearest) 187 | for i in range(n): 188 | stride = s if i == 0 else 1 189 | features.append( 190 | block( 191 | input_channel, 192 | output_channel, 193 | stride, 194 | expand_ratio=t, 195 | norm_layer=norm_layer, 196 | ) 197 | ) 198 | input_channel = output_channel 199 | # building last several layers 200 | features.append( 201 | ConvBNReLU( 202 | input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer 203 | ) 204 | ) 205 | # make it nn.Sequential 206 | self.features = nn.Sequential(*features) 207 | 208 | # building classifier 209 | self.classifier = nn.Sequential( 210 | nn.Dropout(0.2), 211 | nn.Linear(self.last_channel, num_classes), 212 | ) 213 | 214 | # weight initialization 215 | for m in self.modules(): 216 | if isinstance(m, nn.Conv2d): 217 | nn.init.kaiming_normal_(m.weight, mode="fan_out") 218 | if m.bias is not None: 219 | nn.init.zeros_(m.bias) 220 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 221 | nn.init.ones_(m.weight) 222 | nn.init.zeros_(m.bias) 223 | elif isinstance(m, nn.Linear): 224 | nn.init.normal_(m.weight, 0, 0.01) 225 | nn.init.zeros_(m.bias) 226 | 227 | def _forward_impl(self, x: Tensor) -> Tensor: 228 | # This exists since TorchScript doesn't support inheritance, so the superclass method 229 | # (this one) needs to have a name other than `forward` that can be accessed in a subclass 230 | x = self.features(x) 231 | # Cannot use "squeeze" as batch-size can be 1 232 | x = nn.functional.adaptive_avg_pool2d(x, (1, 1)) 233 | x = torch.flatten(x, 1) 234 | x = self.classifier(x) 235 | return x 236 | 237 | def forward(self, x: Tensor) -> Tensor: 238 | return self._forward_impl(x) 239 | 240 | 241 | def mobilenet_v2( 242 | pretrained: bool = False, progress: bool = True, **kwargs: Any 243 | ) -> MobileNetV2: 244 | """ 245 | Constructs a MobileNetV2 architecture from 246 | `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" `_. 
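    Example (illustrative):
        >>> model = mobilenet_v2(pretrained=False)
        >>> x = torch.randn(1, 3, 224, 224)
        >>> model(x).shape
        torch.Size([1, 1000])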
247 | Args: 248 | pretrained (bool): If True, returns a model pre-trained on ImageNet 249 | progress (bool): If True, displays a progress bar of the download to stderr 250 | """ 251 | model = MobileNetV2(**kwargs) 252 | if pretrained: 253 | state_dict = load_state_dict_from_url( 254 | model_urls["mobilenet_v2"], progress=progress 255 | ) 256 | model.load_state_dict(state_dict) 257 | return model 258 | -------------------------------------------------------------------------------- /boda/models/solov2/README.md: -------------------------------------------------------------------------------- 1 | # SOLO (Segmenting Objects by Locations) 2 | 3 | ``` 4 | ██████╗ ██████╗ ██╗ ██████╗ 5 | ██╔════╝ ██╔═══██╗██║ ██╔═══██╗ 6 | ╚██████╗ ██║ ██║██║ ██║ ██║██╗ ██╗ 7 | ╚════██╗██║ ██║██║ ██║ ██║ ██╗ ██╔╝ 8 | ██████╔╝╚██████╔╝███████╗╚██████╔╝ ████╔╝ 9 | ╚═════╝ ╚═════╝ ╚══════╝ ╚═════╝ ╚═══╝ 10 | ``` 11 | 12 | ## SOLO Architecture 13 | 14 | ```{bash} 15 | ========================================================================================== 16 | Layer (type:depth-idx) Output Shape Param # 17 | ========================================================================================== 18 | ├─ResNet: 1-1 [-1, 256, 334, 200] -- 19 | | └─Conv2d: 2-1 [-1, 64, 667, 400] 9,408 20 | | └─BatchNorm2d: 2-2 [-1, 64, 667, 400] 128 21 | | └─ReLU: 2-3 [-1, 64, 667, 400] -- 22 | | └─MaxPool2d: 2-4 [-1, 64, 334, 200] -- 23 | | └─ModuleList: 2 [] -- 24 | | | └─Sequential: 3-1 [-1, 256, 334, 200] 215,808 25 | | | └─Sequential: 3-2 [-1, 512, 167, 100] 1,219,584 26 | | | └─Sequential: 3-3 [-1, 1024, 84, 50] 7,098,368 27 | | | └─Sequential: 3-4 [-1, 2048, 42, 25] 14,964,736 28 | ├─Solov1PredictNeck: 1-2 [-1, 256, 334, 200] -- 29 | | └─ModuleList: 2 [] -- 30 | | | └─Conv2d: 3-5 [-1, 256, 42, 25] 524,544 31 | | | └─Conv2d: 3-6 [-1, 256, 84, 50] 262,400 32 | | | └─Conv2d: 3-7 [-1, 256, 167, 100] 131,328 33 | | | └─Conv2d: 3-8 [-1, 256, 334, 200] 65,792 34 | | └─ModuleList: 2 [] -- 35 | | | └─Conv2d: 3-9 [-1, 256, 42, 25] 590,080 36 | | | └─Conv2d: 3-10 [-1, 256, 84, 50] 590,080 37 | | | └─Conv2d: 3-11 [-1, 256, 167, 100] 590,080 38 | | | └─Conv2d: 3-12 [-1, 256, 334, 200] 590,080 39 | | └─ModuleList: 2 [] -- 40 | | | └─Conv2d: 3-13 [-1, 256, 21, 13] 590,080 41 | ├─Solov1PredictHead: 1-3 [2, 1600, 334, 200] -- 42 | | └─ModuleList: 2 [] -- 43 | | | └─Sequential: 3-14 [-1, 256, 167, 100] 595,200 44 | | | └─Sequential: 3-15 [-1, 256, 167, 100] 590,592 45 | | | └─Sequential: 3-16 [-1, 256, 167, 100] 590,592 46 | | | └─Sequential: 3-17 [-1, 256, 167, 100] 590,592 47 | | | └─Sequential: 3-18 [-1, 256, 167, 100] 590,592 48 | | | └─Sequential: 3-19 [-1, 256, 167, 100] 590,592 49 | | | └─Sequential: 3-20 [-1, 256, 167, 100] 590,592 50 | | └─ModuleList: 2 [] -- 51 | | | └─Conv2d: 3-21 [-1, 1600, 334, 200] 411,200 52 | | └─ModuleList: 2 [] -- 53 | | | └─Sequential: 3-22 [-1, 256, 40, 40] 590,592 54 | | | └─Sequential: 3-23 [-1, 256, 40, 40] 590,592 55 | | | └─Sequential: 3-24 [-1, 256, 40, 40] 590,592 56 | | | └─Sequential: 3-25 [-1, 256, 40, 40] 590,592 57 | | | └─Sequential: 3-26 [-1, 256, 40, 40] 590,592 58 | | | └─Sequential: 3-27 [-1, 256, 40, 40] 590,592 59 | | | └─Sequential: 3-28 [-1, 256, 40, 40] 590,592 60 | | └─Conv2d: 2-5 [-1, 79, 40, 40] 182,095 61 | | └─ModuleList: 2 [] -- 62 | | | └─Sequential: 3-29 [-1, 256, 167, 100] (recursive) 63 | | | └─Sequential: 3-30 [-1, 256, 167, 100] (recursive) 64 | | | └─Sequential: 3-31 [-1, 256, 167, 100] (recursive) 65 | | | └─Sequential: 3-32 [-1, 256, 167, 100] (recursive) 66 | | | 
└─Sequential: 3-33 [-1, 256, 167, 100] (recursive) 67 | | | └─Sequential: 3-34 [-1, 256, 167, 100] (recursive) 68 | | | └─Sequential: 3-35 [-1, 256, 167, 100] (recursive) 69 | | └─ModuleList: 2 [] -- 70 | | | └─Conv2d: 3-36 [-1, 1296, 334, 200] 333,072 71 | | └─ModuleList: 2 [] -- 72 | | | └─Sequential: 3-37 [-1, 256, 36, 36] (recursive) 73 | | | └─Sequential: 3-38 [-1, 256, 36, 36] (recursive) 74 | | | └─Sequential: 3-39 [-1, 256, 36, 36] (recursive) 75 | | | └─Sequential: 3-40 [-1, 256, 36, 36] (recursive) 76 | | | └─Sequential: 3-41 [-1, 256, 36, 36] (recursive) 77 | | | └─Sequential: 3-42 [-1, 256, 36, 36] (recursive) 78 | | | └─Sequential: 3-43 [-1, 256, 36, 36] (recursive) 79 | | └─Conv2d: 2-6 [-1, 79, 36, 36] (recursive) 80 | | └─ModuleList: 2 [] -- 81 | | | └─Sequential: 3-44 [-1, 256, 84, 50] (recursive) 82 | | | └─Sequential: 3-45 [-1, 256, 84, 50] (recursive) 83 | | | └─Sequential: 3-46 [-1, 256, 84, 50] (recursive) 84 | | | └─Sequential: 3-47 [-1, 256, 84, 50] (recursive) 85 | | | └─Sequential: 3-48 [-1, 256, 84, 50] (recursive) 86 | | | └─Sequential: 3-49 [-1, 256, 84, 50] (recursive) 87 | | | └─Sequential: 3-50 [-1, 256, 84, 50] (recursive) 88 | | └─ModuleList: 2 [] -- 89 | | | └─Conv2d: 3-51 [-1, 576, 168, 100] 148,032 90 | | └─ModuleList: 2 [] -- 91 | | | └─Sequential: 3-52 [-1, 256, 24, 24] (recursive) 92 | | | └─Sequential: 3-53 [-1, 256, 24, 24] (recursive) 93 | | | └─Sequential: 3-54 [-1, 256, 24, 24] (recursive) 94 | | | └─Sequential: 3-55 [-1, 256, 24, 24] (recursive) 95 | | | └─Sequential: 3-56 [-1, 256, 24, 24] (recursive) 96 | | | └─Sequential: 3-57 [-1, 256, 24, 24] (recursive) 97 | | | └─Sequential: 3-58 [-1, 256, 24, 24] (recursive) 98 | | └─Conv2d: 2-7 [-1, 79, 24, 24] (recursive) 99 | | └─ModuleList: 2 [] -- 100 | | | └─Sequential: 3-59 [-1, 256, 42, 25] (recursive) 101 | | | └─Sequential: 3-60 [-1, 256, 42, 25] (recursive) 102 | | | └─Sequential: 3-61 [-1, 256, 42, 25] (recursive) 103 | | | └─Sequential: 3-62 [-1, 256, 42, 25] (recursive) 104 | | | └─Sequential: 3-63 [-1, 256, 42, 25] (recursive) 105 | | | └─Sequential: 3-64 [-1, 256, 42, 25] (recursive) 106 | | | └─Sequential: 3-65 [-1, 256, 42, 25] (recursive) 107 | | └─ModuleList: 2 [] -- 108 | | | └─Conv2d: 3-66 [-1, 256, 84, 50] 65,792 109 | | └─ModuleList: 2 [] -- 110 | | | └─Sequential: 3-67 [-1, 256, 16, 16] (recursive) 111 | | | └─Sequential: 3-68 [-1, 256, 16, 16] (recursive) 112 | | | └─Sequential: 3-69 [-1, 256, 16, 16] (recursive) 113 | | | └─Sequential: 3-70 [-1, 256, 16, 16] (recursive) 114 | | | └─Sequential: 3-71 [-1, 256, 16, 16] (recursive) 115 | | | └─Sequential: 3-72 [-1, 256, 16, 16] (recursive) 116 | | | └─Sequential: 3-73 [-1, 256, 16, 16] (recursive) 117 | | └─Conv2d: 2-8 [-1, 79, 16, 16] (recursive) 118 | | └─ModuleList: 2 [] -- 119 | | | └─Sequential: 3-74 [-1, 256, 42, 25] (recursive) 120 | | | └─Sequential: 3-75 [-1, 256, 42, 25] (recursive) 121 | | | └─Sequential: 3-76 [-1, 256, 42, 25] (recursive) 122 | | | └─Sequential: 3-77 [-1, 256, 42, 25] (recursive) 123 | | | └─Sequential: 3-78 [-1, 256, 42, 25] (recursive) 124 | | | └─Sequential: 3-79 [-1, 256, 42, 25] (recursive) 125 | | | └─Sequential: 3-80 [-1, 256, 42, 25] (recursive) 126 | | └─ModuleList: 2 [] -- 127 | | | └─Conv2d: 3-81 [-1, 144, 84, 50] 37,008 128 | | └─ModuleList: 2 [] -- 129 | | | └─Sequential: 3-82 [-1, 256, 12, 12] (recursive) 130 | | | └─Sequential: 3-83 [-1, 256, 12, 12] (recursive) 131 | | | └─Sequential: 3-84 [-1, 256, 12, 12] (recursive) 132 | | | └─Sequential: 3-85 [-1, 256, 12, 12] (recursive) 133 | | | 
└─Sequential: 3-86 [-1, 256, 12, 12] (recursive) 134 | | | └─Sequential: 3-87 [-1, 256, 12, 12] (recursive) 135 | | | └─Sequential: 3-88 [-1, 256, 12, 12] (recursive) 136 | | └─Conv2d: 2-9 [-1, 79, 12, 12] (recursive) 137 | ========================================================================================== 138 | Total params: 36,892,591 139 | Trainable params: 36,892,591 140 | Non-trainable params: 0 141 | Total mult-adds (G): 296.58 142 | ========================================================================================== 143 | Input size (MB): 12.20 144 | Forward/backward pass size (MB): 2671.69 145 | Params size (MB): 140.73 146 | Estimated Total Size (MB): 2824.63 147 | ========================================================================================== 148 | ``` -------------------------------------------------------------------------------- /boda/models/feature_extractor/efficientnet.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import math 3 | from functools import partial 4 | from typing import Any, Callable, Optional, List 5 | 6 | import torch 7 | from torch import nn, Tensor 8 | from torchvision.ops import StochasticDepth 9 | from torchvision.ops.misc import ConvNormActivation, SqueezeExcitation 10 | 11 | # from torchvision._internally_replaced_utils import load_state_dict_from_url 12 | 13 | 14 | __all__ = [ 15 | "EfficientNet", 16 | "efficientnet_b0", 17 | "efficientnet_b1", 18 | "efficientnet_b2", 19 | "efficientnet_b3", 20 | "efficientnet_b4", 21 | "efficientnet_b5", 22 | "efficientnet_b6", 23 | "efficientnet_b7", 24 | ] 25 | 26 | 27 | model_urls = { 28 | # Weights ported from https://github.com/rwightman/pytorch-image-models/ 29 | "efficientnet_b0": "https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth", 30 | "efficientnet_b1": "https://download.pytorch.org/models/efficientnet_b1_rwightman-533bc792.pth", 31 | "efficientnet_b2": "https://download.pytorch.org/models/efficientnet_b2_rwightman-bcdf34b7.pth", 32 | "efficientnet_b3": "https://download.pytorch.org/models/efficientnet_b3_rwightman-cf984f9c.pth", 33 | "efficientnet_b4": "https://download.pytorch.org/models/efficientnet_b4_rwightman-7eb33cd5.pth", 34 | # Weights ported from https://github.com/lukemelas/EfficientNet-PyTorch/ 35 | "efficientnet_b5": "https://download.pytorch.org/models/efficientnet_b5_lukemelas-b6417697.pth", 36 | "efficientnet_b6": "https://download.pytorch.org/models/efficientnet_b6_lukemelas-c76e70fd.pth", 37 | "efficientnet_b7": "https://download.pytorch.org/models/efficientnet_b7_lukemelas-dcc49843.pth", 38 | } 39 | 40 | 41 | def _make_divisible(v: float, divisor: int, min_value: Optional[int] = None) -> int: 42 | if min_value is None: 43 | min_value = divisor 44 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 45 | # Make sure that round down does not go down by more than 10%. 
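# Illustrative worked example: with v = 32 * 0.75 = 24.0 and divisor = 8 (min_value
# defaults to 8), new_v = max(8, int(24.0 + 4) // 8 * 8) = 24; since 24 >= 0.9 * 24.0,
# the check below leaves it unchanged and 24 is returned.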
46 | if new_v < 0.9 * v: 47 | new_v += divisor 48 | return new_v 49 | 50 | 51 | class MBConvConfig: 52 | # Stores information listed at Table 1 of the EfficientNet paper 53 | def __init__( 54 | self, 55 | expand_ratio: float, 56 | kernel: int, 57 | stride: int, 58 | input_channels: int, 59 | out_channels: int, 60 | num_layers: int, 61 | width_mult: float, 62 | depth_mult: float, 63 | ) -> None: 64 | self.expand_ratio = expand_ratio 65 | self.kernel = kernel 66 | self.stride = stride 67 | self.input_channels = self.adjust_channels(input_channels, width_mult) 68 | self.out_channels = self.adjust_channels(out_channels, width_mult) 69 | self.num_layers = self.adjust_depth(num_layers, depth_mult) 70 | 71 | def __repr__(self) -> str: 72 | s = self.__class__.__name__ + "(" 73 | s += "expand_ratio={expand_ratio}" 74 | s += ", kernel={kernel}" 75 | s += ", stride={stride}" 76 | s += ", input_channels={input_channels}" 77 | s += ", out_channels={out_channels}" 78 | s += ", num_layers={num_layers}" 79 | s += ")" 80 | return s.format(**self.__dict__) 81 | 82 | @staticmethod 83 | def adjust_channels( 84 | channels: int, width_mult: float, min_value: Optional[int] = None 85 | ) -> int: 86 | return _make_divisible(channels * width_mult, 8, min_value) 87 | 88 | @staticmethod 89 | def adjust_depth(num_layers: int, depth_mult: float): 90 | return int(math.ceil(num_layers * depth_mult)) 91 | 92 | 93 | def _efficientnet_conf(width_mult: float, depth_mult: float) -> List[MBConvConfig]: 94 | bneck_conf = partial(MBConvConfig, width_mult=width_mult, depth_mult=depth_mult) 95 | inverted_residual_setting = [ 96 | bneck_conf(1, 3, 1, 32, 16, 1), 97 | bneck_conf(6, 3, 2, 16, 24, 2), 98 | bneck_conf(6, 5, 2, 24, 40, 2), 99 | bneck_conf(6, 3, 2, 40, 80, 3), 100 | bneck_conf(6, 5, 1, 80, 112, 3), 101 | bneck_conf(6, 5, 2, 112, 192, 4), 102 | bneck_conf(6, 3, 1, 192, 320, 1), 103 | ] 104 | 105 | return inverted_residual_setting 106 | 107 | 108 | class MBConv(nn.Module): 109 | def __init__( 110 | self, 111 | cnf: MBConvConfig, 112 | stochastic_depth_prob: float, 113 | norm_layer: Callable[..., nn.Module], 114 | se_layer: Callable[..., nn.Module] = SqueezeExcitation, 115 | ) -> None: 116 | super().__init__() 117 | 118 | if not (1 <= cnf.stride <= 2): 119 | raise ValueError("illegal stride value") 120 | 121 | self.use_res_connect = ( 122 | cnf.stride == 1 and cnf.input_channels == cnf.out_channels 123 | ) 124 | 125 | layers: List[nn.Module] = [] 126 | activation_layer = nn.SiLU 127 | 128 | # expand 129 | expanded_channels = cnf.adjust_channels(cnf.input_channels, cnf.expand_ratio) 130 | if expanded_channels != cnf.input_channels: 131 | layers.append( 132 | ConvNormActivation( 133 | cnf.input_channels, 134 | expanded_channels, 135 | kernel_size=1, 136 | norm_layer=norm_layer, 137 | activation_layer=activation_layer, 138 | ) 139 | ) 140 | 141 | # depthwise 142 | layers.append( 143 | ConvNormActivation( 144 | expanded_channels, 145 | expanded_channels, 146 | kernel_size=cnf.kernel, 147 | stride=cnf.stride, 148 | groups=expanded_channels, 149 | norm_layer=norm_layer, 150 | activation_layer=activation_layer, 151 | ) 152 | ) 153 | 154 | # squeeze and excitation 155 | squeeze_channels = max(1, cnf.input_channels // 4) 156 | layers.append( 157 | se_layer( 158 | expanded_channels, 159 | squeeze_channels, 160 | activation=partial(nn.SiLU, inplace=True), 161 | ) 162 | ) 163 | 164 | # project 165 | layers.append( 166 | ConvNormActivation( 167 | expanded_channels, 168 | cnf.out_channels, 169 | kernel_size=1, 170 | 
norm_layer=norm_layer, 171 | activation_layer=None, 172 | ) 173 | ) 174 | 175 | self.block = nn.Sequential(*layers) 176 | self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row") 177 | self.out_channels = cnf.out_channels 178 | self.stride = cnf.stride 179 | 180 | def forward(self, input: Tensor) -> Tensor: 181 | result = self.block(input) 182 | if self.use_res_connect: 183 | result = self.stochastic_depth(result) 184 | result += input 185 | return result 186 | 187 | 188 | class EfficientNet(nn.Module): 189 | def __init__( 190 | self, 191 | width_mult: float, 192 | depth_mult: float, 193 | stochastic_depth_prob: float = 0.2, 194 | block: Optional[Callable[..., nn.Module]] = None, 195 | norm_layer: Optional[Callable[..., nn.Module]] = None, 196 | **kwargs: Any, 197 | ) -> None: 198 | super().__init__() 199 | self.layers = nn.ModuleList() 200 | self.channels = [] 201 | 202 | if block is None: 203 | block = MBConv 204 | 205 | if norm_layer is None: 206 | norm_layer = nn.BatchNorm2d 207 | 208 | self.inverted_residual_setting = _efficientnet_conf( 209 | width_mult=width_mult, depth_mult=depth_mult 210 | ) 211 | 212 | # building first layer 213 | firstconv_output_channels = self.inverted_residual_setting[0].input_channels 214 | self.firstconv_layer = ConvNormActivation( 215 | 3, 216 | firstconv_output_channels, 217 | kernel_size=3, 218 | stride=2, 219 | norm_layer=norm_layer, 220 | activation_layer=nn.SiLU, 221 | ) 222 | 223 | # building inverted residual blocks 224 | total_stage_blocks = sum( 225 | cnf.num_layers for cnf in self.inverted_residual_setting 226 | ) 227 | stage_block_id = 0 228 | for cnf in self.inverted_residual_setting: 229 | stage: List[nn.Module] = [] 230 | for _ in range(cnf.num_layers): 231 | # copy to avoid modifications. 
shallow copy is enough 232 | block_cnf = copy.copy(cnf) 233 | 234 | # overwrite info if not the first conv in the stage 235 | if stage: 236 | block_cnf.input_channels = block_cnf.out_channels 237 | block_cnf.stride = 1 238 | 239 | # adjust stochastic depth probability based on the depth of the stage block 240 | sd_prob = ( 241 | stochastic_depth_prob * float(stage_block_id) / total_stage_blocks 242 | ) 243 | 244 | stage.append(block(block_cnf, sd_prob, norm_layer)) 245 | stage_block_id += 1 246 | 247 | # self.channels.append(block_cnf.out_channels) 248 | self.layers.extend(stage) 249 | 250 | for m in self.modules(): 251 | if isinstance(m, nn.Conv2d): 252 | nn.init.kaiming_normal_(m.weight, mode="fan_out") 253 | if m.bias is not None: 254 | nn.init.zeros_(m.bias) 255 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 256 | nn.init.ones_(m.weight) 257 | nn.init.zeros_(m.bias) 258 | 259 | def forward(self, inputs: Tensor) -> Tensor: 260 | x = self.firstconv_layer(inputs) 261 | 262 | outputs = [] 263 | last_x = None 264 | for i, layer in enumerate(self.layers): 265 | x = layer(x) 266 | 267 | if layer.stride == 2: 268 | outputs.append(last_x) 269 | elif i == len(self.layers) - 1: 270 | outputs.append(x) 271 | last_x = x 272 | 273 | del last_x 274 | 275 | return outputs[1:] 276 | 277 | def from_pretrained(self, path): 278 | state_dict = torch.load(path) 279 | # state_dict = load_state_dict_from_url(model_urls[arch], progress=True) 280 | 281 | try: 282 | excepted_keys = [ 283 | key 284 | for key in list(state_dict) 285 | if key.startswith("features.8") or key.startswith("classifier") 286 | ] 287 | for excepted_key in excepted_keys: 288 | state_dict.pop(excepted_key) 289 | except KeyError: 290 | pass 291 | 292 | self.load_state_dict(state_dict, strict=False) 293 | 294 | 295 | def efficientnet_b0() -> EfficientNet: 296 | backbone = EfficientNet(width_mult=1.0, depth_mult=1.0) 297 | return backbone 298 | 299 | 300 | def efficientnet_b1() -> EfficientNet: 301 | backbone = EfficientNet(width_mult=1.0, depth_mult=1.1) 302 | return backbone 303 | 304 | 305 | def efficientnet_b2() -> EfficientNet: 306 | backbone = EfficientNet(width_mult=1.1, depth_mult=1.2) 307 | return backbone 308 | 309 | 310 | def efficientnet_b3() -> EfficientNet: 311 | backbone = EfficientNet(width_mult=1.2, depth_mult=1.4) 312 | return backbone 313 | 314 | 315 | def efficientnet_b4() -> EfficientNet: 316 | backbone = EfficientNet(width_mult=1.4, depth_mult=1.8) 317 | return backbone 318 | 319 | 320 | def efficientnet_b5() -> EfficientNet: 321 | backbone = EfficientNet( 322 | width_mult=1.6, 323 | depth_mult=2.2, 324 | norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), 325 | ) 326 | return backbone 327 | 328 | 329 | def efficientnet_b6() -> EfficientNet: 330 | backbone = EfficientNet( 331 | width_mult=1.8, 332 | depth_mult=2.6, 333 | norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), 334 | ) 335 | return backbone 336 | 337 | 338 | def efficientnet_b7() -> EfficientNet: 339 | backbone = EfficientNet( 340 | width_mult=2.0, 341 | depth_mult=3.1, 342 | norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), 343 | ) 344 | return backbone 345 | -------------------------------------------------------------------------------- /boda/lib/torchsummary/torchsummary.py: -------------------------------------------------------------------------------- 1 | """ torchsummary.py """ 2 | from typing import ( 3 | Any, 4 | Dict, 5 | Iterable, 6 | Iterator, 7 | List, 8 | Mapping, 9 | Optional, 10 | Sequence, 11 | Tuple, 12 
| Union, 13 | ) 14 | 15 | import torch 16 | import torch.nn as nn 17 | from torch.utils.hooks import RemovableHandle 18 | 19 | from .formatting import FormattingOptions, Verbosity 20 | from .layer_info import LayerInfo 21 | from .model_statistics import CORRECTED_INPUT_SIZE_TYPE, HEADER_TITLES, ModelStatistics 22 | 23 | # Some modules do the computation themselves using parameters 24 | # or the parameters of children. Treat these as layers. 25 | LAYER_MODULES = (torch.nn.MultiheadAttention,) 26 | INPUT_SIZE_TYPE = Sequence[Union[int, Sequence[Any], torch.Size]] 27 | INPUT_DATA_TYPE = Optional[ 28 | Union[torch.Tensor, torch.Size, Sequence[torch.Tensor], INPUT_SIZE_TYPE] 29 | ] 30 | DEFAULT_COLUMN_NAMES = ("output_size", "num_params") 31 | 32 | 33 | def summary( 34 | model: nn.Module, 35 | input_data: INPUT_DATA_TYPE = None, 36 | *args: Any, 37 | batch_dim: Optional[int] = 0, 38 | branching: bool = True, 39 | col_names: Optional[Iterable[str]] = None, 40 | col_width: int = 25, 41 | depth: int = 3, 42 | device: Optional[torch.device] = None, 43 | dtypes: Optional[List[torch.dtype]] = None, 44 | verbose: int = 1, 45 | **kwargs: Any, 46 | ) -> ModelStatistics: 47 | """ 48 | Summarize the given PyTorch model. Summarized information includes: 49 | 1) Layer names, 50 | 2) input/output shapes, 51 | 3) kernel shape, 52 | 4) # of parameters, 53 | 5) # of operations (Mult-Adds) 54 | 55 | Args: 56 | model (nn.Module): 57 | PyTorch model to summarize 58 | 59 | input_data (Sequence of Sizes or Tensors): 60 | Example input tensor of the model (dtypes inferred from model input). 61 | - OR - 62 | Shape of input data as a List/Tuple/torch.Size 63 | (dtypes must match model input, default is FloatTensors). 64 | You should NOT include batch size in the tuple. 65 | - OR - 66 | If input_data is not provided, no forward pass through the network is 67 | performed, and the provided model information is limited to layer names. 68 | Default: None 69 | 70 | batch_dim (int): 71 | Batch_dimension of input data. If batch_dim is None, the input data 72 | is assumed to contain the batch dimension. 73 | WARNING: in a future version, the default will change to None. 74 | Default: 0 75 | 76 | branching (bool): 77 | Whether to use the branching layout for the printed output. 78 | Default: True 79 | 80 | col_names (Iterable[str]): 81 | Specify which columns to show in the output. Currently supported: 82 | ("input_size", "output_size", "num_params", "kernel_size", "mult_adds") 83 | If input_data is not provided, only "num_params" is used. 84 | Default: ("output_size", "num_params") 85 | 86 | col_width (int): 87 | Width of each column. 88 | Default: 25 89 | 90 | depth (int): 91 | Number of nested layers to traverse (e.g. Sequentials). 92 | Default: 3 93 | 94 | device (torch.Device): 95 | Uses this torch device for model and input_data. 96 | If not specified, uses result of torch.cuda.is_available(). 97 | Default: None 98 | 99 | dtypes (List[torch.dtype]): 100 | For multiple inputs, specify the size of both inputs, and 101 | also specify the types of each parameter here. 102 | Default: None 103 | 104 | verbose (int): 105 | 0 (quiet): No output 106 | 1 (default): Print model summary 107 | 2 (verbose): Show weight and bias layers in full detail 108 | Default: 1 109 | 110 | *args, **kwargs: 111 | Other arguments used in `model.forward` function. 112 | 113 | Return: 114 | ModelStatistics object 115 | See torchsummary/model_statistics.py for more information. 
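Example:
    An illustrative call; the two-layer model below is a hypothetical
    placeholder, not something defined in this package:

    >>> import torch.nn as nn
    >>> net = nn.Sequential(nn.Conv2d(3, 16, 3), nn.ReLU())
    >>> stats = summary(net, (3, 32, 32), verbose=0)
    >>> print(stats)  # prints the layer table held by the returned ModelStatistics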
116 | """ 117 | if col_names is None: 118 | col_names = ("num_params",) if input_data is None else DEFAULT_COLUMN_NAMES 119 | 120 | validate_user_params(input_data, col_names, verbose) 121 | input_size: CORRECTED_INPUT_SIZE_TYPE = [] 122 | summary_list: List[LayerInfo] = [] 123 | hooks: Optional[List[RemovableHandle]] = None if input_data is None else [] 124 | idx: Dict[int, int] = {} 125 | apply_hooks(model, model, batch_dim, depth, summary_list, idx, hooks) 126 | 127 | if input_data is not None: 128 | if device is None: 129 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 130 | 131 | x, input_size = process_input_data(input_data, batch_dim, device, dtypes) 132 | args, kwargs = set_device(args, device), set_device(kwargs, device) 133 | try: 134 | with torch.no_grad(): 135 | _ = model.to(device)(*x, *args, **kwargs) # type: ignore[misc] 136 | except Exception as e: 137 | executed_layers = [layer for layer in summary_list if layer.executed] 138 | raise RuntimeError( 139 | "Failed to run torchsummary. See above stack traces for more details. " 140 | "Executed layers up to: {}".format(executed_layers) 141 | ) from e 142 | finally: 143 | if hooks is not None: 144 | for hook in hooks: 145 | hook.remove() 146 | 147 | formatting = FormattingOptions(branching, depth, verbose, col_names, col_width) 148 | formatting.set_layer_name_width(summary_list) 149 | results = ModelStatistics(summary_list, input_size, formatting) 150 | if verbose > Verbosity.QUIET.value: 151 | print(results) 152 | return results 153 | 154 | 155 | def validate_user_params( 156 | input_data: INPUT_DATA_TYPE, col_names: Iterable[str], verbose: int 157 | ) -> None: 158 | """Raise exceptions if the user's input is invalid.""" 159 | if verbose not in (0, 1, 2): 160 | raise ValueError( 161 | "Verbose must be either 0 (quiet), 1 (default), or 2 (verbose)." 
162 | ) 163 | 164 | for col in col_names: 165 | if col not in HEADER_TITLES.keys(): 166 | raise ValueError(f"Column {col} is not a valid column name.") 167 | if input_data is None and col not in ("num_params", "kernel_size"): 168 | raise ValueError(f"You must pass input_data in order to use column {col}") 169 | 170 | 171 | def set_device(data: Any, device: torch.device) -> Any: 172 | """Sets device for all input types and collections of input types.""" 173 | if torch.is_tensor(data): 174 | return data.to(device, non_blocking=True) 175 | 176 | # Recursively apply to collection items 177 | elem_type = type(data) 178 | if isinstance(data, Mapping): 179 | return elem_type({k: set_device(v, device) for k, v in data.items()}) 180 | if isinstance(data, tuple) and hasattr(data, "_fields"): # Named tuple 181 | return elem_type(*(set_device(d, device) for d in data)) 182 | if isinstance(data, Iterable) and not isinstance(data, str): 183 | return elem_type([set_device(d, device) for d in data]) 184 | # Data is neither a tensor nor a collection 185 | return data 186 | 187 | 188 | def process_input_data( 189 | input_data: INPUT_DATA_TYPE, 190 | batch_dim: Optional[int], 191 | device: torch.device, 192 | dtypes: Optional[List[torch.dtype]], 193 | ) -> Tuple[INPUT_DATA_TYPE, CORRECTED_INPUT_SIZE_TYPE]: 194 | """Create sample input data and the corrected input size.""" 195 | if isinstance(input_data, torch.Tensor): 196 | input_size = get_correct_input_sizes(input_data.size()) 197 | x = [input_data.to(device)] 198 | 199 | elif isinstance(input_data, (list, tuple)): 200 | if all(isinstance(data, torch.Tensor) for data in input_data): 201 | input_sizes = [ 202 | data.size() for data in input_data # type: ignore[union-attr] 203 | ] 204 | input_size = get_correct_input_sizes(input_sizes) 205 | x = set_device(input_data, device) 206 | else: 207 | if dtypes is None: 208 | dtypes = [torch.float] * len(input_data) 209 | input_size = get_correct_input_sizes(input_data) 210 | x = get_input_tensor(input_size, batch_dim, dtypes, device) 211 | 212 | else: 213 | raise TypeError( 214 | "Input type is not recognized. Please ensure input_data is valid.\n" 215 | "For multiple inputs to the network, ensure input_data passed in is " 216 | "a sequence of tensors or a list of tuple sizes. If you are having " 217 | "trouble here, please submit a GitHub issue." 218 | ) 219 | 220 | return x, input_size 221 | 222 | 223 | def get_input_tensor( 224 | input_size: CORRECTED_INPUT_SIZE_TYPE, 225 | batch_dim: Optional[int], 226 | dtypes: List[torch.dtype], 227 | device: torch.device, 228 | ) -> List[torch.Tensor]: 229 | """Get input_tensor with batch size 2 for use in model.forward()""" 230 | x = [] 231 | for size, dtype in zip(input_size, dtypes): 232 | # add batch_size of 2 for BatchNorm 233 | input_tensor = torch.rand(*size) 234 | if batch_dim is not None: 235 | input_tensor = input_tensor.unsqueeze(dim=batch_dim) 236 | input_tensor = torch.cat([input_tensor] * 2, dim=batch_dim) 237 | x.append(input_tensor.to(device).type(dtype)) 238 | return x 239 | 240 | 241 | def get_correct_input_sizes(input_size: INPUT_SIZE_TYPE) -> CORRECTED_INPUT_SIZE_TYPE: 242 | """ 243 | Convert input_size to the correct form, which is a list of tuples. 244 | Also handles multiple inputs to the network. 
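For example (illustrative): a flat list such as [1, 28, 28] becomes [(1, 28, 28)],
and a tuple of tuples such as ((3, 224, 224), (10,)) becomes [(3, 224, 224), (10,)].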
245 | """ 246 | 247 | def flatten(nested_array: INPUT_SIZE_TYPE) -> Iterator[Any]: 248 | """Flattens a nested array.""" 249 | for item in nested_array: 250 | if isinstance(item, (list, tuple)): 251 | yield from flatten(item) 252 | else: 253 | yield item 254 | 255 | if not input_size or any(size <= 0 for size in flatten(input_size)): 256 | raise ValueError("Input_data is invalid, or negative size found in input_data.") 257 | 258 | if isinstance(input_size, list) and isinstance(input_size[0], int): 259 | return [tuple(input_size)] 260 | if isinstance(input_size, list): 261 | return input_size 262 | if isinstance(input_size, tuple) and isinstance(input_size[0], tuple): 263 | return list(input_size) 264 | return [input_size] 265 | 266 | 267 | def apply_hooks( 268 | module: nn.Module, 269 | orig_model: nn.Module, 270 | batch_dim: Optional[int], 271 | depth: int, 272 | summary_list: List[LayerInfo], 273 | idx: Dict[int, int], 274 | hooks: Optional[List[RemovableHandle]], 275 | curr_depth: int = 0, 276 | parent_info: Optional[LayerInfo] = None, 277 | ) -> None: 278 | """ 279 | If input_data is provided, recursively adds hooks to all layers of the model. 280 | Else, fills summary_list with layer info without computing a 281 | forward pass through the network. 282 | """ 283 | # Fallback is used if the layer's hook is never called, in ModuleLists, for example. 284 | info = LayerInfo(module, curr_depth, None, parent_info) 285 | 286 | def pre_hook(module: nn.Module, inputs: Any) -> None: 287 | """Create a LayerInfo object to aggregate information about that layer.""" 288 | del inputs 289 | nonlocal info 290 | idx[curr_depth] = idx.get(curr_depth, 0) + 1 291 | info = LayerInfo(module, curr_depth, idx[curr_depth], parent_info) 292 | info.check_recursive(summary_list) 293 | summary_list.append(info) 294 | 295 | def hook(module: nn.Module, inputs: Any, outputs: Any) -> None: 296 | """Update LayerInfo after forward pass.""" 297 | del module 298 | info.input_size = info.calculate_size(inputs, batch_dim) 299 | info.output_size = info.calculate_size(outputs, batch_dim) 300 | info.calculate_macs() 301 | info.executed = True 302 | 303 | submodules = [m for m in module.modules() if m is not orig_model] 304 | if module != orig_model or isinstance(module, LAYER_MODULES) or not submodules: 305 | if hooks is None: 306 | pre_hook(module, None) 307 | else: 308 | hooks.append(module.register_forward_pre_hook(pre_hook)) 309 | hooks.append(module.register_forward_hook(hook)) 310 | 311 | if curr_depth <= depth: 312 | for child in module.children(): 313 | apply_hooks( 314 | child, 315 | orig_model, 316 | batch_dim, 317 | depth, 318 | summary_list, 319 | idx, 320 | hooks, 321 | curr_depth + 1, 322 | info, 323 | ) 324 | --------------------------------------------------------------------------------