├── LICENSE ├── README.md ├── images └── invo.png ├── involution.py └── test.ipynb /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Shuchen Du 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # involution.pytorch ([内卷](https://zh.wikipedia.org/wiki/%E5%86%85%E5%8D%B7%E5%8C%96)) 2 | A PyTorch implementation of Involution using [einops](https://github.com/arogozhnikov/einops) 3 | 4 | This is an unofficial pytorch implementation of involution [paper](https://arxiv.org/pdf/2103.06255.pdf). Official implementation can be found [here](https://github.com/d-li14/involution). 5 | 6 | 7 | 8 | ## Features 9 | - This layer can deal with arbitrary input and output channels, kernel sizes, strides and reduction ratios. However, input channels should be divisible by groups. 10 | 11 | 12 | ## Requirements 13 | ``` 14 | pytorch >= 1.4.0 15 | einops >= 0.3.0 16 | ``` 17 | 18 | ## Usage 19 | * An example as in [test.ipynb](https://github.com/shuuchen/involution.pytorch/blob/main/test.ipynb): 20 | ```shell 21 | >>> import torch 22 | >>> from involution import Involution 23 | >>> 24 | >>> x = torch.rand(2,8,5,5) 25 | >>> i = Involution(in_channels=8, out_channels=4, groups=4, 26 | >>> kernel_size=3, stride=2, reduction_ratio=2) 27 | >>> i(x).size() 28 | torch.Size([2, 4, 3, 3]) 29 | ``` 30 | 31 | ## TODO 32 | - [ ] ImageNet training & performance checking 33 | 34 | 35 | ## References 36 | - [official implementation](https://github.com/d-li14/involution/blob/main/cls/mmcls/models/utils/involution_naive.py) 37 | - [original paper](https://arxiv.org/pdf/2103.06255.pdf) 38 | - [einops](https://github.com/arogozhnikov/einops) 39 | - [利用Pytorch实现卷积操作](https://zhuanlan.zhihu.com/p/349683405) 40 | -------------------------------------------------------------------------------- /images/invo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shuuchen/involution.pytorch/adde46aff4e28068962ccd481e6dc33b5efa5d73/images/invo.png -------------------------------------------------------------------------------- /involution.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from einops import rearrange 3 | from torch import nn 4 | from torch.nn import functional as F 5 | 6 | 7 | class Involution(nn.Module): 8 | """ 9 | Implementation of `Involution: Inverting the Inherence of Convolution for Visual Recognition`. 10 | """ 11 | def __init__(self, in_channels, out_channels, groups=1, kernel_size=3, stride=1, reduction_ratio=2): 12 | 13 | super().__init__() 14 | 15 | channels_reduced = max(1, in_channels // reduction_ratio) 16 | padding = kernel_size // 2 17 | 18 | self.reduce = nn.Sequential( 19 | nn.Conv2d(in_channels, channels_reduced, 1), 20 | nn.BatchNorm2d(channels_reduced), 21 | nn.ReLU(inplace=True)) 22 | 23 | self.span = nn.Conv2d(channels_reduced, kernel_size * kernel_size * groups, 1) 24 | self.unfold = nn.Unfold(kernel_size, padding=padding, stride=stride) 25 | 26 | self.resampling = None if in_channels == out_channels else nn.Conv2d(in_channels, out_channels, 1) 27 | 28 | self.kernel_size = kernel_size 29 | self.stride = stride 30 | self.padding = padding 31 | self.groups = groups 32 | 33 | @classmethod 34 | def get_name(cls): 35 | """ 36 | Return this layer name. 37 | 38 | Returns: 39 | str: layer name. 40 | """ 41 | return 'Involution' 42 | 43 | def forward(self, input_tensor): 44 | """ 45 | Calculate Involution. 46 | 47 | override function from PyTorch. 48 | """ 49 | _, _, height, width = input_tensor.size() 50 | if self.stride > 1: 51 | out_size = lambda x: (x + 2 * self.padding - self.kernel_size) // self.stride + 1 52 | height, width = out_size(height), out_size(width) 53 | uf_x = rearrange(self.unfold(input_tensor), 'b (g d k j) (h w) -> b g d (k j) h w', 54 | g=self.groups, k=self.kernel_size, j=self.kernel_size, h=height, w=width) 55 | 56 | if self.stride > 1: 57 | input_tensor = F.adaptive_avg_pool2d(input_tensor, (height, width)) 58 | kernel = rearrange(self.span(self.reduce(input_tensor)), 'b (k j g) h w -> b g (k j) h w', 59 | k=self.kernel_size, j=self.kernel_size) 60 | 61 | out = rearrange(torch.einsum('bgdxhw, bgxhw -> bgdhw', uf_x, kernel), 'b g d h w -> b (g d) h w') 62 | 63 | if self.resampling: 64 | out = self.resampling(out) 65 | 66 | return out.contiguous() -------------------------------------------------------------------------------- /test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import torch\n", 10 | "from torch import nn\n", 11 | "from involution import Involution" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "data": { 21 | "text/plain": [ 22 | "torch.Size([2, 4, 3, 3])" 23 | ] 24 | }, 25 | "execution_count": 2, 26 | "metadata": {}, 27 | "output_type": "execute_result" 28 | } 29 | ], 30 | "source": [ 31 | "x = torch.rand(2,8,5,5)\n", 32 | "i = Involution(in_channels=8, out_channels=4, groups=4, kernel_size=3, stride=2, reduction_ratio=2)\n", 33 | "i(x).size()" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "x = torch.rand(2,8,10,11)\n", 43 | "y = torch.rand(2,3,4,4)\n", 44 | "i = Involution(8,3,stride=3, reduction_ratio=4)\n", 45 | "l = nn.L1Loss()(i(x), y)\n", 46 | "l.backward()" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 4, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "data": { 56 | "text/plain": [ 57 | "True" 58 | ] 59 | }, 60 | "execution_count": 4, 61 | "metadata": {}, 62 | "output_type": "execute_result" 63 | } 64 | ], 65 | "source": [ 66 | "i(x).is_contiguous()" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [] 75 | } 76 | ], 77 | "metadata": { 78 | "kernelspec": { 79 | "display_name": "Python 3", 80 | "language": "python", 81 | "name": "python3" 82 | }, 83 | "language_info": { 84 | "codemirror_mode": { 85 | "name": "ipython", 86 | "version": 3 87 | }, 88 | "file_extension": ".py", 89 | "mimetype": "text/x-python", 90 | "name": "python", 91 | "nbconvert_exporter": "python", 92 | "pygments_lexer": "ipython3", 93 | "version": "3.7.6" 94 | } 95 | }, 96 | "nbformat": 4, 97 | "nbformat_minor": 5 98 | } 99 | --------------------------------------------------------------------------------