# Repository layout:
# ├── README.md
# ├── CoordConv-numpy.py
# └── CoordConv.py
#
# /README.md:
# --------------------------------------------------------------------------------
# # CoordConv-pytorch
# PyTorch implementation of CoordConv introduced in _An intriguing failing of
# convolutional neural networks and the CoordConv solution_
# [paper](https://arxiv.org/pdf/1807.03247.pdf)
#
# --------------------------------------------------------------------------------
# /CoordConv-numpy.py:
# --------------------------------------------------------------------------------
from __future__ import print_function

import numpy as np


def _unit_axis_np(size):
    """Return ``size`` float32 coordinates spaced evenly from -1 to 1."""
    # max(..., 1) keeps a single-element axis from dividing 0 by 0 (NaN);
    # its only coordinate then becomes -1.
    return np.arange(size).astype('float32') / max(size - 1, 1) * 2 - 1


class AddCoordsNp():
    """Append normalised coordinate channels to a channels-last NumPy batch.

    Args:
        x_dim: length of axis 2 of the input; the ``xx`` channel varies
            along this axis.
        y_dim: length of axis 1 of the input; the ``yy`` channel varies
            along this axis.
        with_r: when true, a radial-distance channel is appended as well.
    """

    def __init__(self, x_dim=64, y_dim=64, with_r=False):
        self.x_dim = x_dim
        self.y_dim = y_dim
        self.with_r = with_r

    def call(self, input_tensor):
        """Return ``input_tensor`` with coordinate channels concatenated.

        Args:
            input_tensor: array-like of shape (batch, y_dim, x_dim, c).
                For square inputs (x_dim == y_dim) this is the same layout
                as (batch, x_dim, y_dim, c).

        Returns:
            Array of shape (batch, y_dim, x_dim, c + 2), or c + 3 when
            ``with_r`` is set. The added channels are, in order: ``xx``
            (-1 to 1 along axis 2), ``yy`` (-1 to 1 along axis 1) and,
            optionally, ``rr``.

        Raises:
            ValueError: if the input is not 4-D or its spatial axes do not
                match ``(y_dim, x_dim)``.
        """
        shape = np.shape(input_tensor)
        if len(shape) != 4 or tuple(shape[1:3]) != (self.y_dim, self.x_dim):
            raise ValueError(
                "expected input of shape (batch, %d, %d, c), got %r"
                % (self.y_dim, self.x_dim, tuple(shape))
            )

        # Both grids share one shape, so non-square inputs concatenate
        # cleanly; broadcasting avoids materialising per-sample copies.
        grid_shape = (shape[0], self.y_dim, self.x_dim, 1)
        xx_channel = np.broadcast_to(
            _unit_axis_np(self.x_dim).reshape(1, 1, self.x_dim, 1), grid_shape)
        yy_channel = np.broadcast_to(
            _unit_axis_np(self.y_dim).reshape(1, self.y_dim, 1, 1), grid_shape)

        ret = np.concatenate([input_tensor, xx_channel, yy_channel], axis=-1)

        if self.with_r:
            # NOTE(review): the 0.5 offset is kept as-is for numerical
            # compatibility. The coordinates span [-1, 1], so this radius is
            # measured from (0.5, 0.5) rather than from the grid centre.
            rr = np.sqrt(np.square(xx_channel - 0.5) + np.square(yy_channel - 0.5))
            ret = np.concatenate([ret, rr], axis=-1)

        return ret


# --------------------------------------------------------------------------------
# /CoordConv.py:
# --------------------------------------------------------------------------------
import torch
import torch.nn as nn


def _unit_axis(size, device):
    """Return ``size`` float32 coordinates from -1 to 1, created on ``device``."""
    steps = torch.arange(size, dtype=torch.float32, device=device)
    # max(..., 1) keeps a single-element axis from dividing 0 by 0 (NaN);
    # its only coordinate then becomes -1.
    return steps / max(size - 1, 1) * 2 - 1


class AddCoordsTh(nn.Module):
    """Append normalised coordinate channels to a channels-first batch.

    Args:
        x_dim: length of axis 2 of the input.
        y_dim: length of axis 3 of the input.
        with_r: when true, a radial-distance channel is appended as well.
    """

    def __init__(self, x_dim=64, y_dim=64, with_r=False):
        super(AddCoordsTh, self).__init__()
        self.x_dim = x_dim
        self.y_dim = y_dim
        self.with_r = with_r

    def forward(self, input_tensor):
        """Concatenate ``xx``, ``yy`` (and optionally ``rr``) on dim 1.

        Args:
            input_tensor: tensor of shape (batch, c, x_dim, y_dim).

        Returns:
            Tensor of shape (batch, c + 2, x_dim, y_dim), or c + 3 channels
            when ``with_r`` is set.
        """
        grid_shape = (input_tensor.shape[0], 1, self.x_dim, self.y_dim)
        device = input_tensor.device

        # Build the grids on the input's device so torch.cat does not fail
        # for CUDA inputs; expand() creates views instead of copies.
        xx_channel = _unit_axis(self.x_dim, device).view(1, 1, self.x_dim, 1).expand(grid_shape)
        yy_channel = _unit_axis(self.y_dim, device).view(1, 1, 1, self.y_dim).expand(grid_shape)

        extra_channels = [xx_channel, yy_channel]
        if self.with_r:
            # NOTE(review): the 0.5 offset is kept as-is for numerical
            # compatibility. The coordinates span [-1, 1], so this radius is
            # measured from (0.5, 0.5) rather than from the grid centre.
            extra_channels.append(
                torch.sqrt(torch.pow(xx_channel - 0.5, 2) + torch.pow(yy_channel - 0.5, 2)))

        # Follow a floating-point input's dtype (half/double) so the result
        # keeps that dtype. Integer inputs keep float coordinates, because
        # casting would truncate them.
        if input_tensor.is_floating_point():
            extra_channels = [ch.to(input_tensor.dtype) for ch in extra_channels]

        return torch.cat([input_tensor] + extra_channels, dim=1)


class CoordConvTh(nn.Module):
    """CoordConv layer as in the paper.

    Runs ``AddCoordsTh`` and then ``nn.Conv2d``. ``*args`` and ``**kwargs``
    are passed to ``nn.Conv2d`` unchanged, so the ``in_channels`` given
    there must already include the added coordinate channels (input
    channels + 2, or + 3 when ``with_r`` is true).
    """

    def __init__(self, x_dim, y_dim, with_r, *args, **kwargs):
        super(CoordConvTh, self).__init__()
        self.addcoords = AddCoordsTh(x_dim=x_dim, y_dim=y_dim, with_r=with_r)
        self.conv = nn.Conv2d(*args, **kwargs)

    def forward(self, input_tensor):
        """Add coordinate channels to ``input_tensor`` and convolve the result."""
        return self.conv(self.addcoords(input_tensor))


# An alternative implementation for PyTorch that infers the x/y dimensions
# automatically from the input.
class AddCoords(nn.Module):
    """Append normalised coordinate channels, reading the size from the input.

    Args:
        with_r: when true, a radial-distance channel is appended as well.
    """

    def __init__(self, with_r=False):
        super().__init__()
        self.with_r = with_r

    def forward(self, input_tensor):
        """Concatenate ``xx``, ``yy`` (and optionally ``rr``) on dim 1.

        Args:
            input_tensor: shape(batch, channel, x_dim, y_dim)

        Returns:
            Tensor of shape (batch, channel + 2, x_dim, y_dim), or
            channel + 3 when ``with_r`` is set. ``xx`` runs from -1 to 1
            along dim 2 and ``yy`` from -1 to 1 along dim 3.
        """
        batch_size, _, x_dim, y_dim = input_tensor.size()
        grid_shape = (batch_size, 1, x_dim, y_dim)
        device = input_tensor.device
        dtype = input_tensor.dtype

        # Build on the input's device and cast to its dtype once, rather
        # than building on the CPU and converting at every use.
        xx_channel = _unit_axis(x_dim, device).to(dtype).view(1, 1, x_dim, 1).expand(grid_shape)
        yy_channel = _unit_axis(y_dim, device).to(dtype).view(1, 1, 1, y_dim).expand(grid_shape)

        ret = torch.cat([input_tensor, xx_channel, yy_channel], dim=1)

        if self.with_r:
            # NOTE(review): the 0.5 offset is kept as-is for numerical
            # compatibility. The coordinates span [-1, 1], so this radius is
            # measured from (0.5, 0.5) rather than from the grid centre.
            rr = torch.sqrt(torch.pow(xx_channel - 0.5, 2) + torch.pow(yy_channel - 0.5, 2))
            ret = torch.cat([ret, rr], dim=1)

        return ret


class CoordConv(nn.Module):
    """Conv2d applied after ``AddCoords``, with the input width adjusted.

    Args:
        in_channels: channel count of the tensor passed to ``forward``; the
            coordinate channels are added on top of this automatically.
        out_channels: number of output channels of the convolution.
        with_r: when true, the radial channel is added as well.
        **kwargs: passed to ``nn.Conv2d`` (e.g. ``kernel_size``).
    """

    def __init__(self, in_channels, out_channels, with_r=False, **kwargs):
        super().__init__()
        self.addcoords = AddCoords(with_r=with_r)
        # Two coordinate channels are always added; a third when with_r is set.
        in_size = in_channels + 2
        if with_r:
            in_size += 1
        self.conv = nn.Conv2d(in_size, out_channels, **kwargs)

    def forward(self, x):
        """Add coordinate channels to ``x`` and convolve the result."""
        return self.conv(self.addcoords(x))

# --------------------------------------------------------------------------------