├── CNN_ConvLSTM
│   ├── model_build.py
│   └── utils
│       ├── __pycache__
│       │   ├── convlstm.cpython-37.pyc
│       │   └── resnet.cpython-37.pyc
│       ├── convlstm.py
│       └── resnet.py
├── LICENSE
└── README.md

/CNN_ConvLSTM/model_build.py:
--------------------------------------------------------------------------------
import torch
import torchvision.models as models
from torch import nn
from utils.convlstm import ConvLSTM
from utils.resnet import *  # unused here; torchvision's resnet50 is used below


class ConvLSTM_net(nn.Module):
    def __init__(self, num_hiddens, num_classes):
        super(ConvLSTM_net, self).__init__()  # initialize the parent class

        self.convlstm = ConvLSTM(input_size=(7, 7), input_dim=64, hidden_dim=num_hiddens, kernel_size=(3, 3),
                                 num_layers=2, batch_first=True,
                                 bias=True, return_all_layers=False)

        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))

        # maps (batch_size, num_hiddens) to (batch_size, num_classes)
        self.classifier_convlstm = nn.Sequential(
            nn.Linear(num_hiddens, num_classes, bias=False),
        )

    def forward(self, x):
        conv_lstm_output, _ = self.convlstm(x)  # list, [shape (batch_size, seq_len, num_hiddens, 7, 7)]
        conv_lstm_output = conv_lstm_output[0][:, -1, ...]  # last time step: (batch_size, num_hiddens, 7, 7)
        avgpool = self.avgpool(conv_lstm_output)  # (batch_size, num_hiddens, 1, 1)
        avgpool = avgpool.view(avgpool.size(0), -1)
        output = self.classifier_convlstm(avgpool)  # (batch_size, num_classes)

        return output


class Resnet_ConvLSTM(nn.Module):
    def __init__(self, num_hiddens, num_classes):
        super(Resnet_ConvLSTM, self).__init__()  # initialize the parent class

        model = models.resnet50(pretrained=True)
        net = nn.Sequential()
        net.add_module('conv1', model.conv1)
        net.add_module('bn1', model.bn1)
        net.add_module('relu', model.relu)
        net.add_module('maxpool', model.maxpool)
        net.add_module('layer1', model.layer1)
        net.add_module('layer2', model.layer2)
        net.add_module('layer3', model.layer3)
        net.add_module('layer4', model.layer4)
        self.cnn = net  # output shape: (batch_size, 2048, 7, 7)

        self.cnn_layer = nn.Sequential(nn.Conv2d(2048, 64, kernel_size=1),
                                       nn.ReLU(inplace=True))

        self.convlstm = ConvLSTM(input_size=(7, 7), input_dim=64, hidden_dim=num_hiddens, kernel_size=(3, 3),
                                 num_layers=2, batch_first=True,
                                 bias=True, return_all_layers=False)

        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))

        # maps (batch_size, num_hiddens) to (batch_size, num_classes)
        self.classifier_convlstm = nn.Sequential(
            nn.Linear(num_hiddens, num_classes, bias=False),
        )

    def forward(self, x):
        batch_size, time_steps, channels, height, width = x.size()
        c_in = x.view(batch_size * time_steps, channels, height, width)  # (img_num, 3, 224, 224)
        feature_map = self.cnn(c_in)  # (img_num, 2048, 7, 7)
        feature_map = self.cnn_layer(feature_map)  # (img_num, 64, 7, 7)
        # NOTE: treating all frames as one sequence assumes batch_size == 1;
        # see Resnet_crashattention_ConvLSTM.forward for the batched variant
        conv_lstm_input = feature_map.unsqueeze(dim=0)  # (1, img_num, 64, 7, 7)
        conv_lstm_output, _ = self.convlstm(conv_lstm_input)  # list, [shape (1, img_num, num_hiddens, 7, 7)]
        conv_lstm_output = conv_lstm_output[0][:, -1, ...]  # last time step: (1, num_hiddens, 7, 7)
        avgpool = self.avgpool(conv_lstm_output)  # (1, num_hiddens, 1, 1)
        avgpool = avgpool.view(avgpool.size(0), -1)
        output = self.classifier_convlstm(avgpool)  # (1, num_classes)

        return output


class ChannelAttention(nn.Module):
    def __init__(self, in_planes, ratio=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)

        # use the `ratio` argument instead of the hard-coded 16 so the parameter actually takes effect
        self.fc1 = nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False)

        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x))))
        max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x))))
        out = avg_out + max_out
        out = self.sigmoid(out)
        return out * x


class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()

        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
        padding = 3 if kernel_size == 7 else 1

        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        attn = torch.cat([avg_out, max_out], dim=1)  # renamed from `map`, which shadows the builtin
        attn = self.conv1(attn)
        attn = self.sigmoid(attn)
        x = attn * x

        return x


class Crashattention(nn.Module):
    def __init__(self, kernel_size=None, in_planes=None):
        super(Crashattention, self).__init__()
        self.ChannelAttention = ChannelAttention(in_planes=in_planes)
        self.SpatialAttention = SpatialAttention(kernel_size=kernel_size)
        self.conv = nn.Conv2d(4096, 2048, 1)

    def forward(self, x):
        x = self.ChannelAttention(x)
        x1 = self.SpatialAttention(x)
        output = torch.cat((x, x1), 1)  # concatenate channel- and spatial-attended maps: (N, 4096, 7, 7)
        output = self.conv(output)  # project back to (N, 2048, 7, 7)
        return output


class ResNet_CrashAttention(nn.Module):
    def __init__(self, num_classes):
        super(ResNet_CrashAttention, self).__init__()
        crash_attention = Crashattention(7, 2048)
        model = models.resnet50(pretrained=True)
        net = nn.Sequential()
        net.add_module('conv1', model.conv1)
        net.add_module('bn1', model.bn1)
        net.add_module('relu', model.relu)
        net.add_module('maxpool', model.maxpool)
        net.add_module('layer1', model.layer1)
        net.add_module('layer2', model.layer2)
        net.add_module('layer3', model.layer3)
        net.add_module('layer4', model.layer4)
        net.add_module('crashattention', crash_attention)
        self.cnn = net  # output shape: (batch_size, 2048, 7, 7)

        self.cnn_layer = nn.Sequential(nn.Conv2d(2048, 64, kernel_size=1),
                                       nn.ReLU(inplace=True))
        self.dense = nn.Sequential(
            nn.Linear(in_features=7 * 7 * 64, out_features=512, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(0.2),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        x = self.cnn(x)  # (img_num, 2048, 7, 7)
        x = self.cnn_layer(x)  # (img_num, 64, 7, 7)
        x = self.dense(x.view(x.size(0), -1))
        return x


class Resnet_crashattention_ConvLSTM(nn.Module):
    def __init__(self, num_hiddens, num_classes):
        super(Resnet_crashattention_ConvLSTM, self).__init__()
        crash_attention = Crashattention(7, 2048)
        model = models.resnet50(pretrained=True)
        net = nn.Sequential()
        net.add_module('conv1', model.conv1)
        net.add_module('bn1', model.bn1)
        net.add_module('relu', model.relu)
        net.add_module('maxpool', model.maxpool)
        net.add_module('layer1', model.layer1)
        net.add_module('layer2', model.layer2)
        net.add_module('layer3', model.layer3)
        net.add_module('layer4', model.layer4)
        net.add_module('crashattention', crash_attention)
        self.cnn = net  # output shape: (batch_size, 2048, 7, 7)

        self.cnn_layer = nn.Sequential(nn.Conv2d(2048, 64, kernel_size=1),
                                       nn.ReLU(inplace=True))

        self.convlstm = ConvLSTM(input_size=(7, 7), input_dim=64, hidden_dim=num_hiddens, kernel_size=(3, 3),
                                 num_layers=2, batch_first=True,
                                 bias=True, return_all_layers=False)

        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))

        self.classifier_convlstm = nn.Sequential(
            nn.Linear(num_hiddens, num_classes, bias=False),
        )

    def forward(self, x):
        batch_size, time_steps, channels, height, width = x.size()
        c_in = x.view(batch_size * time_steps, channels, height, width)  # (img_num, 3, 224, 224)
        feature_map = self.cnn(c_in)  # (img_num, 2048, 7, 7)
        feature_map = self.cnn_layer(feature_map)  # (img_num, 64, 7, 7)
        conv_lstm_input = feature_map.view(batch_size, time_steps, 64, 7, 7)  # (batch_size, time_steps, 64, 7, 7)
        conv_lstm_output, _ = self.convlstm(conv_lstm_input)  # list, [shape (batch_size, time_steps, num_hiddens, 7, 7)]
        conv_lstm_output = conv_lstm_output[0][:, -1, ...]  # (batch_size, num_hiddens, 7, 7)
        avgpool = self.avgpool(conv_lstm_output)  # (batch_size, num_hiddens, 1, 1)
        avgpool = avgpool.view(avgpool.size(0), -1)
        output = self.classifier_convlstm(avgpool)  # (batch_size, num_classes)

        return output


if __name__ == '__main__':
    model = Resnet_crashattention_ConvLSTM(64, 10)
    model = model.cuda()
    input = torch.randn((2, 10, 3, 224, 224)).cuda()
    output = model(input)
    print(output.shape)
--------------------------------------------------------------------------------
/CNN_ConvLSTM/utils/convlstm.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn


class ConvLSTMCell(nn.Module):

    def __init__(self, input_size, input_dim, hidden_dim, kernel_size, bias):
        """
        Initialize ConvLSTM cell.

        Parameters
        ----------
        input_size: (int, int)
            Height and width of input tensor as (height, width).
        input_dim: int
            Number of channels of input tensor.
        hidden_dim: int
            Number of channels of hidden state.
        kernel_size: (int, int)
            Size of the convolutional kernel.
        bias: bool
            Whether or not to add the bias.
23 | """ 24 | 25 | super(ConvLSTMCell, self).__init__() 26 | 27 | self.height, self.width = input_size 28 | self.input_dim = input_dim 29 | self.hidden_dim = hidden_dim 30 | 31 | self.kernel_size = kernel_size 32 | self.padding = kernel_size[0] // 2, kernel_size[1] // 2 33 | self.bias = bias 34 | 35 | self.conv = nn.Conv2d(in_channels=self.input_dim + self.hidden_dim, 36 | out_channels=4 * self.hidden_dim, #输出为4*hidden_dim,后面拆成四个部分 37 | kernel_size=self.kernel_size, 38 | padding=self.padding, 39 | bias=self.bias) 40 | 41 | def forward(self, input_tensor, cur_state): 42 | 43 | h_cur, c_cur = cur_state 44 | 45 | combined = torch.cat([input_tensor, h_cur], dim=1) # concatenate along channel axis 46 | 47 | combined_conv = self.conv(combined) 48 | # 输入门,遗忘门,输出门,候选记忆细胞 49 | cc_i, cc_f, cc_o, cc_g = torch.split(combined_conv, self.hidden_dim, dim=1) 50 | i = torch.sigmoid(cc_i) 51 | f = torch.sigmoid(cc_f) 52 | o = torch.sigmoid(cc_o) 53 | g = torch.tanh(cc_g) 54 | 55 | c_next = f * c_cur + i * g 56 | h_next = o * torch.tanh(c_next) 57 | 58 | return h_next, c_next 59 | 60 | def init_hidden(self, batch_size): 61 | return (Variable(torch.zeros(batch_size, self.hidden_dim, self.height, self.width)).cuda(), 62 | Variable(torch.zeros(batch_size, self.hidden_dim, self.height, self.width)).cuda()) 63 | 64 | 65 | class ConvLSTM(nn.Module): 66 | 67 | def __init__(self, input_size, input_dim, hidden_dim, kernel_size, num_layers, 68 | batch_first=False, bias=True, return_all_layers=False): 69 | super(ConvLSTM, self).__init__() 70 | 71 | self._check_kernel_size_consistency(kernel_size) 72 | 73 | # Make sure that both `kernel_size` and `hidden_dim` are lists having len == num_layers 74 | kernel_size = self._extend_for_multilayer(kernel_size, num_layers) 75 | hidden_dim = self._extend_for_multilayer(hidden_dim, num_layers) 76 | if not len(kernel_size) == len(hidden_dim) == num_layers: 77 | raise ValueError('Inconsistent list length.') 78 | 79 | self.height, self.width = input_size 80 | 81 | self.input_dim = input_dim 82 | self.hidden_dim = hidden_dim 83 | self.kernel_size = kernel_size 84 | self.num_layers = num_layers 85 | self.batch_first = batch_first 86 | self.bias = bias 87 | self.return_all_layers = return_all_layers 88 | 89 | cell_list = [] 90 | for i in range(0, self.num_layers): 91 | cur_input_dim = self.input_dim if i == 0 else self.hidden_dim[i-1] 92 | 93 | cell_list.append(ConvLSTMCell(input_size=(self.height, self.width), 94 | input_dim=cur_input_dim, 95 | hidden_dim=self.hidden_dim[i], 96 | kernel_size=self.kernel_size[i], 97 | bias=self.bias)) 98 | 99 | self.cell_list = nn.ModuleList(cell_list) 100 | 101 | def forward(self, input_tensor, hidden_state=None): 102 | """ 103 | 104 | Parameters 105 | ---------- 106 | input_tensor: todo 107 | 5-D Tensor either of shape (t, b, c, h, w) or (b, t, c, h, w) 108 | hidden_state: todo 109 | None. 
            must be None (stateful ConvLSTM is not yet implemented)

        Returns
        -------
        layer_output_list, last_state_list
        """
        if not self.batch_first:
            # (t, b, c, h, w) -> (b, t, c, h, w)
            input_tensor = input_tensor.permute(1, 0, 2, 3, 4)

        # TODO: implement stateful ConvLSTM
        if hidden_state is not None:
            raise NotImplementedError()
        else:
            hidden_state = self._init_hidden(batch_size=input_tensor.size(0))

        layer_output_list = []
        last_state_list = []

        seq_len = input_tensor.size(1)
        cur_layer_input = input_tensor

        for layer_idx in range(self.num_layers):
            # iterate over layers

            h, c = hidden_state[layer_idx]
            output_inner = []
            for t in range(seq_len):
                # iterate over time steps

                h, c = self.cell_list[layer_idx](input_tensor=cur_layer_input[:, t, :, :, :],
                                                 cur_state=[h, c])
                output_inner.append(h)

            layer_output = torch.stack(output_inner, dim=1)
            cur_layer_input = layer_output

            layer_output_list.append(layer_output)
            last_state_list.append([h, c])

        if not self.return_all_layers:
            layer_output_list = layer_output_list[-1:]
            last_state_list = last_state_list[-1:]

        return layer_output_list, last_state_list

    def _init_hidden(self, batch_size):
        init_states = []
        for i in range(self.num_layers):
            init_states.append(self.cell_list[i].init_hidden(batch_size))
        return init_states

    @staticmethod
    def _check_kernel_size_consistency(kernel_size):
        if not (isinstance(kernel_size, tuple) or
                (isinstance(kernel_size, list) and all([isinstance(elem, tuple) for elem in kernel_size]))):
            raise ValueError('`kernel_size` must be tuple or list of tuples')

    @staticmethod
    def _extend_for_multilayer(param, num_layers):
        if not isinstance(param, list):
            param = [param] * num_layers
        return param
--------------------------------------------------------------------------------
/CNN_ConvLSTM/utils/resnet.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
from torch.utils.model_zoo import load_url as load_state_dict_from_url

__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
           'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
           'wide_resnet50_2', 'wide_resnet101_2']


model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
    'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
    'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
    'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
    'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
}


# NOTE: NONLocalBlock2D is neither defined nor imported anywhere in this repository,
# so this ResNet variant will not run as-is; an external non-local block
# implementation must be provided for it to work.
def nonlocalnet(input_layer, input_channel):
    if torch.cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        net = NONLocalBlock2D(in_channels=input_channel)
        out = net(input_layer)
    else:
        net = NONLocalBlock2D(in_channels=input_channel)
        out = net(input_layer)
    return out


def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=dilation, groups=groups, bias=False, dilation=dilation)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


class BasicBlock(nn.Module):
    expansion = 1
    __constants__ = ['downsample']

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(BasicBlock, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if groups != 1 or base_width != 64:
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4
    __constants__ = ['downsample']

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(Bottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(planes * (base_width / 64.)) * groups
        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=replace_stride_with_dilation[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def _forward_impl(self, x):
        # See note [TorchScript super()]
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        # a non-local attention block is applied after every residual stage
        x = self.layer1(x)
        x = nonlocalnet(x, x.size(1))
        x = self.layer2(x)
        x = nonlocalnet(x, x.size(1))
        x = self.layer3(x)
        x = nonlocalnet(x, x.size(1))
        x = self.layer4(x)
        x = nonlocalnet(x, x.size(1))

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

    def forward(self, x):
        return self._forward_impl(x)


def _resnet(arch, block, layers, pretrained, progress, **kwargs):
    model = ResNet(block, layers, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls[arch],
                                              progress=progress)
        model.load_state_dict(state_dict)
    return model


def resnet18(pretrained=False, progress=True, **kwargs):
    r"""ResNet-18 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
                   **kwargs)


def resnet34(pretrained=False, progress=True, **kwargs):
    r"""ResNet-34 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
                   **kwargs)


def resnet50(pretrained=False, progress=True, **kwargs):
    r"""ResNet-50 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
                   **kwargs)


def resnet101(pretrained=False, progress=True, **kwargs):
    r"""ResNet-101 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress,
                   **kwargs)


def resnet152(pretrained=False, progress=True, **kwargs):
    r"""ResNet-152 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress,
                   **kwargs)


def resnext50_32x4d(pretrained=False, progress=True, **kwargs):
    r"""ResNeXt-50 32x4d model from
    `"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs['groups'] = 32
    kwargs['width_per_group'] = 4
    return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3],
                   pretrained, progress, **kwargs)


def resnext101_32x8d(pretrained=False, progress=True, **kwargs):
    r"""ResNeXt-101 32x8d model from
    `"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs['groups'] = 32
    kwargs['width_per_group'] = 8
    return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3],
                   pretrained, progress, **kwargs)


def wide_resnet50_2(pretrained=False, progress=True, **kwargs):
r"""Wide ResNet-50-2 model from 331 | `"Wide Residual Networks" `_ 332 | 333 | The model is the same as ResNet except for the bottleneck number of channels 334 | which is twice larger in every block. The number of channels in outer 1x1 335 | convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048 336 | channels, and in Wide ResNet-50-2 has 2048-1024-2048. 337 | 338 | Args: 339 | pretrained (bool): If True, returns a model pre-trained on ImageNet 340 | progress (bool): If True, displays a progress bar of the download to stderr 341 | """ 342 | kwargs['width_per_group'] = 64 * 2 343 | return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3], 344 | pretrained, progress, **kwargs) 345 | 346 | 347 | def wide_resnet101_2(pretrained=False, progress=True, **kwargs): 348 | r"""Wide ResNet-101-2 model from 349 | `"Wide Residual Networks" `_ 350 | 351 | The model is the same as ResNet except for the bottleneck number of channels 352 | which is twice larger in every block. The number of channels in outer 1x1 353 | convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048 354 | channels, and in Wide ResNet-50-2 has 2048-1024-2048. 355 | 356 | Args: 357 | pretrained (bool): If True, returns a model pre-trained on ImageNet 358 | progress (bool): If True, displays a progress bar of the download to stderr 359 | """ 360 | kwargs['width_per_group'] = 64 * 2 361 | return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3], 362 | pretrained, progress, **kwargs) 363 | 364 | # if __name__=='__main__': 365 | # net=resnet50(pretrained=True).cuda() 366 | # # input=torch.zeros((10,3,224,224)).cuda() 367 | # # print(net(input).shape) 368 | # for name,_ in net.named_parameters(): 369 | # print(name) 370 | # # print(net) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 zhouwei 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# ResNet_ConvLSTM
A repository for crash detection in a ResNet + ConvLSTM fashion.

To run this code repository, you need:
- Python > 3.0
- PyTorch > 1.0
- a GPU with enough memory

Among the scripts:
- `model_build.py` provides the model definitions
- `utils/resnet.py` defines how the ResNet backbone is built
- `utils/convlstm.py` defines how the ConvLSTM module is built
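
## Quick start

A minimal usage sketch, not part of the original repository: it assumes you run from inside the `CNN_ConvLSTM/` directory (so that `utils` is importable), that a CUDA device is available, and it picks 64 ConvLSTM hidden channels and 2 output classes (crash / no crash) purely for illustration. Building the model downloads the ImageNet-pretrained ResNet-50 weights on first use.

```python
import torch
from model_build import Resnet_crashattention_ConvLSTM

# hypothetical hyperparameters: 64 ConvLSTM hidden channels, 2 output classes
model = Resnet_crashattention_ConvLSTM(num_hiddens=64, num_classes=2).cuda()

# a batch of 2 clips, each with 10 RGB frames of size 224x224
clips = torch.randn(2, 10, 3, 224, 224).cuda()
logits = model(clips)
print(logits.shape)  # torch.Size([2, 2])
```

The same pattern works for `Resnet_ConvLSTM`, with the caveat that its `forward` treats the whole batch as one sequence and therefore expects `batch_size == 1`.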