├── Attentive Feedback Network for Boundary-Aware Salient Object Detection
│   ├── GPM.py
│   └── readme.md
├── CARAFE Content-Aware ReAssembly of FEature
│   ├── CARAFE.py
│   └── readme.md
├── Co-Scale Conv-Attentional Image Transformers
│   ├── ConvAttention.py
│   └── readme.md
├── DALI-Saliency
│   ├── datasets.py
│   ├── readme.md
│   └── train.py
├── Dynamic Multi-scale Filters for Semantic Segmentation
│   ├── DCM.py
│   └── readme.md
└── README.md

--------------------------------------------------------------------------------
/Attentive Feedback Network for Boundary-Aware Salient Object Detection/GPM.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# @Time    : 2019/7/10 20:37
# @Author  : Lart Pang
# @FileName: GPM.py
# @Home    : https://www.yuque.com/lart/architecture/mutli
# @GitHub  : https://github.com/lartpang

# https://drive.google.com/open?id=1EVZR8cNGUv3zb7JtR1fxbXZ8lp5mbgWe
import torch
from torch import nn


class BasicConv2d(nn.Module):
    def __init__(
        self,
        in_planes,
        out_planes,
        kernel_size,
        stride=1,
        padding=0,
        dilation=1,
        groups=1,
        bias=False
    ):
        super(BasicConv2d, self).__init__()

        self.basicconv = nn.Sequential(
            nn.Conv2d(
                in_planes,
                out_planes,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
                dilation=dilation,
                groups=groups,
                bias=bias
            ), nn.BatchNorm2d(out_planes), nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.basicconv(x)


class GPM(nn.Module):
    def __init__(self, in_C, out_C, n=(2, 4, 7)):
        super(GPM, self).__init__()
        self.n = n
        self.cnn_list = nn.ModuleList()
        for i in self.n:
            # Each branch splits the map into an i x i grid; the grid cells are
            # stacked along the channel axis, so the branch conv sees i * i * in_C channels.
            mid_C = i * i * in_C
            self.cnn_list.append(BasicConv2d(mid_C, mid_C, 3, 1, 1))

        # NOTE: the original code hardcoded `3 * in_C`, which only matches the
        # default n=(2, 4, 7); using len(self.n) keeps other grid settings working.
        self.fuse = BasicConv2d(len(self.n) * in_C, out_C, 1)

    def forward(self, in_feat):
        # Both spatial dimensions must be divisible by every grid count.
        assert all(
            in_feat.size(2) % n == 0 and in_feat.size(3) % n == 0 for n in self.n
        )

        feats = []
        for idx, n in enumerate(self.n):
            # Split into n x n spatial patches (row-major) and stack them along channels.
            chunk_feats = [y for x in in_feat.chunk(n, 2) for y in x.chunk(n, 3)]
            chunk_feats = torch.cat(chunk_feats, dim=1)
            chunk_feats = self.cnn_list[idx](chunk_feats)

            # Reassemble the patches back onto the spatial grid: the channel axis
            # holds n rows of n patches each, in the same row-major order as above.
            total_feat = []
            for x in chunk_feats.chunk(n, 1):
                row_feat = torch.cat(list(x.chunk(n, 1)), dim=3)
                total_feat.append(row_feat)

            total_feat = torch.cat(total_feat, dim=2)
            feats.append(total_feat)

        return self.fuse(torch.cat(feats, dim=1))


if __name__ == '__main__':
    a = torch.rand((4, 32, 28, 28)).cuda()
    gpm = GPM(32, 32).cuda()
    print(gpm(a).size())

--------------------------------------------------------------------------------
/Attentive Feedback Network for Boundary-Aware Salient Object Detection/readme.md:
--------------------------------------------------------------------------------
# Attentive Feedback Network for Boundary-Aware Salient Object Detection

![](https://user-images.githubusercontent.com/26847524/74926389-c9331700-5410-11ea-92e1-77823d678c26.png)

## Overall Architecture

![](https://user-images.githubusercontent.com/26847524/74926418-d2bc7f00-5410-11ea-863c-ccd3d1c4ad92.png)

## Reproduced Module

![](https://user-images.githubusercontent.com/26847524/74926455-dcde7d80-5410-11ea-9fa8-b07e65356031.png)

## Related Links

* Analysis:
    - https://blog.csdn.net/P_LarT/article/details/90705714
    - https://www.yuque.com/lart/papers/usueg3
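
## Quick Usage

A minimal smoke test, mirroring the `__main__` block in `GPM.py` (run on CPU here). The 32-channel, 28x28 input is just an assumption chosen so that both spatial dimensions are divisible by every grid count in the default `n=(2, 4, 7)`; the import assumes this folder is the working directory.

```python
import torch

from GPM import GPM  # assumes this folder is the working directory

feat = torch.rand(4, 32, 28, 28)  # H and W must be divisible by 2, 4 and 7
gpm = GPM(in_C=32, out_C=32)
print(gpm(feat).size())  # torch.Size([4, 32, 28, 28])
```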
--------------------------------------------------------------------------------
/CARAFE Content-Aware ReAssembly of FEature/CARAFE.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# @Time    : 2019/8/2 15:23
# @Author  : Lart Pang
# @FileName: CARAFE.py
# @Project : PyTorchCoding
# @GitHub  : https://github.com/lartpang

import torch
import torch.nn as nn
import torch.nn.functional as F


class CARAFE(nn.Module):
    def __init__(self, inC, outC, kernel_size=3, up_factor=2):
        super(CARAFE, self).__init__()
        self.kernel_size = kernel_size
        self.up_factor = up_factor
        self.down = nn.Conv2d(inC, inC // 4, 1)
        self.encoder = nn.Conv2d(
            inC // 4, self.up_factor**2 * self.kernel_size**2, self.kernel_size, 1,
            self.kernel_size // 2
        )
        self.out = nn.Conv2d(inC, outC, 1)

    def forward(self, in_tensor):
        N, C, H, W = in_tensor.size()

        # N,C,H,W -> N,C,delta*H,delta*W
        # kernel prediction module
        kernel_tensor = self.down(in_tensor)  # N, Cm, H, W
        kernel_tensor = self.encoder(kernel_tensor)  # N, S^2 * Kup^2, H, W
        kernel_tensor = F.pixel_shuffle(kernel_tensor, self.up_factor)  # N, Kup^2, S*H, S*W
        kernel_tensor = F.softmax(kernel_tensor, dim=1)  # N, Kup^2, S*H, S*W
        kernel_tensor = kernel_tensor.unfold(2, self.up_factor, step=self.up_factor)
        kernel_tensor = kernel_tensor.unfold(3, self.up_factor, step=self.up_factor)
        kernel_tensor = kernel_tensor.reshape(N, self.kernel_size**2, H, W, self.up_factor**2)
        kernel_tensor = kernel_tensor.permute(0, 2, 3, 1, 4)  # N, H, W, Kup^2, S^2

        # content-aware reassembly module
        # tensor.unfold: dim, size, step
        in_tensor = F.pad(
            in_tensor,
            pad=(
                self.kernel_size // 2, self.kernel_size // 2, self.kernel_size // 2,
                self.kernel_size // 2
            ),
            mode='constant',
            value=0
        )
        in_tensor = in_tensor.unfold(2, self.kernel_size, step=1)
        in_tensor = in_tensor.unfold(3, self.kernel_size, step=1)
        in_tensor = in_tensor.reshape(N, C, H, W, -1)
        in_tensor = in_tensor.permute(0, 2, 3, 1, 4)  # N, H, W, C, Kup^2

        out_tensor = torch.matmul(in_tensor, kernel_tensor)  # N, H, W, C, S^2
        out_tensor = out_tensor.reshape(N, H, W, -1)
        out_tensor = out_tensor.permute(0, 3, 1, 2)
        out_tensor = F.pixel_shuffle(out_tensor, self.up_factor)
        out_tensor = self.out(out_tensor)
        return out_tensor


if __name__ == '__main__':
    a = torch.rand(4, 20, 10, 10)
    # NOTE: `outC` has no default value, so it must be passed explicitly
    # (the original snippet called `CARAFE(20)`, which raises a TypeError).
    sub = CARAFE(20, 20)
    print(sub(a).size())

--------------------------------------------------------------------------------
/CARAFE Content-Aware ReAssembly of FEature/readme.md:
--------------------------------------------------------------------------------
# CARAFE: Content-Aware ReAssembly of FEatures (ICCV 2019)

![image](https://user-images.githubusercontent.com/26847524/74926754-5b3b1f80-5411-11ea-85df-cd39326ea3b9.png)

## A Note Up Front

This implementation is written purely from my own understanding and borrows a few tricks from [2]. If you spot any mistakes, please point them out so we can improve together!

## Main Structure

![image](https://user-images.githubusercontent.com/26847524/74926777-63935a80-5411-11ea-86be-67b141188dae.png)

## Related Links

* [1]: CARAFE: a lightweight, general-purpose upsampling operator, article by Kai Chen on Zhihu: https://zhuanlan.zhihu.com/p/76063768
* [2]: A roundup of introductions to dynamic filters, article by Happy on Zhihu: https://zhuanlan.zhihu.com/p/100470879
* [3]: Paper: https://arxiv.org/pdf/1905.02188.pdf
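
## Quick Usage

A minimal sketch mirroring the `__main__` block in `CARAFE.py`. In a decoder, the module is meant as a content-aware stand-in for a fixed upsampler such as bilinear `nn.Upsample`; note that `outC` has no default and must be given explicitly.

```python
import torch

from CARAFE import CARAFE  # assumes this folder is the working directory

x = torch.rand(4, 20, 10, 10)
up = CARAFE(inC=20, outC=20, kernel_size=3, up_factor=2)
print(up(x).size())  # torch.Size([4, 20, 20, 20]); spatial size doubled
```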
--------------------------------------------------------------------------------
/Co-Scale Conv-Attentional Image Transformers/ConvAttention.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# @Time    : 2021/4/15
# @Author  : Lart Pang
# @GitHub  : https://github.com/lartpang

import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange


class MHConvAttention(nn.Module):
    """
    https://github.com/mlpc-ucsd/CoaT

    @misc{xu2021coscale,
        title={Co-Scale Conv-Attentional Image Transformers},
        author={Weijian Xu and Yifan Xu and Tyler Chang and Zhuowen Tu},
        year={2021},
        eprint={2104.06399},
        archivePrefix={arXiv},
        primaryClass={cs.CV}
    }
    """

    def __init__(self, num_heads=4, embedding_dim=64, window_size=5):
        super().__init__()
        self.nh = num_heads
        self.window_size = window_size
        self.pos_embed_dim = embedding_dim // self.nh
        self.rel_pos_embed = nn.Parameter(torch.zeros(self.pos_embed_dim, window_size, window_size))
        nn.init.trunc_normal_(self.rel_pos_embed, std=0.02)

        self.qkv_conv = nn.Conv2d(embedding_dim, 3 * embedding_dim, 1, bias=False)

        # Depthwise Convolution based Conditional Positional Encodings for Vision Transformers
        self.cpe = nn.Conv2d(embedding_dim, embedding_dim, 3, 1, 1, bias=False, groups=embedding_dim)

        self.out = nn.Conv2d(embedding_dim, embedding_dim, 1, bias=False)

    def forward(self, src):
        """
        :param src: B,C,H,W
        :return: B,C,H,W
        """
        _, C, H, W = src.shape
        scaling_factor = (C // self.nh) ** -0.5

        # Convolutional Position Encoding
        src = self.cpe(src) + src

        # Linear Projection C -> C,C,C
        qkv = self.qkv_conv(src)
        q, k, v = qkv.chunk(3, dim=1)

        q = rearrange(q, "b (nh hd) h w -> (b nh) hd h w", nh=self.nh)
        k = rearrange(k, "b (nh hd) h w -> (b nh) hd h w", nh=self.nh)
        v = rearrange(v, "b (nh hd) h w -> (b nh) hd h w", nh=self.nh)

        # Factorized Attention
        content_lambda = torch.einsum("bin, bon -> bio", k.flatten(-2).softmax(-1), v.flatten(-2))
        content_output = torch.einsum("bin, bio -> bon", q.flatten(-2) * scaling_factor, content_lambda)
        # content_output = content_output.unflatten(dim=-1, sizes=(H, W))
        content_output = rearrange(content_output, "bnh hd (h w) -> bnh hd h w", h=H)

        # Convolutional Relative Position Encoding
        position_lambda = F.conv2d(
            v,
            weight=rearrange(self.rel_pos_embed, "D Mx My -> D 1 Mx My"),
            padding=self.window_size // 2,
            groups=self.pos_embed_dim,
        )
        position_output = q * position_lambda

        # Output Feature Map
        result = content_output + position_output

        result = rearrange(result, "(b nh) hd h w -> b (nh hd) h w", nh=self.nh)
        return self.out(result)


if __name__ == "__main__":
    src = torch.randn(1, 64, 32, 32)

    conv_attention = MHConvAttention()
    print(conv_attention(src).shape)
--------------------------------------------------------------------------------
/Co-Scale Conv-Attentional Image Transformers/readme.md:
--------------------------------------------------------------------------------
## Co-Scale Conv-Attentional Image Transformers

### Note

If you have a better implementation or find a bug, please let me know 😆, thanks!

- 2021-04-23 The authors have released the official code of CoaT, which should be preferred. Some details of my implementation differ from the authors'. For example:
    - I did not introduce the CLS token used for the classification task, which changes the concrete form of both the attention computation and the convolutional relative position encoding.
    - I have only implemented the variant in which all the attention heads share the same window size.

### Change Log

- 2021-04-18 Change the method `tensor.unflatten()` to `einops.rearrange()` to avoid some problems.
- 2021-04-16 Modify the `Conditional Positional Encodings for Vision Transformers` to a depthwise convolution.
- 2021-04-15 Create the initial version of Conv-Attention.

### Conv-Attentional

![image](https://user-images.githubusercontent.com/26847524/114978283-9d4bde00-9ebb-11eb-9229-565ba41d59c6.png)

![image](https://user-images.githubusercontent.com/26847524/114978366-b3599e80-9ebb-11eb-8bb9-de1ee6ae696e.png)
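
### Quick Usage

A minimal shape check, mirroring the `__main__` block in `ConvAttention.py`. The module maps `B,C,H,W` to `B,C,H,W`, and `embedding_dim` must be divisible by `num_heads`; the 32x32 input size is an arbitrary assumption for this smoke test.

```python
import torch

from ConvAttention import MHConvAttention  # assumes this folder is the working directory

src = torch.randn(1, 64, 32, 32)
conv_attention = MHConvAttention(num_heads=4, embedding_dim=64, window_size=5)
print(conv_attention(src).shape)  # torch.Size([1, 64, 32, 32]); shape-preserving
```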
### Project

- https://github.com/mlpc-ucsd/CoaT

### bibtex

```
@misc{xu2021coscale,
    title={Co-Scale Conv-Attentional Image Transformers},
    author={Weijian Xu and Yifan Xu and Tyler Chang and Zhuowen Tu},
    year={2021},
    eprint={2104.06399},
    archivePrefix={arXiv},
    primaryClass={cs.CV}
}
```

--------------------------------------------------------------------------------
/DALI-Saliency/datasets.py:
--------------------------------------------------------------------------------
import os
import random
from random import shuffle

import numpy as np
import nvidia.dali.ops as ops
import nvidia.dali.types as types
from nvidia.dali.pipeline import Pipeline
from nvidia.dali.plugin.pytorch import DALIGenericIterator


class ExternalInputIterator(object):
    def __init__(self, root, batch_size, random_shuffle=True):
        self.source_path = os.path.join(root, 'Image')
        self.mask_path = os.path.join(root, 'Mask')
        self.files = [x.split('.')[0] for x in os.listdir(self.source_path)]
        self.batch_size = batch_size
        if random_shuffle:
            shuffle(self.files)

    def __iter__(self):
        self.i = 0
        self.n = len(self.files)
        return self

    def __next__(self):
        try:
            images = []
            masks = []
            for _ in range(self.batch_size):
                img_path = os.path.join(self.source_path, self.files[self.i] + '.jpg')
                gt_path = os.path.join(self.mask_path, self.files[self.i] + '.png')

                # Read the raw encoded bytes; decoding happens later on the GPU.
                with open(img_path, 'rb') as img:
                    images.append(np.frombuffer(img.read(), dtype=np.uint8))
                with open(gt_path, 'rb') as gt:
                    masks.append(np.frombuffer(gt.read(), dtype=np.uint8))

                self.i = self.i + 1
            return (images, masks)
        except IndexError:
            # Running past the end of the file list ends the epoch.
            self.i = 0
            raise StopIteration

    next = __next__


class ImagePipeline(Pipeline):
    def __init__(
        self, imageset_dir, image_size, random_shuffle, batch_size=4, num_threads=2, device_id=0
    ):
        super(ImagePipeline, self).__init__(batch_size, num_threads, device_id, seed=12)
        self.imageset_dir = imageset_dir
        self.random_shuffle = random_shuffle

        eii = ExternalInputIterator(
            root=self.imageset_dir, batch_size=self.batch_size, random_shuffle=self.random_shuffle
        )
        self.iterator = iter(eii)
        self.num_inputs = len(self.iterator.files)

        self.input_image = ops.ExternalSource()
        self.input_mask = ops.ExternalSource()

        self.decode_image = ops.ImageDecoder(device="mixed", output_type=types.RGB)
        self.decode_mask = ops.ImageDecoder(device="mixed", output_type=types.GRAY)

        # The rest of pre-processing is done on the GPU
        self.res = ops.Resize(device="gpu", resize_x=image_size, resize_y=image_size)
        self.flip = ops.Flip(device="gpu", horizontal=1, vertical=0)

        # NOTE: the angle is sampled once when the pipeline is built, so every
        # batch produced by this pipeline shares the same rotation.
        rotate_degree = random.random() * 2 * 10 - 10
        self.rotate_image = ops.Rotate(
            device="gpu", angle=rotate_degree, interp_type=types.DALIInterpType.INTERP_LINEAR
        )
        self.rotate_mask = ops.Rotate(
            device="gpu", angle=rotate_degree, interp_type=types.DALIInterpType.INTERP_NN
        )

        self.cmnp_image = ops.CropMirrorNormalize(
            device="gpu",
            output_dtype=types.FLOAT,
            output_layout=types.NCHW,
            image_type=types.RGB,
            mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
            std=[0.229 * 255, 0.224 * 255, 0.225 * 255]
        )
        self.cmnp_mask = ops.CropMirrorNormalize(
            device="gpu",
            output_dtype=types.FLOAT,
            output_layout=types.NCHW,
            image_type=types.GRAY,
            mean=[0],
            std=[255]
        )

    # epoch_size = number of (image, mask) image pairs in the dataset
    def epoch_size(self, name=None):
        return self.num_inputs

    def define_graph(self):
        self.images = self.input_image(name="images")
        self.masks = self.input_mask(name="masks")

        images = self.decode_image(self.images)
        masks = self.decode_mask(self.masks)

        output_image = self.res(images)
        output_mask = self.res(masks)

        # NOTE: this branch is evaluated once, at graph-definition time, so the
        # flip is either always on or always off for the lifetime of the pipeline.
        if random.random() < 0.5:
            output_image = self.flip(output_image)
            output_mask = self.flip(output_mask)

        output_image = self.rotate_image(output_image)
        output_mask = self.rotate_mask(output_mask)

        output_image = self.cmnp_image(output_image)
        output_mask = self.cmnp_mask(output_mask)

        return (output_image, output_mask)

    def iter_setup(self):
        try:
            (images, masks) = self.iterator.next()
            self.feed_input(self.images, images)
            self.feed_input(self.masks, masks)
        except StopIteration:
            self.iterator = iter(
                ExternalInputIterator(
                    root=self.imageset_dir,
                    batch_size=self.batch_size,
                    random_shuffle=self.random_shuffle
                )
            )
            raise StopIteration


if __name__ == '__main__':
    datapath = 'training_set'
    gpu_id = 0
    batch_size = 5

    train_pipe = ImagePipeline(
        imageset_dir='/home/erti/Datasets/RGBSaliency/DUTS/Train',
        image_size=128,
        random_shuffle=False,
        batch_size=batch_size
    )
    m_train = train_pipe.epoch_size()
    print("Size of the training set: ", m_train)
    train_pipe_loader = DALIGenericIterator(
        pipelines=train_pipe,
        output_map=["images", "masks"],
        size=m_train,
        auto_reset=True,
        fill_last_batch=False,
        last_batch_padded=True
    )
    # The shuffle is only re-triggered when the batch size evenly divides
    # the length of the whole dataset.
    for j in range(5):
        print(train_pipe.iterator.files)
        for i, train_data in enumerate(train_pipe_loader):
            train_inputs = train_data[0]['images']
            train_labels = train_data[0]['masks']
            print(train_inputs.max(), train_inputs.min(), train_labels.max(), train_labels.min())
--------------------------------------------------------------------------------
/DALI-Saliency/readme.md:
--------------------------------------------------------------------------------
# DALI-Saliency

## 2019-11-03

This folder holds the code that uses `DALI` to accelerate PyTorch's pre-processing. The code here is the same as in my own project; when I have time I will extract it properly and write up a summary of how to use `DALI` for saliency detection.
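
## Quick Usage

A minimal sketch of wiring the pipeline into a training loop, mirroring the `__main__` block in `datasets.py` and using the (older) DALI API that this code was written against. The dataset path is a placeholder: point it at any folder containing paired `Image/*.jpg` and `Mask/*.png` files.

```python
from nvidia.dali.plugin.pytorch import DALIGenericIterator

from datasets import ImagePipeline  # assumes this folder is the working directory

train_pipe = ImagePipeline(
    imageset_dir='/path/to/DUTS/Train',  # placeholder: needs Image/*.jpg and Mask/*.png
    image_size=128,
    random_shuffle=True,
    batch_size=4,
)
loader = DALIGenericIterator(
    pipelines=train_pipe,
    output_map=["images", "masks"],
    size=train_pipe.epoch_size(),
    auto_reset=True,
    fill_last_batch=False,
    last_batch_padded=True,
)
for batch in loader:
    images = batch[0]['images']  # already decoded and normalized on the GPU
    masks = batch[0]['masks']
```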
--------------------------------------------------------------------------------
/DALI-Saliency/train.py:
--------------------------------------------------------------------------------
import os.path as osp
import shutil
from datetime import datetime
from pprint import pprint

import numpy as np
import torch
import torch.backends.cudnn as torchcudnn
from PIL import Image
from nvidia.dali.plugin.pytorch import DALIGenericIterator
from torch.nn import BCELoss
from torch.optim import Adam, SGD, lr_scheduler
from torchvision import transforms
from tqdm import tqdm

from Loss.ConsistencyEnhanecdLoss import CELV1
from Loss.DiceLoss import DiceLoss
from Utils.datasets import DataLoaderX, ImageFolder
from Utils.datasets_dali import ImagePipeline
from Utils.epoch_config import arg_config, path_config, proj_root
from Utils.metric import cal_maxf, cal_pr_mae_meanf
from Utils.misc import AvgMeter, check_mkdir, make_log

torch.manual_seed(0)
torch.cuda.manual_seed_all(0)
torchcudnn.benchmark = True
# Use deterministic convolutions: with torchcudnn.deterministic = True, the
# CuDNN convolution ops behave identically on every run.
torchcudnn.deterministic = True
torchcudnn.enabled = True


class Trainer():
    def __init__(self, args, path):
        super(Trainer, self).__init__()
        self.args = args
        self.path = path
        self.dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.to_pil = transforms.ToPILImage()
        self.best_results = {"maxf": 0, "meanf": 0, "mae": 0}

        # Create the log files ahead of time to avoid triggering file-creation
        # events when they would otherwise be created automatically.
        check_mkdir(self.path["pth_log"])
        make_log(self.path["val_log"], f"=== val_log {datetime.now()} ===")
        make_log(self.path["tr_log"], f"=== tr_log {datetime.now()} ===")

        # Create the folders for predictions, checkpoints and tensorboard logs ahead of time.
        check_mkdir(self.path["save"])
        check_mkdir(self.path["pth"])
        check_mkdir(self.path["tb"])
        self.save_path = self.path["save"]

        # Attributes that depend on the ones above.
        self.pth_path = self.path["final_net"]
        self.tr_loader, self.te_loader, self.val_loader = self.make_loader()
        if not self.args['use_backbone']:
            self.net = self.args[self.args["NET"]]["net"]().to(self.dev)
        else:
            self.net = self.args[self.args["NET"]]["net"](self.args[self.args["backbone"]]).to(
                self.dev
            )
        # Print and log the model's FLOPs and parameter counts.
        # model_msg = get_FLOPs_Params(self.net, self.args["input_size"], mode="print&return")
        # make_log(self.path["val_log"], f"=== model info ==={self.net}"
        #          f"\n{model_msg}\n=== val record ===\n")
        pprint(self.args)

        # Losses.
        self.sod_crit = BCELoss(reduction=self.args['reduction']).to(self.dev)
        self.dice_loss = DiceLoss().to(self.dev)
        # self.sm_loss = SmeasureLoss().to(self.dev)

        # Training schedule.
        self.start_epoch = self.args["start_epoch"]
        self.end_epoch = self.args["end_epoch"]

        # Compute the number of iterations in the normal-decay part; one epoch
        # contains len(self.tr_loader) batches.
        self.epoch_num = self.end_epoch - self.start_epoch
        if self.tr_loader._size % self.args['batch_size'] == 0:
            self.niter_per_epoch = self.tr_loader._size // self.args['batch_size']
        else:
            self.niter_per_epoch = self.tr_loader._size // self.args['batch_size'] + 1
        self.iter_num = self.epoch_num * self.niter_per_epoch

        print(f" ==>> total number of iterations: {self.iter_num} <<== ")
        self.opti = self.make_optim()
        self.sche = self.make_scheduler()

    def train(self):
        for curr_epoch in range(self.start_epoch, self.end_epoch):
            torch.cuda.empty_cache()  # periodically empty the CUDA cache
            train_loss_record = AvgMeter()
            for train_batch_id, train_data in enumerate(self.tr_loader):
                curr_iter = curr_epoch * self.niter_per_epoch + train_batch_id

                # These keys must match the settings in datasets.py.
                train_inputs = train_data[0]['images']
                train_labels = train_data[0]['masks']

                self.opti.zero_grad()
                *_, otr = self.net(train_inputs)

                loss_list = []
                loss_item_list = []
                sod_out = self.sod_crit(otr, train_labels)
                loss_list.append(sod_out)
                loss_item_list.append(f"{sod_out.item():.5f}")
                dice_out = self.dice_loss(otr, train_labels)
                loss_list.append(dice_out)
                loss_item_list.append(f"{dice_out.item():.5f}")
                train_loss = sum(loss_list)

                train_loss.backward()
                self.opti.step()

                if self.args["sche_usebatch"]:
                    if self.args["lr_type"] == "poly":
                        self.sche.step(curr_iter + 1)
                    elif self.args["lr_type"] == "cos":
                        self.sche.step()
                    else:
                        raise NotImplementedError

                # Only call item() when accumulating the loss value.
                train_iter_loss = train_loss.item()
                train_batch_size = train_inputs.size(0)
                train_loss_record.update(train_iter_loss, train_batch_size)

                # Log the data of each iteration.
                if (self.args["print_freq"] > 0 and (curr_iter + 1) % self.args["print_freq"] == 0):
                    log = (
                        f"[I:{curr_iter}/{self.iter_num}][E:{curr_epoch}:{self.end_epoch}]>"
                        f"[{self.args[self.args['NET']]['exp_name']}]"
                        f"[Lr:{self.opti.param_groups[0]['lr']:.7f}]"
                        f"[Avg:{train_loss_record.avg:.5f}|Cur:{train_iter_loss:.5f}|"
                        f"{loss_item_list}]"
                    )
                    print(log)
                    make_log(self.path["tr_log"], log)

            # Adjust the learning rate once per epoch.
            if not self.args["sche_usebatch"]:
                if self.args["lr_type"] == "poly":
                    self.sche.step(curr_epoch + 1)
                elif self.args["lr_type"] == "cos":
                    self.sche.step()
                elif self.args["lr_type"] == "step":
                    self.sche.step()
                else:
                    raise NotImplementedError

            # Save and evaluate after every epoch.
            torch.save(self.net.state_dict(), self.path["final_net"])
            if self.args["val_freq"] > 0 and ((curr_epoch) % self.args["val_freq"] == 0):
                self.pth_path = self.path["final_net"]
                if self.args["test_as_val"]:
                    # Validate with the test set.
                    results = self.test(
                        mode="test", save_pre=False, data_path=self.args['te_data_path']
                    )
                else:
                    results = self.test(
                        mode="val", save_pre=False, data_path=self.args['val_data_path']
                    )
                if results["maxf"] > self.best_results["maxf"]:
                    self.best_results = results
                    torch.save(self.net.state_dict(), self.path["best_net"])
                    msg = f"epoch:{curr_epoch}=>{results} is best, so far..."
                else:
                    msg = f"epoch:{curr_epoch}=>{results}"
                print(f" ==>> validation results: {msg} <<== ")
                make_log(self.path["val_log"], msg)
                self.net.train()

        # Run the final test; print the best validation results first.
        print(f" ==>> training finished, best validation results: {self.best_results} <<== ")

        if self.args["val_freq"] > 0 and (not self.args["test_as_val"]):
            self.pth_path = self.path["best_net"]
        elif self.args["val_freq"] > 0 and self.args["test_as_val"]:
            self.pth_path = self.path["final_net"]
        elif not self.args["val_freq"] > 0:
            self.pth_path = self.path["final_net"]
        for data_name, data_path in self.args['te_data_list'].items():
            if data_name == 'hkuis':
                prefix = ('.png', '.png')
            else:
                prefix = self.args['prefix']
            print(f" ==>> testing on the test set {data_name} <<== ")
            self.te_loader = DataLoaderX(
                ImageFolder(data_path, mode="test", in_size=self.args["input_size"], prefix=prefix),
                batch_size=self.args["batch_size"],
                num_workers=self.args["num_workers"],
                shuffle=False,
                drop_last=False,
                pin_memory=True
            )
            results = self.test(mode="test", save_pre=False, data_path=data_path)
            msg = (f" ==>> results on the test set {data_name}:'{data_path}': {results} <<== ")
            print(msg)
            make_log(self.path["val_log"], msg)

    def test(self, mode, save_pre, data_path):
        print(f" ==>> loading the model... {self.pth_path} <<== ")
        try:
            self.net.load_state_dict(torch.load(self.pth_path))
        except FileNotFoundError:
            print("please specify a model")
            exit()
        self.net.eval()

        if mode == "test":
            loader = self.te_loader
        elif mode == "val":
            loader = self.val_loader
        else:
            raise NotImplementedError

        gt_path = osp.join(data_path, "Mask")

        pres = [AvgMeter() for _ in range(256)]
        recs = [AvgMeter() for _ in range(256)]
        meanfs = AvgMeter()
        maes = AvgMeter()
        tqdm_iter = tqdm(enumerate(loader), total=len(loader), leave=False)
        for test_batch_id, test_data in tqdm_iter:
            tqdm_iter.set_description(
                f"{self.args[self.args['NET']]['exp_name']}:"
                f"te=>{test_batch_id + 1}"
            )
            in_imgs, in_names = test_data
            in_imgs = in_imgs.to(self.dev)
            with torch.no_grad():
                *_, outputs = self.net(in_imgs)
            outputs_np = outputs.cpu().detach()

            for item_id, out_item in enumerate(outputs_np):
                gimg_path = osp.join(gt_path, in_names[item_id] + ".png")
                gt_img = Image.open(gimg_path).convert("L")
                out_img = self.to_pil(out_item).resize(gt_img.size)
                gt_img = np.array(gt_img)

                if save_pre:
                    oimg_path = osp.join(self.save_path, in_names[item_id] + ".png")
                    out_img.save(oimg_path)

                out_img = np.array(out_img)
                ps, rs, mae, meanf = cal_pr_mae_meanf(out_img, gt_img)
                for pidx, pdata in enumerate(zip(ps, rs)):
                    p, r = pdata
                    pres[pidx].update(p)
                    recs[pidx].update(r)
                maes.update(mae)
                meanfs.update(meanf)
        maxf = cal_maxf([pre.avg for pre in pres], [rec.avg for rec in recs])
        results = {"maxf": maxf, "meanf": meanfs.avg, "mae": maes.avg}
        return results

    def make_scheduler(self):
        total_num = self.iter_num if self.args['sche_usebatch'] else self.epoch_num
        if self.args["lr_type"] == "poly":
            lamb = lambda curr: pow((1 - float(curr) / total_num), self.args["lr_decay"])
            scheduler = lr_scheduler.LambdaLR(self.opti, lr_lambda=lamb)
        elif self.args["lr_type"] == "cos":
            scheduler = lr_scheduler.CosineAnnealingLR(
                self.opti, T_max=total_num - 1, eta_min=4e-08
            )
        elif self.args["lr_type"] == "step":
            scheduler = lr_scheduler.StepLR(
                self.opti, step_size=self.args['steplr_epoch'], gamma=self.args['steplr_gamma']
            )
        else:
            raise NotImplementedError
        return scheduler

    def make_loader(self):
        print(f" ==>> training with the training set {self.args['tr_data_path']} <<== ")
        train_pipe = ImagePipeline(
            imageset_dir=self.args['tr_data_path'],
            image_size=self.args["input_size"],
            random_shuffle=True,
            batch_size=self.args["batch_size"]
        )
        train_loader = DALIGenericIterator(
            pipelines=train_pipe,
            output_map=["images", "masks"],
            size=train_pipe.epoch_size(),
            auto_reset=True,
            fill_last_batch=False,
            last_batch_padded=False
        )

        if self.args['val_data_path'] is not None:
            print(f" ==>> validating with the validation set {self.args['val_data_path']} <<== ")
            val_set = ImageFolder(
                self.args['val_data_path'],
                mode="test",
                in_size=self.args["input_size"],
                prefix=self.args['prefix']
            )
            val_loader = DataLoaderX(
                val_set,
                batch_size=self.args["batch_size"],
                num_workers=self.args["num_workers"],
                shuffle=False,
                drop_last=False,
                pin_memory=True
            )
        else:
            print(" ==>> no validation set is used <<== ")
            val_loader = None

        if self.args['te_data_path'] is not None:
            print(f" ==>> testing with the test set {self.args['te_data_path']} <<== ")
            test_set = ImageFolder(
                self.args['te_data_path'],
                mode="test",
                in_size=self.args["input_size"],
                prefix=self.args['prefix']
            )
            test_loader = DataLoaderX(
                test_set,
                batch_size=self.args["batch_size"],
                num_workers=self.args["num_workers"],
                shuffle=False,
                drop_last=False,
                pin_memory=True
            )
        else:
            print(" ==>> no test set is used <<== ")
            test_loader = None
        return train_loader, test_loader, val_loader

    def make_optim(self):
        if self.args["optim"] == "sgd_trick":
            # https://github.com/implus/PytorchInsight/blob/master/classification/imagenet_tricks.py
            params = [
                {
                    "params": [
                        p for name, p in self.net.named_parameters()
                        if ("bias" in name or "bn" in name)
                    ],
                    "weight_decay": 0,
                },
                {
                    "params": [
                        p for name, p in self.net.named_parameters()
                        if ("bias" not in name and "bn" not in name)
                    ]
                },
            ]
            optimizer = SGD(
                params,
                lr=self.args["lr"],
                momentum=self.args["momentum"],
                weight_decay=self.args["weight_decay"]
            )
        elif self.args["optim"] == "sgd_r3":
            params = [
                # Do not apply weight decay to bias parameters. Weight decay mainly
                # reduces overfitting by constraining the network parameters (both
                # weights and biases; the L2 penalty keeps the parameters smoother).
                {
                    "params":
                        [param for name, param in self.net.named_parameters() if name[-4:] == "bias"],
                    "lr": 2 * self.args["lr"],
                },
                {
                    "params":
                        [param for name, param in self.net.named_parameters() if name[-4:] != "bias"],
                    "lr": self.args["lr"],
                    "weight_decay": self.args["weight_decay"],
                },
            ]
            optimizer = SGD(params, momentum=self.args["momentum"])
        elif self.args["optim"] == "sgd_all":
            optimizer = SGD(
                self.net.parameters(),
                lr=self.args["lr"],
                weight_decay=self.args["weight_decay"],
                momentum=self.args["momentum"]
            )
self.args["optim"] == "adam": 389 | optimizer = Adam( 390 | self.net.parameters(), 391 | lr=self.args["lr"], 392 | betas=(0.9, 0.999), 393 | eps=1e-8, 394 | weight_decay=self.args["weight_decay"] 395 | ) 396 | else: 397 | raise NotImplementedError 398 | print("optimizer = ", optimizer) 399 | return optimizer 400 | 401 | 402 | if __name__ == "__main__": 403 | # 保存备份数据 ########################################################### 404 | print(f" ===========>> {datetime.now()}: 初始化开始 <<=========== ") 405 | init_start = datetime.now() 406 | trainer = Trainer(arg_config, path_config) 407 | print(f" ==>> 初始化完毕,用时:{datetime.now() - init_start} <<== ") 408 | 409 | shutil.copy(f"{proj_root}/Utils/epoch_config.py", path_config["cfg_log"]) 410 | shutil.copy(__file__, path_config["trainer_log"]) 411 | 412 | # 训练模型 ############################################################### 413 | print(f" ===========>> {datetime.now()}: 开始训练 <<=========== ") 414 | trainer.train() 415 | print(f" ===========>> {datetime.now()}: 结束训练 <<=========== ") 416 | -------------------------------------------------------------------------------- /Dynamic Multi-scale Filters for Semantic Segmentation/DCM.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2020-2-20 18:49:41 3 | # @Author : Lart Pang 4 | # @FileName: DCM.py 5 | # @Project : PyTorchCoding 6 | # @GitHub : https://github.com/lartpang 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | 12 | 13 | class DCM(nn.Module): 14 | def __init__(self, in_C, out_C): 15 | super(DCM, self).__init__() 16 | self.ks = [1, 3, 5] 17 | self.mid_C = in_C // 4 18 | 19 | self.ger_kernel_branches = nn.ModuleList() 20 | for k in self.ks: 21 | self.ger_kernel_branches.append( 22 | nn.Sequential(nn.AdaptiveAvgPool2d(k), nn.Conv2d(in_C, self.mid_C, kernel_size=1)) 23 | ) 24 | 25 | self.trans_branches = nn.ModuleList() 26 | self.fuse_inside_branches = nn.ModuleList() 27 | for i in range(len(self.ks)): 28 | self.trans_branches.append(nn.Conv2d(in_C, self.mid_C, kernel_size=1)) 29 | self.fuse_inside_branches.append(nn.Conv2d(self.mid_C, self.mid_C, 1)) 30 | 31 | self.fuse_outside = nn.Conv2d(len(self.ks) * self.mid_C + in_C, out_C, 1) 32 | 33 | def forward(self, x, y): 34 | """ 35 | x: 被卷积的特征 36 | y: 用来生成卷积核 37 | """ 38 | feats_branches = [x] 39 | for i in range(len(self.ks)): 40 | kernel = self.ger_kernel_branches[i](y) 41 | kernel_single = kernel.split(1, dim=0) 42 | x_inside = self.trans_branches[i](x) 43 | x_inside_single = x_inside.split(1, dim=0) 44 | feat_single = [] 45 | for kernel_single_item, x_inside_single_item \ 46 | in zip(kernel_single, x_inside_single): 47 | feat_inside_single = self.fuse_inside_branches[i]( 48 | F.conv2d( 49 | x_inside_single_item, 50 | weight=kernel_single_item.transpose(0, 1), 51 | bias=None, 52 | stride=1, 53 | padding=self.ks[i] // 2, 54 | dilation=1, 55 | groups=self.mid_C 56 | ) 57 | ) 58 | feat_single.append(feat_inside_single) 59 | feat_single = torch.cat(feat_single, dim=0) 60 | feats_branches.append(feat_single) 61 | return self.fuse_outside(torch.cat(feats_branches, dim=1)) 62 | 63 | 64 | if __name__ == '__main__': 65 | x = torch.randn(4, 2048, 20, 20) 66 | y = torch.randn(4, 2048, 20, 20) 67 | dcm = DCM(in_C=2048, out_C=20) 68 | print(dcm(x, y).size()) 69 | -------------------------------------------------------------------------------- /Dynamic Multi-scale Filters for Semantic Segmentation/readme.md: 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# PyTorchCoding

Some PyTorch examples based on various papers.

This repository mainly contains the code of the key modules in those papers.
--------------------------------------------------------------------------------