├── ICNet
│   ├── dataset.py
│   ├── evaluate.py
│   ├── evaluator
│   │   ├── __init__.py
│   │   ├── dataset.py
│   │   ├── evaluator.py
│   │   ├── fmeasure.py
│   │   ├── mae.py
│   │   └── smeasure.py
│   ├── loss.py
│   ├── network.py
│   ├── solver.py
│   ├── test.py
│   ├── train.py
│   └── utils.py
├── LICENSE
├── README.md
└── thumbnail.png

--------------------------------------------------------------------------------
/ICNet/dataset.py:
--------------------------------------------------------------------------------
import os
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import torch
import random
import numpy as np
from torch.utils import data
import PIL.ImageOps
import cv2
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from os.path import join
from os import listdir

"""
build_file_paths:
    When "file_names" and "group_names" are both None,
    traverse the file folder to build "file_paths", "group_names", "file_names" and "indices".
    Otherwise, build "file_paths" based on the given "file_names" and "group_names".
"""
def build_file_paths(base, group_names=None, file_names=None, suffix='.png'):
    if file_names is None and group_names is None:
        file_paths = []
        group_names = []
        file_names = []
        indices = []
        cur_group_end_index = 0
        for group_name in listdir(base):
            group_path = join(base, group_name)
            group_file_names = listdir(group_path)
            cur_group_end_index += len(group_file_names)

            # Save the ending index of the current group into "indices", which is prepared for "Cosal_Sampler".
            indices.append(cur_group_end_index)

            for file_name in group_file_names:
                file_path = join(group_path, file_name)
                file_paths.append(file_path)
                group_names.append(group_name)
                file_names.append(file_name[:str(file_name).rfind('.')])
        return file_paths, group_names, file_names, indices
    else:
        file_paths = list(map(lambda i: join(base, group_names[i], file_names[i] + suffix), range(len(file_names))))
        return file_paths

"""
random_flip:
    Flip inputs horizontally with a probability of 0.5.
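    "gt" and "sism" are allowed to be None; None entries are passed through unchanged.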
"""
def random_flip(img, gt, sism):
    datas = (img, gt, sism)
    if random.random() > 0.5:
        datas = tuple(map(lambda data: transforms.functional.hflip(data) if data is not None else None, datas))
    return datas


class ImageData(data.Dataset):
    def __init__(self, roots, request, aug_transform=None, rgb_transform=None, gray_transform=None):
        if 'img' not in request:
            raise Exception('\'img\' must be contained in \'request\'.')

        self.need_gt = 'gt' in request
        self.need_file_name = 'file_name' in request
        self.need_group_name = 'group_name' in request
        self.need_sism = 'sism' in request
        self.need_size = 'size' in request

        img_paths, group_names, file_names, indices = build_file_paths(roots['img'])
        gt_paths = build_file_paths(roots['gt'], group_names, file_names) if self.need_gt else None
        sism_paths = build_file_paths(roots['sism'], group_names, file_names) if self.need_sism else None

        self.img_paths = img_paths
        self.gt_paths = gt_paths
        self.sism_paths = sism_paths
        self.file_names = file_names
        self.group_names = group_names
        self.indices = indices
        self.aug_transform = aug_transform
        self.rgb_transform = rgb_transform
        self.gray_transform = gray_transform

    def __getitem__(self, item):
        img = Image.open(self.img_paths[item]).convert('RGB')
        W, H = img.size
        gt = Image.open(self.gt_paths[item]).convert('L') if self.need_gt else None
        sism = Image.open(self.sism_paths[item]).convert('L') if self.need_sism else None
        group_name = self.group_names[item] if self.need_group_name else None
        file_name = self.file_names[item] if self.need_file_name else None

        if self.aug_transform is not None:
            img, gt, sism = self.aug_transform(img, gt, sism)

        if self.rgb_transform is not None:
            img = self.rgb_transform(img)
        if self.gray_transform is not None and self.need_gt:
            gt = self.gray_transform(gt)
        if self.gray_transform is not None and self.need_sism:
            sism = self.gray_transform(sism)

        data_item = {}
        data_item['img'] = img
        if self.need_gt: data_item['gt'] = gt
        if self.need_sism: data_item['sism'] = sism
        if self.need_file_name: data_item['file_name'] = file_name
        if self.need_group_name: data_item['group_name'] = group_name
        if self.need_size: data_item['size'] = (H, W)
        return data_item

    def __len__(self):
        return len(self.img_paths)


"""
Cosal_Sampler:
    Provide the indices of each batch, ensuring that each batch is extracted from the same image group (i.e., shares the same category).
"""
class Cosal_Sampler(data.Sampler):
    def __init__(self, indices, shuffle, batch_size):
        self.indices = indices
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.len = None
        self.batches_indices = None
        self.reset_batches_indices()

    def reset_batches_indices(self):
        batches_indices = []
        start_idx = 0
        # For each image group (with the same category):
        for end_idx in self.indices:
            # Initialize "group_indices".
            group_indices = list(range(start_idx, end_idx))

            # Shuffle "group_indices" if needed.
            if self.shuffle:
                np.random.shuffle(group_indices)

            # Get the size of the current image group.
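            # For example, with "indices == [5, 12]" (two groups containing 5 and 7
            # images) and "batch_size == 4", the first group [0, 1, 2, 3, 4] is split
            # into the batches [0, 1, 2, 3] and [4], and the second group into
            # [5, 6, 7, 8] and [9, 10, 11] (when "shuffle == False").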
            num = end_idx - start_idx

            # Split "group_indices" into multiple batches according to "self.batch_size",
            # then append the split indices ("batch_indices") to "batches_indices".
            # Note that, when "self.batch_size == None", each image group is regarded as one batch ("batch_size = num").
            idx = 0
            while idx < num:
                batch_size = num if self.batch_size is None else self.batch_size
                batch_indices = group_indices[idx:idx + batch_size]
                batches_indices.append(batch_indices)
                idx += batch_size
            start_idx = end_idx

        # Each entry of "batches_indices" is a list indicating the indices of a specific batch,
        # but neighbouring entries basically belong to the same image group (with the same category).
        # Thus, shuffle "batches_indices" if needed.
        if self.shuffle:
            np.random.shuffle(batches_indices)

        self.len = len(batches_indices)
        self.batches_indices = batches_indices

    def __iter__(self):
        if self.shuffle:
            self.reset_batches_indices()
        return iter(self.batches_indices)

    def __len__(self):
        return self.len


def get_loader(roots, request, batch_size, data_aug, shuffle, num_thread=4, pin=True):
    aug_transform = random_flip if data_aug else None
    rgb_transform = transforms.Compose([
        transforms.Resize([224, 224]),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    gray_transform = transforms.Compose([
        transforms.Resize([224, 224]),
        transforms.ToTensor()
    ])
    dataset = ImageData(roots, request, aug_transform=aug_transform, rgb_transform=rgb_transform, gray_transform=gray_transform)
    cosal_sampler = Cosal_Sampler(indices=dataset.indices, shuffle=shuffle, batch_size=batch_size)
    data_loader = data.DataLoader(dataset=dataset, batch_sampler=cosal_sampler, num_workers=num_thread, pin_memory=pin)
    return data_loader

--------------------------------------------------------------------------------
/ICNet/evaluate.py:
--------------------------------------------------------------------------------
import os
from evaluator.evaluator import evaluate_dataset
from utils import write_doc

"""
* Note:
    The evaluation codes in "./evaluator/" are implemented in PyTorch (GPU version) for acceleration.

    Since the original sizes of some GTs (e.g., in the "Cosal2015" dataset) are too large to evaluate on a GPU
    with limited memory (our "TITAN Xp" runs out of its 12G memory when computing the F-measure), the input
    prediction map and the corresponding GT are resized to 224*224 by our evaluation codes before the metrics
    are computed.
"""

"""
evaluate:
    Given predictions, compute multiple metrics (max F-measure, S-measure and MAE).
    The evaluation results are saved in "doc_path".
"""
def evaluate(roots, doc_path, num_thread, pin):
    datasets = roots.keys()
    for dataset in datasets:
        # Evaluate the predictions of "dataset".
        results = evaluate_dataset(roots=roots[dataset],
                                   dataset=dataset,
                                   batch_size=1,
                                   num_thread=num_thread,
                                   demical=True,
                                   suffixes={'gt': '.png', 'pred': '.png'},
                                   pin=pin)

        # Save the evaluation results.
        content = '{}:\n'.format(dataset)
        content += 'max-Fmeasure={}'.format(results['max_f'])
        content += ' '
        content += 'Smeasure={}'.format(results['s'])
        content += ' '
        content += 'MAE={}\n'.format(results['mae'])
        write_doc(doc_path, content)
    content = '\n'
    write_doc(doc_path, content)

"""
Evaluation settings (used for "evaluate.py"):

eval_device:
    Index of the GPU used for evaluation.

eval_doc_path:
    Path of the file (".txt") used to save the evaluation results.

eval_roots:
    A dictionary including multiple sub-dictionaries;
    each sub-dictionary contains the GT and prediction folder paths of a specific dataset.
    Format:
    eval_roots = {
        name of dataset_1: {
            'gt': GT folder path of dataset_1,
            'pred': prediction folder path of dataset_1
        },
        name of dataset_2: {
            'gt': GT folder path of dataset_2,
            'pred': prediction folder path of dataset_2
        }
        .
        .
        .
    }
"""

eval_device = '0'
eval_doc_path = './evaluation.txt'
eval_num_thread = 4

# An example to build "eval_roots".
eval_roots = dict()
datasets = ['MSRC', 'iCoSeg', 'CoSal2015', 'CoSOD3k', 'CoCA']

for dataset in datasets:
    roots = {'gt': '/mnt/jwd/data/{}/gt_bilinear_224/'.format(dataset),
             'pred': './pred/{}/'.format(dataset)}
    eval_roots[dataset] = roots
# ------------- end -------------

if __name__ == "__main__":
    os.environ['CUDA_VISIBLE_DEVICES'] = eval_device
    evaluate(roots=eval_roots,
             doc_path=eval_doc_path,
             num_thread=eval_num_thread,
             pin=False)

--------------------------------------------------------------------------------
/ICNet/evaluator/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blanclist/ICNet/f0d5e463839caa2e39a6fc042da22284b7d4c9d9/ICNet/evaluator/__init__.py

--------------------------------------------------------------------------------
/ICNet/evaluator/dataset.py:
--------------------------------------------------------------------------------
from os import listdir
from os.path import join
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
from torch.utils import data
import torchvision.transforms as transforms

def build_file_paths(roots, suffixes):
    pred_base = roots['pred']
    gt_base = roots['gt']
    pred_suffix = suffixes['pred']
    gt_suffix = suffixes['gt']

    pred_paths = []
    gt_paths = []
    group_names = listdir(pred_base)
    for group_name in group_names:
        group_pred_names = list(filter(lambda name: name.endswith(pred_suffix), listdir(join(pred_base, group_name))))
        pred_paths += list(map(lambda pred_name: join(pred_base, group_name, pred_name), group_pred_names))
        gt_paths += list(map(lambda pred_name: join(gt_base, group_name, pred_name[:-len(pred_suffix)] + gt_suffix), group_pred_names))
    return gt_paths, pred_paths


class ImageData(data.Dataset):
    def __init__(self, roots, suffixes):
        gt_paths, pred_paths = build_file_paths(roots, suffixes)

        self.gt_paths = gt_paths
        self.pred_paths = pred_paths

    def __getitem__(self, item):
        gt = Image.open(self.gt_paths[item]).convert('L')
        pred = Image.open(self.pred_paths[item]).convert('L')

        transform = transforms.Compose([
            transforms.Resize([224, 224]),
            transforms.ToTensor()
        ])
        gt, pred = transform(gt), transform(pred)

        data_item = {}
        data_item['pred'] = pred
        data_item['gt'] = gt
        return data_item

    def __len__(self):
        return len(self.pred_paths)


def get_loader(roots, suffixes, batch_size, num_thread, pin=True):
    dataset = ImageData(roots, suffixes)
    data_loader = data.DataLoader(dataset=dataset, shuffle=False, batch_size=batch_size, num_workers=num_thread, pin_memory=pin)
    return data_loader

--------------------------------------------------------------------------------
/ICNet/evaluator/evaluator.py:
--------------------------------------------------------------------------------
from .dataset import get_loader
from .smeasure import calc_smeasure
from .fmeasure import calc_p_r_fmeasure
from .mae import calc_mae
import numpy as np
import torch
from decimal import Decimal

def tf(data):
    return float(data)

def tn(data):
    return np.array(data.cpu())

def td(data):
    return Decimal(data).quantize(Decimal('0.000'))

def get_n(gt, pred, n_mask):
    H, W = gt.shape
    HW = H * W
    n_gt = gt.view(1, HW).repeat(255, 1)      # [255, HW]
    n_pred = pred.view(1, HW).repeat(255, 1)  # [255, HW]
    n_pred = torch.where(n_pred <= n_mask, torch.zeros_like(n_pred), torch.ones_like(n_pred))
    return n_gt, n_pred

def evaluate_dataset(roots, dataset, batch_size, num_thread, demical, suffixes, pin):
    with torch.no_grad():
        dataloader = get_loader(roots, suffixes, batch_size, num_thread, pin=pin)
        p = np.zeros(255)
        r = np.zeros(255)
        s = 0.0
        f = np.zeros(255)
        mae = 0.0
        n_mask = torch.FloatTensor(np.array(range(255)) / 255.0).view(255, 1).repeat(1, 224 * 224).cuda()  # [255, HW]
        for batch in dataloader:
            gt, pred = batch['gt'].cuda().view(224, 224), batch['pred'].cuda().view(224, 224)

            _s = calc_smeasure(gt, pred)
            _mae = calc_mae(gt, pred)
            n_gt, n_pred = get_n(gt, pred, n_mask)
            _p, _r, _f = calc_p_r_fmeasure(n_gt, n_pred, n_mask)
            _mean_f = torch.mean(_f)
            _max_f = torch.max(_f)

            _s = tf(_s)
            _p = tn(_p)
            _r = tn(_r)
            _f = tn(_f)
            _mae = tf(_mae)
            _mean_f = tf(_mean_f)
            _max_f = tf(_max_f)

            p += _p
            r += _r
            s += _s
            f += _f
            mae += _mae
        num = len(dataloader)
        p /= num
        r /= num
        f /= num
        s, mae, mean_f, max_f = s / num, mae / num, np.mean(f), np.max(f)
        if demical:
            s, mae, mean_f, max_f = td(s), td(mae), td(mean_f), td(max_f)

        results = {'s': s, 'p': p, 'r': r, 'f': f,
                   'mae': mae,
                   'mean_f': mean_f, 'max_f': max_f}
        return results

--------------------------------------------------------------------------------
/ICNet/evaluator/fmeasure.py:
--------------------------------------------------------------------------------
import torch

def calc_p_r_fmeasure(n_gt, n_pred, n_mask):
    tp = torch.sum(n_pred * n_gt, dim=1)   # [255]
    tp_plus_fp = torch.sum(n_pred, dim=1)  # [255]
    temp = torch.ones_like(tp_plus_fp)
    tp_plus_fp = torch.where(tp_plus_fp == 0.0, temp, tp_plus_fp)
    tp_plus_fn = torch.sum(n_gt, dim=1)    # [255]
    tp_plus_fn = torch.where(tp_plus_fn == 0.0, temp, tp_plus_fn)
    precision = tp / tp_plus_fp
    recall = tp / tp_plus_fn
    a = 1.3 * precision * recall
    b = 0.3 * precision + recall
    temp = torch.ones_like(b) * 1e31
    b = torch.where(b == 0.0, temp, b)
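    # The division below evaluates the F-measure with beta^2 = 0.3 (the common
    # setting in salient object detection) at all 255 thresholds:
    #     F_beta = (1 + beta^2) * P * R / (beta^2 * P + R),
    # where the 1e31 guard above keeps the denominator non-zero.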
    fBetaScore = a / b
    return precision, recall, fBetaScore

--------------------------------------------------------------------------------
/ICNet/evaluator/mae.py:
--------------------------------------------------------------------------------
import torch

def calc_mae(gt, pred):
    return torch.mean(torch.abs(gt - pred))

--------------------------------------------------------------------------------
/ICNet/evaluator/smeasure.py:
--------------------------------------------------------------------------------
import torch
import numpy as np

def calc_smeasure(gt, pred):
    def mean(x):
        return torch.mean(x)

    def cov(x, y):
        mean_x = mean(x)
        mean_y = mean(y)
        return torch.mean((x - mean_x) * (y - mean_y))

    def ssim(x, y):
        mean_x = mean(x)
        mean_y = mean(y)
        cov_x_x = cov(x, x)
        cov_y_y = cov(y, y)
        cov_x_y = cov(x, y)
        a = 4.0 * mean_x * mean_y * cov_x_y
        b = (mean_x ** 2 + mean_y ** 2) * (cov_x_x + cov_y_y)
        return a / (b + 1e-12)

    def O(x, mask):
        mean = torch.sum(x * mask) / (1e-12 + torch.sum(mask))
        var = torch.sqrt(torch.sum(((x - mean) ** 2) * mask) / (1e-12 + torch.sum(mask)))
        return mean * 2.0 / (1.0 + mean ** 2 + var)

    def centroid(y):
        h, w = y.shape
        total = 1e-12 + torch.sum(y)
        hw = int(torch.round(torch.sum(torch.sum(y, axis=0) * torch.from_numpy(np.array(range(1, 1 + w))).cuda()) / total))
        hh = int(torch.round(torch.sum(torch.sum(y, axis=1) * torch.from_numpy(np.array(range(1, 1 + h))).cuda()) / total))

        area = h * w
        w1 = hh * hw / area
        w2 = hh * (w - hw) / area
        w3 = (h - hh) * hw / area
        w4 = 1.0 - w1 - w2 - w3
        return hh, hw, h, w, w1, w2, w3, w4

    def seg(x, hh, hw, h, w):
        x1 = x[0:hh, 0:hw]
        x2 = x[0:hh, hw:w]
        x3 = x[hh:h, 0:hw]
        x4 = x[hh:h, hw:w]
        return x1, x2, x3, x4

    def Sr(x, y):
        hh, hw, h, w, w1, w2, w3, w4 = centroid(y)
        x1, x2, x3, x4 = seg(x, hh, hw, h, w)
        y1, y2, y3, y4 = seg(y, hh, hw, h, w)
        return ssim(x1, y1) * w1 + ssim(x2, y2) * w2 + ssim(x3, y3) * w3 + ssim(x4, y4) * w4

    def So(x, y):
        mu = mean(y)
        return O(x, y) * mu + O(1.0 - x, 1.0 - y) * (1.0 - mu)

    def Sm(x, y):
        return Sr(x, y) * 0.5 + So(x, y) * 0.5

    return Sm(pred, gt)

--------------------------------------------------------------------------------
/ICNet/loss.py:
--------------------------------------------------------------------------------
import torch

"""
IoU_loss:
    Compute the IoU loss between predictions and ground-truths for training [Equation 3].
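    For predictions "p" and ground-truths "g" taking values in [0, 1], the loss computed below is
        loss = 1 - mean_{N, C}( sum_{H, W} min(p, g) / sum_{H, W} max(p, g) ),
    i.e., one minus the soft IoU, averaged over the samples and the prediction maps.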
"""
def IoU_loss(preds_list, gt):
    preds = torch.cat(preds_list, dim=1)
    N, C, H, W = preds.shape
    min_tensor = torch.where(preds < gt, preds, gt)    # shape=[N, C, H, W]
    max_tensor = torch.where(preds > gt, preds, gt)    # shape=[N, C, H, W]
    min_sum = min_tensor.view(N, C, H * W).sum(dim=2)  # shape=[N, C]
    max_sum = max_tensor.view(N, C, H * W).sum(dim=2)  # shape=[N, C]
    loss = 1 - (min_sum / max_sum).mean()
    return loss

--------------------------------------------------------------------------------
/ICNet/network.py:
--------------------------------------------------------------------------------
import torch
import time
import numpy as np
import torch.nn.functional as F
from torch import nn
from torch.nn import init
from os.path import join
np.set_printoptions(suppress=True, threshold=1e5)

"""
resize:
    Resize a tensor (shape=[N, C, H, W]) to the target size (default: 224*224).
"""
def resize(input, target_size=(224, 224)):
    return F.interpolate(input, (target_size[0], target_size[1]), mode='bilinear', align_corners=True)

"""
weights_init:
    Weights initialization.
"""
def weights_init(module):
    if isinstance(module, nn.Conv2d):
        init.normal_(module.weight, 0, 0.01)
        if module.bias is not None:
            init.constant_(module.bias, 0)
    elif isinstance(module, nn.BatchNorm2d):
        init.constant_(module.weight, 1)
        init.constant_(module.bias, 0)


"""
VGG16:
    The VGG16 backbone.
"""
class VGG16(nn.Module):
    def __init__(self):
        super(VGG16, self).__init__()
        layers = []
        in_channel = 3
        vgg_out_channels = (64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M')
        for out_channel in vgg_out_channels:
            if out_channel == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                conv2d = nn.Conv2d(in_channel, out_channel, 3, 1, 1)
                layers += [conv2d, nn.ReLU(inplace=True)]
                in_channel = out_channel
        self.vgg = nn.ModuleList(layers)
        self.table = {'conv1_1': 0, 'conv1_2': 2, 'conv1_2_mp': 4,
                      'conv2_1': 5, 'conv2_2': 7, 'conv2_2_mp': 9,
                      'conv3_1': 10, 'conv3_2': 12, 'conv3_3': 14, 'conv3_3_mp': 16,
                      'conv4_1': 17, 'conv4_2': 19, 'conv4_3': 21, 'conv4_3_mp': 23,
                      'conv5_1': 24, 'conv5_2': 26, 'conv5_3': 28, 'conv5_3_mp': 30, 'final': 31}

    def forward(self, feats, start_layer_name, end_layer_name):
        start_idx = self.table[start_layer_name]
        end_idx = self.table[end_layer_name]
        for idx in range(start_idx, end_idx):
            feats = self.vgg[idx](feats)
        return feats


"""
Prediction:
    Compress the channels of input features to 1, then predict maps with the sigmoid function.
"""
class Prediction(nn.Module):
    def __init__(self, in_channel):
        super(Prediction, self).__init__()
        self.pred = nn.Sequential(nn.Conv2d(in_channel, 1, 1), nn.Sigmoid())

    def forward(self, feats):
        pred = self.pred(feats)
        return pred


"""
Res:
    Two convolutional layers with a residual connection.
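    The output is "relu(feats + conv(feats))", keeping the channel number unchanged.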
"""
class Res(nn.Module):
    def __init__(self, in_channel):
        super(Res, self).__init__()
        self.conv = nn.Sequential(nn.Conv2d(in_channel, in_channel, 3, 1, 1),
                                  nn.BatchNorm2d(in_channel), nn.ReLU(inplace=True),
                                  nn.Conv2d(in_channel, in_channel, 3, 1, 1))

    def forward(self, feats):
        feats = feats + self.conv(feats)
        feats = F.relu(feats, inplace=True)
        return feats

"""
Cosal_Module:
    Given features extracted from the VGG16 backbone,
    exploit SISMs to build intra cues and inter cues.
"""
class Cosal_Module(nn.Module):
    def __init__(self, H, W):
        super(Cosal_Module, self).__init__()
        self.cosal_feat = Cosal_Sub_Module(H, W)
        self.conv = nn.Sequential(nn.Conv2d(256, 128, 1), Res(128))

    def forward(self, feats, SISMs):
        # Get foreground co-saliency features.
        fore_cosal_feats = self.cosal_feat(feats, SISMs)

        # Get background co-saliency features.
        back_cosal_feats = self.cosal_feat(feats, 1.0 - SISMs)

        # Fuse the foreground and background co-saliency features
        # to generate co-saliency enhanced features.
        cosal_enhanced_feats = self.conv(torch.cat([fore_cosal_feats, back_cosal_feats], dim=1))
        return cosal_enhanced_feats

"""
Cosal_Sub_Module:
    * The kernel module of ICNet.
    Generate foreground/background co-salient features by using SISMs.
"""
class Cosal_Sub_Module(nn.Module):
    def __init__(self, H, W):
        super(Cosal_Sub_Module, self).__init__()
        channel = H * W
        self.conv = nn.Sequential(nn.Conv2d(channel, 128, 1), Res(128))

    def forward(self, feats, SISMs):
        N, C, H, W = feats.shape
        HW = H * W

        # Resize SISMs to the same spatial size as the input feats.
        SISMs = resize(SISMs, [H, W])  # shape=[N, 1, H, W]

        # NFs: L2-normalized features.
        NFs = F.normalize(feats, dim=1)  # shape=[N, C, H, W]

        def CFM(SIVs, NFs):
            # Compute correlation maps [Figure 4] between SIVs and the pixel-wise feature vectors in NFs by inner product.
            # We implement this process by "F.conv2d()", which takes SIVs as 1*1 kernels to convolve NFs.
            correlation_maps = F.conv2d(NFs, weight=SIVs)  # shape=[N, N, H, W]

            # Vectorize and normalize the correlation maps.
            correlation_maps = F.normalize(correlation_maps.reshape(N, N, HW), dim=2)  # shape=[N, N, HW]

            # Compute the weight vectors [Equation 2].
            correlation_matrix = torch.matmul(correlation_maps, correlation_maps.permute(0, 2, 1))  # shape=[N, N, N]
            weight_vectors = correlation_matrix.sum(dim=2).softmax(dim=1)  # shape=[N, N]

            # Fuse the correlation maps with the weight vectors to build the co-salient attention (CSA) maps.
            CSA_maps = torch.sum(correlation_maps * weight_vectors.view(N, N, 1), dim=1)  # shape=[N, HW]

            # Max-min normalize the CSA maps.
            min_value = torch.min(CSA_maps, dim=1, keepdim=True)[0]
            max_value = torch.max(CSA_maps, dim=1, keepdim=True)[0]
            CSA_maps = (CSA_maps - min_value) / (max_value - min_value + 1e-12)  # shape=[N, HW]
            CSA_maps = CSA_maps.view(N, 1, H, W)  # shape=[N, 1, H, W]
            return CSA_maps

        def get_SCFs(NFs):
            NFs = NFs.view(N, C, HW)  # shape=[N, C, HW]
            SCFs = torch.matmul(NFs.permute(0, 2, 1), NFs).view(N, -1, H, W)  # shape=[N, HW, H, W]
            return SCFs

        # Compute SIVs [Section 3.2, Equation 1].
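        # Averaging "NFs * SISMs" over the spatial dimensions pools each image's
        # salient-region features into a single C-dimensional vector; the following
        # L2 normalization makes the result insensitive to the overall scale of the SISM.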
        SIVs = F.normalize((NFs * SISMs).mean(dim=3).mean(dim=2), dim=1).view(N, C, 1, 1)  # shape=[N, C, 1, 1]

        # Compute the co-salient attention (CSA) maps [Section 3.3].
        CSA_maps = CFM(SIVs, NFs)  # shape=[N, 1, H, W]

        # Compute the self-correlation features (SCFs) [Section 3.4].
        SCFs = get_SCFs(NFs)  # shape=[N, HW, H, W]

        # Rearrange the channel order of SCFs to obtain RSCFs [Section 3.4].
        evidence = CSA_maps.view(N, HW)  # shape=[N, HW]
        indices = torch.argsort(evidence, dim=1, descending=True).view(N, HW, 1, 1).repeat(1, 1, H, W)  # shape=[N, HW, H, W]
        RSCFs = torch.gather(SCFs, dim=1, index=indices)  # shape=[N, HW, H, W]
        cosal_feat = self.conv(RSCFs * CSA_maps)  # shape=[N, 128, H, W]
        return cosal_feat

"""
Decoder_Block:
    A U-Net-like decoder block that fuses co-saliency features and low-level features for upsampling.
"""
class Decoder_Block(nn.Module):
    def __init__(self, in_channel):
        super(Decoder_Block, self).__init__()
        self.cmprs = nn.Conv2d(in_channel, 32, 1)
        self.merge_conv = nn.Sequential(nn.Conv2d(96, 96, 3, 1, 1), nn.BatchNorm2d(96), nn.ReLU(inplace=True),
                                        nn.Conv2d(96, 32, 3, 1, 1), nn.BatchNorm2d(32), nn.ReLU(inplace=True))
        self.pred = Prediction(32)

    def forward(self, low_level_feats, cosal_map, SISMs, old_feats):
        _, _, H, W = low_level_feats.shape
        # Adjust cosal_map, SISMs and old_feats to the same spatial size as low_level_feats.
        cosal_map = resize(cosal_map, [H, W])
        SISMs = resize(SISMs, [H, W])
        old_feats = resize(old_feats, [H, W])

        # Predict co-saliency maps of size H*W.
        cmprs = self.cmprs(low_level_feats)
        new_feats = self.merge_conv(torch.cat([cmprs * cosal_map,
                                               cmprs * SISMs,
                                               old_feats], dim=1))
        new_cosal_map = self.pred(new_feats)
        return new_feats, new_cosal_map


"""
ICNet:
    The entire ICNet.
    Given a group of images and the corresponding SISMs, ICNet outputs a group of co-saliency maps (predictions) at once.
"""
class ICNet(nn.Module):
    def __init__(self):
        super(ICNet, self).__init__()
        self.vgg = VGG16()
        self.Co6 = Cosal_Module(7, 7)
        self.Co5 = Cosal_Module(14, 14)
        self.Co4 = Cosal_Module(28, 28)
        self.conv6_cmprs = nn.Sequential(nn.MaxPool2d(2, 2), nn.Conv2d(512, 128, 1),
                                         nn.Conv2d(128, 128, 3, 1, 1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
                                         nn.Conv2d(128, 128, 3, 1, 1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
                                         nn.Conv2d(128, 128, 3, 1, 1))
        self.conv5_cmprs = nn.Conv2d(512, 256, 1)
        self.conv4_cmprs = nn.Conv2d(512, 256, 1)

        self.merge_co_56 = Res(128)
        self.merge_co_45 = nn.Sequential(Res(128), nn.Conv2d(128, 32, 1))
        self.get_pred_4 = Prediction(32)
        self.refine_3 = Decoder_Block(256)
        self.refine_2 = Decoder_Block(128)
        self.refine_1 = Decoder_Block(64)

    def forward(self, image_group, SISMs, is_training):
        # Extract features from the VGG16 backbone.
        conv1_2 = self.vgg(image_group, 'conv1_1', 'conv1_2_mp')  # shape=[N, 64, 224, 224]
        conv2_2 = self.vgg(conv1_2, 'conv1_2_mp', 'conv2_2_mp')   # shape=[N, 128, 112, 112]
        conv3_3 = self.vgg(conv2_2, 'conv2_2_mp', 'conv3_3_mp')   # shape=[N, 256, 56, 56]
        conv4_3 = self.vgg(conv3_3, 'conv3_3_mp', 'conv4_3_mp')   # shape=[N, 512, 28, 28]
        conv5_3 = self.vgg(conv4_3, 'conv4_3_mp', 'conv5_3_mp')   # shape=[N, 512, 14, 14]

        # Compress the channels of the high-level features.
        conv6_cmprs = self.conv6_cmprs(conv5_3)  # shape=[N, 128, 7, 7]
        conv5_cmprs = self.conv5_cmprs(conv5_3)  # shape=[N, 256, 14, 14]
        conv4_cmprs = self.conv4_cmprs(conv4_3)  # shape=[N, 256, 28, 28]

        # Obtain co-saliency features.
        cosal_feat_6 = self.Co6(conv6_cmprs, SISMs)  # shape=[N, 128, 7, 7]
        cosal_feat_5 = self.Co5(conv5_cmprs, SISMs)  # shape=[N, 128, 14, 14]
        cosal_feat_4 = self.Co4(conv4_cmprs, SISMs)  # shape=[N, 128, 28, 28]

        # Merge the co-saliency features and predict co-saliency maps of size 28*28 (i.e., "cosal_map_4").
        feat_56 = self.merge_co_56(cosal_feat_5 + resize(cosal_feat_6, [14, 14]))  # shape=[N, 128, 14, 14]
        feat_45 = self.merge_co_45(cosal_feat_4 + resize(feat_56, [28, 28]))       # shape=[N, 32, 28, 28]
        cosal_map_4 = self.get_pred_4(feat_45)  # shape=[N, 1, 28, 28]

        # Obtain co-saliency maps of size 224*224 (i.e., "cosal_map_1") by progressive upsampling.
        feat_34, cosal_map_3 = self.refine_3(conv3_3, cosal_map_4, SISMs, feat_45)
        feat_23, cosal_map_2 = self.refine_2(conv2_2, cosal_map_4, SISMs, feat_34)
        _, cosal_map_1 = self.refine_1(conv1_2, cosal_map_4, SISMs, feat_23)  # shape=[N, 1, 224, 224]

        # Return the predicted co-saliency maps.
        if is_training:
            preds_list = [resize(cosal_map_4), resize(cosal_map_3), resize(cosal_map_2), cosal_map_1]
            return preds_list
        else:
            preds = cosal_map_1
            return preds

--------------------------------------------------------------------------------
/ICNet/solver.py:
--------------------------------------------------------------------------------
import torch
from torch.optim import Adam
import network
from loss import IoU_loss
import numpy as np
import cv2
from dataset import get_loader
from os.path import join
import random
from utils import mkdir, write_doc, get_time


class Solver(object):
    def __init__(self):
        self.ICNet = network.ICNet().cuda()

    def train(self, roots, init_epoch, end_epoch, learning_rate, batch_size, weight_decay, ckpt_root, doc_path, num_thread, pin, vgg_path=None):
        # Define the Adam optimizer.
        optimizer = Adam(self.ICNet.parameters(),
                         lr=learning_rate,
                         weight_decay=weight_decay)

        # Load a ".pth" file to initialize the model.
        if init_epoch == 0:
            # From pre-trained VGG16.
            self.ICNet.apply(network.weights_init)
            self.ICNet.vgg.vgg.load_state_dict(torch.load(vgg_path))
        else:
            # From an existing checkpoint file.
            ckpt = torch.load(join(ckpt_root, 'Weights_{}.pth'.format(init_epoch)))
            self.ICNet.load_state_dict(ckpt['state_dict'])
            optimizer.load_state_dict(ckpt['optimizer'])

        # Define the training dataloader.
        train_dataloader = get_loader(roots=roots,
                                      request=('img', 'gt', 'sism'),
                                      shuffle=True,
                                      batch_size=batch_size,
                                      data_aug=True,
                                      num_thread=num_thread,
                                      pin=pin)

        # Train.
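        # Note: "Cosal_Sampler" (see "dataset.py") guarantees that every batch comes
        # from a single image group, so each forward pass of ICNet below processes a
        # group of images sharing the same category.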
        self.ICNet.train()
        for epoch in range(init_epoch + 1, end_epoch):
            start_time = get_time()
            loss_sum = 0.0

            for data_batch in train_dataloader:
                self.ICNet.zero_grad()

                # Obtain a batch of data.
                img, gt, sism = data_batch['img'], data_batch['gt'], data_batch['sism']
                img, gt, sism = img.cuda(), gt.cuda(), sism.cuda()

                if len(img) == 1:
                    # Skip this iteration when the training batch size is 1, due to Batch Normalization.
                    continue

                # Forward.
                preds_list = self.ICNet(image_group=img,
                                        SISMs=sism,
                                        is_training=True)

                # Compute the IoU loss.
                loss = IoU_loss(preds_list, gt)

                # Backward.
                loss.backward()
                optimizer.step()
                loss_sum = loss_sum + loss.detach().item()

            # Save a checkpoint file (".pth") after each epoch.
            mkdir(ckpt_root)
            torch.save({'optimizer': optimizer.state_dict(),
                        'state_dict': self.ICNet.state_dict()}, join(ckpt_root, 'Weights_{}.pth'.format(epoch)))

            # Compute the approximate average loss over the training dataset.
            loss_mean = loss_sum / len(train_dataloader)
            end_time = get_time()

            # Record the training information (".txt").
            content = 'CkptIndex={}: TrainLoss={} LR={} Time={}\n'.format(epoch, loss_mean, learning_rate, end_time - start_time)
            write_doc(doc_path, content)

    def test(self, roots, ckpt_path, pred_root, num_thread, batch_size, original_size, pin):
        with torch.no_grad():
            # Load the specified checkpoint file (".pth").
            state_dict = torch.load(ckpt_path)['state_dict']
            self.ICNet.load_state_dict(state_dict)
            self.ICNet.eval()

            # Get the names of the test datasets.
            datasets = roots.keys()

            # Test ICNet on each dataset.
            for dataset in datasets:
                # Define the test dataloader for the current test dataset.
                test_dataloader = get_loader(roots=roots[dataset],
                                             request=('img', 'sism', 'file_name', 'group_name', 'size'),
                                             shuffle=False,
                                             data_aug=False,
                                             num_thread=num_thread,
                                             batch_size=batch_size,
                                             pin=pin)

                # Create a folder for the current test dataset to save its predictions.
                mkdir(pred_root)
                cur_dataset_pred_root = join(pred_root, dataset)
                mkdir(cur_dataset_pred_root)

                for data_batch in test_dataloader:
                    # Obtain a batch of data.
                    img, sism = data_batch['img'].cuda(), data_batch['sism'].cuda()

                    # Forward.
                    preds = self.ICNet(image_group=img,
                                       SISMs=sism,
                                       is_training=False)

                    # Create a folder for the current batch according to its "group_name" to save the predictions.
                    group_name = data_batch['group_name'][0]
                    cur_group_pred_root = join(cur_dataset_pred_root, group_name)
                    mkdir(cur_group_pred_root)

                    # preds.shape: [N, 1, H, W] -> [N, H, W, 1]
                    preds = preds.permute(0, 2, 3, 1).cpu().numpy()

                    # Make the paths where the predictions will be saved.
                    pred_paths = list(map(lambda file_name: join(cur_group_pred_root, file_name + '.png'), data_batch['file_name']))

                    # For each prediction:
                    for i, pred_path in enumerate(pred_paths):
                        # Resize the prediction to its original size when "original_size == True".
                        H, W = data_batch['size'][0][i], data_batch['size'][1][i]
                        pred = cv2.resize(preds[i], (W, H)) if original_size else preds[i]

                        # Save the prediction.
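                        # "pred" lies in [0, 1] (the output of a sigmoid), so "pred * 255"
                        # maps it to grayscale intensities for the ".png" file.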
                        cv2.imwrite(pred_path, np.array(pred * 255))

--------------------------------------------------------------------------------
/ICNet/test.py:
--------------------------------------------------------------------------------
import os
from solver import Solver

"""
Test settings (used for "test.py"):

test_device:
    Index of the GPU used for the test.

test_batch_size:
    Test batch size.
    * When "test_batch_size == None", the dataloader takes a whole image group as one batch to
      perform the test (regardless of the size of the image group). If your GPU does not have enough memory,
      you are suggested to set "test_batch_size" to a small number (e.g., test_batch_size = 10).

pred_root:
    Folder path for saving the predictions (co-saliency maps).

ckpt_path:
    Path of the checkpoint file (".pth") loaded for the test.

original_size:
    When "original_size == True", the predictions (224*224) of ICNet will be resized to the original sizes.

test_roots:
    A dictionary including multiple sub-dictionaries;
    each sub-dictionary contains the image and SISM folder paths of a specific test dataset.
    Format:
    test_roots = {
        name of dataset_1: {
            'img': image folder path of dataset_1,
            'sism': SISM folder path of dataset_1
        },
        name of dataset_2: {
            'img': image folder path of dataset_2,
            'sism': SISM folder path of dataset_2
        }
        .
        .
        .
    }
"""

test_device = '0'
test_batch_size = None
pred_root = './pred/'
ckpt_path = './ICNet_vgg16.pth'
original_size = False
test_num_thread = 4

# An example to build "test_roots".
test_roots = dict()
datasets = ['MSRC', 'iCoSeg', 'CoSal2015', 'CoSOD3k', 'CoCA']

for dataset in datasets:
    roots = {'img': '/mnt/jwd/data/{}/img_bilinear_224/'.format(dataset),
             'sism': '/mnt/jwd/data/EGNet-SISMs/{}/'.format(dataset)}
    test_roots[dataset] = roots
# ------------- end -------------

if __name__ == '__main__':
    os.environ['CUDA_VISIBLE_DEVICES'] = test_device
    solver = Solver()
    solver.test(roots=test_roots,
                ckpt_path=ckpt_path,
                pred_root=pred_root,
                num_thread=test_num_thread,
                batch_size=test_batch_size,
                original_size=original_size,
                pin=False)

--------------------------------------------------------------------------------
/ICNet/train.py:
--------------------------------------------------------------------------------
import os
from solver import Solver

"""
Training settings (used for "train.py"):

vgg_path:
    Path of the pre-trained VGG16 (".pth") used to initialize ICNet at the start of training.

ckpt_root:
    Folder path where the checkpoint files (".pth") are saved.
    After the i-th training epoch, the checkpoint file is saved to "ckpt_root/Weights_{}.pth".format(i).

train_init_epoch:
    The starting epoch of training.
    When "train_init_epoch == 0", ICNet is initialized with the pre-trained VGG16;
    otherwise, ICNet loads the checkpoint file "ckpt_root/Weights_{}.pth".format(train_init_epoch) for initialization.

train_end_epoch:
    The ending epoch of training.
    We recommend training ICNet for 50~60 epochs.

train_device:
    Index of the GPU used for training.

train_doc_path:
    Path of the file (".txt") used to save the training information.

train_roots:
    A dictionary containing the image, GT and SISM folder paths of the training dataset.
    train_roots = {'img': image folder path of the training dataset,
                   'gt': GT folder path of the training dataset,
                   'sism': SISM folder path of the training dataset}
"""

vgg_path = './vgg16_feat.pth'
ckpt_root = './ckpt/'
train_init_epoch = 0
train_end_epoch = 61
train_device = '0'
train_doc_path = './training.txt'
learning_rate = 1e-5
weight_decay = 1e-4
train_batch_size = 10
train_num_thread = 4

# An example to build "train_roots".
train_roots = {'img': '/mnt/jwd/data/COCO9213/img_bilinear_224/',
               'gt': '/mnt/jwd/data/COCO9213/gt_bilinear_224/',
               'sism': '/mnt/jwd/data/EGNet-SISMs/COCO9213/'}
# ------------- end -------------

if __name__ == '__main__':
    os.environ['CUDA_VISIBLE_DEVICES'] = train_device
    solver = Solver()
    solver.train(roots=train_roots,
                 vgg_path=vgg_path,
                 init_epoch=train_init_epoch,
                 end_epoch=train_end_epoch,
                 learning_rate=learning_rate,
                 batch_size=train_batch_size,
                 weight_decay=weight_decay,
                 ckpt_root=ckpt_root,
                 doc_path=train_doc_path,
                 num_thread=train_num_thread,
                 pin=False)

--------------------------------------------------------------------------------
/ICNet/utils.py:
--------------------------------------------------------------------------------
import os
import torch
import time

"""
mkdir:
    Create a folder if "path" does not exist.
"""
def mkdir(path):
    if not os.path.exists(path):
        os.makedirs(path)

"""
write_doc:
    Append "content" to the file (".txt") at "path".
"""
def write_doc(path, content):
    with open(path, 'a') as file:
        file.write(content)

"""
get_time:
    Obtain the current time (after synchronizing CUDA kernels for accurate timing).
"""
def get_time():
    torch.cuda.synchronize()
    return time.time()

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2020 blanclist

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# **ICNet: Intra-saliency Correlation Network for Co-Saliency Detection**

This repository is the official PyTorch implementation of our *NeurIPS(2020)* paper.

You can switch the branch to "CN" to view the README.md (Chinese version) and obtain the codes with Chinese comments.

(您可以将 branch 切换到 "CN",以查看中文版 README.md 并获取带有中文注释的代码)

<br>

## Training Datasets

**Our training set is a subset of the *COCO* dataset, containing 9213 images.**

* ***COCO9213-os.zip*** (images with original sizes, 4.53GB), [GoogleDrive](https://drive.google.com/file/d/1fOfSX_CtWizDapB0OeTJxAydL2yDOP5H/view?usp=sharing) | [BaiduYun](https://pan.baidu.com/s/1wOxdP6EQEqMwjg3_v1z2-A) (fetch code: 5183).

* ***COCO9213.zip*** (images resized to 224*224, 943MB), [GoogleDrive](https://drive.google.com/file/d/1GbA_WKvJm04Z1tR8pTSzBdYVQ75avg4f/view?usp=sharing) | [BaiduYun](https://pan.baidu.com/s/1r-qCLeG3L6i-OrBfKrXANg) (fetch code: 8d7z).

## Test Datasets

### Used in our paper:

* ***MSRC*** (7 groups, 233 images) ''Object Categorization by Learned Universal Visual Dictionary, *ICCV(2005)*''

* ***iCoseg*** (38 groups, 643 images) ''iCoseg: Interactive Co-segmentation with Intelligent Scribble Guidance, *CVPR(2010)*''

* ***Cosal2015*** (50 groups, 2015 images) ''Detection of Co-salient Objects by Looking Deep and Wide, *IJCV(2016)*''

You can download them from:

* ***test-datasets*** (resized to 224*224, 77MB), [GoogleDrive](https://drive.google.com/drive/folders/1bjI2msek72dOejmK796tXyjFPIE27267?usp=sharing) | [BaiduYun](https://pan.baidu.com/s/1KX7m0g9mgACoTMgkbIjRvw) (fetch code: oq5w).

* ***test-datasets-os*** (original sizes, 142MB), [GoogleDrive](https://drive.google.com/drive/folders/1p--uTLIF-2hRIJk9Xmys9ftTdXrWYslS?usp=sharing) | [BaiduYun](https://pan.baidu.com/s/1kDv7icEDT5pPwQQJkHkgpA) (fetch code: ujdl).

### Released recently:

* **[*CoSOD3k*](http://dpfan.net/CoSOD3K/)** (160 groups, 3316 images) ''Taking a Deeper Look at the Co-salient Object Detection, *CVPR(2020)*''

* **[*CoCA*](http://zhaozhang.net/coca.html)** (80 groups, 1295 images) ''Gradient-Induced Co-Saliency Detection, *ECCV(2020)*''

## Pre-trained Model

We provide the pre-trained ICNet based on SISMs produced by the pre-trained [EGNet](https://github.com/JXingZhao/EGNet) (VGG16-based).

* ***ICNet_vgg16.pth*** (70MB), [GoogleDrive](https://drive.google.com/file/d/1wcT_XmwlshbLqCiJetmzQwi1ZNAzxiSU/view?usp=sharing) | [BaiduYun](https://pan.baidu.com/s/1__iiBcAI2S-Ns9MZnZwp8g) (fetch code: nkj9).

## Prediction Results

We release the co-saliency maps (predictions) generated by our ICNet on 5 benchmark datasets:

***MSRC***, ***iCoseg***, ***Cosal2015***, ***CoCA***, and ***CoSOD3k***.

* ***cosal-maps.zip*** (results of size 224*224, 20MB), [GoogleDrive](https://drive.google.com/file/d/1q9CAzPf5U3VPa_DGxzUGI_DANCuw_WEk/view?usp=sharing) | [BaiduYun](https://pan.baidu.com/s/1qbPJKMTiVStqjSGYWuqSgQ) (fetch code: du5e).

* ***cosal-maps-os.zip*** (results resized to the original sizes, 62MB), [GoogleDrive](https://drive.google.com/file/d/1px4tPVWAgbBPMt6Rp23oNwWz8Ulj6pmX/view?usp=sharing) | [BaiduYun](https://pan.baidu.com/s/1WFQxeIOjOiByiFYHLpuytA) (fetch code: xwcv).

## Training and Test

### Prepare SISMs

ICNet can be trained and tested with SISMs produced by any off-the-shelf SOD method, but we suggest using the **same** SOD method to generate SISMs in the training and test phases to keep them consistent.
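Whichever SOD method you use, the dataloaders in **"./ICNet/dataset.py"** read images, GTs and SISMs group by group, so the three folders are expected to mirror each other, with matching group folder names and file names (GTs and SISMs stored as ".png"). A sketch of the assumed layout (the group and file names below are only placeholders):

```
img_root/
├── group_1/
│   ├── image_1.jpg
│   └── image_2.jpg
└── group_2/
    └── ...

gt_root/          # mirrors img_root, with ".png" files
└── group_1/
    ├── image_1.png
    └── image_2.png

sism_root/        # mirrors img_root, with ".png" files
└── group_1/
    ├── image_1.png
    └── image_2.png
```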

In our paper, we choose the pre-trained [EGNet](https://github.com/JXingZhao/EGNet) (VGG16-based) as the basic SOD method to produce SISMs. You can download these SISMs directly from:

* ***EGNet-SISMs*** (resized to 224*224, 125MB), [GoogleDrive](https://drive.google.com/drive/folders/1cGtXQI2U8pH37-mgSw3otnMsRi36QwBp?usp=sharing) | [BaiduYun](https://pan.baidu.com/s/19Izo6i7A9DfUMzsNbVec6g) (fetch code: ae6a).

### Training

1. Download the pre-trained VGG16 from:

   * ***vgg16_feat.pth*** (56MB), [GoogleDrive](https://drive.google.com/file/d/1ej5ngj2NYH-R-0GfYUDfuM-DNLuFolED/view?usp=sharing) | [BaiduYun](https://pan.baidu.com/s/1kAh7FAUPuVLI5cvtBsxh-A) (fetch code: j0zq).

2. Follow the instructions in **"./ICNet/train.py"** to modify the training settings.

3. Run:

```
python ./ICNet/train.py
```

### Test

1. * To test the **pre-trained** ICNet:

     Download the pre-trained ICNet ***"ICNet_vgg16.pth"*** (the download link is given above).

   * To test an ICNet **trained by yourself**:

     Choose the checkpoint file ***"Weights_i.pth"*** (saved automatically after the i-th epoch) you want to load for the test.

2. Follow the instructions in **"./ICNet/test.py"** to modify the test settings.

3. Run:

```
python ./ICNet/test.py
```

## Evaluation

The folder "./ICNet/evaluator/" contains the evaluation codes implemented in PyTorch (GPU version); the metrics include **max F-measure**, **S-measure**, and **MAE**.

1. Follow the instructions in **"./ICNet/evaluate.py"** to modify the evaluation settings.

2. Run:

```
python ./ICNet/evaluate.py
```

## Compared Methods

We compare our ICNet with 7 state-of-the-art Co-SOD methods:

* ***CBCS*** ''Cluster-Based Co-Saliency Detection, *TIP(2013)*''

* ***CSHS*** ''Co-Saliency Detection Based on Hierarchical Segmentation, *SPL(2014)*''

* ***CoDW*** ''Detection of Co-salient Objects by Looking Deep and Wide, *IJCV(2016)*''

* ***UCSG*** ''Unsupervised CNN-based Co-Saliency Detection with Graphical Optimization, *ECCV(2018)*''

* ***CSMG*** ''Co-saliency Detection via Mask-guided Fully Convolutional Networks with Multi-scale Label Smoothing, *CVPR(2019)*''

* ***MGLCN*** ''A Unified Multiple Graph Learning and Convolutional Network Model for Co-saliency Estimation, *ACM MM(2019)*''

* ***GICD*** ''Gradient-Induced Co-Saliency Detection, *ECCV(2020)*''

You can download the predictions of these methods from:

* ***compared_methods*** (original sizes, 445MB), [GoogleDrive](https://drive.google.com/drive/folders/1qdXWZQ-fF-WaCF-rat0Da7vFrAIYsj09?usp=sharing) | [BaiduYun](https://pan.baidu.com/s/10vpubz39atkg2lz095QvSQ) (fetch code: s7pr).

## Citation

*To be updated.*

## Contact

If you have any questions, feel free to contact me (Wen-Da Jin) at jwd331@126.com. I will reply as soon as possible.

--------------------------------------------------------------------------------
/thumbnail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blanclist/ICNet/f0d5e463839caa2e39a6fc042da22284b7d4c9d9/thumbnail.png
--------------------------------------------------------------------------------