├── ICNet
│   ├── dataset.py
│   ├── evaluate.py
│   ├── evaluator
│   │   ├── __init__.py
│   │   ├── dataset.py
│   │   ├── evaluator.py
│   │   ├── fmeasure.py
│   │   ├── mae.py
│   │   └── smeasure.py
│   ├── loss.py
│   ├── network.py
│   ├── solver.py
│   ├── test.py
│   ├── train.py
│   └── utils.py
├── LICENSE
├── README.md
└── thumbnail.png

--------------------------------------------------------------------------------
/ICNet/dataset.py:
--------------------------------------------------------------------------------
import os
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import torch
import random
import numpy as np
from torch.utils import data
import PIL.ImageOps
import cv2
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from os.path import join
from os import listdir

"""
build_file_paths:
    When "file_names" and "group_names" are both None,
    traverse the file folder to build "file_paths", "group_names", "file_names" and "indices".
    Otherwise, build "file_paths" based on the given "file_names" and "group_names".
"""
def build_file_paths(base, group_names=None, file_names=None, suffix='.png'):
    if file_names is None and group_names is None:
        file_paths = []
        group_names = []
        file_names = []
        indices = []
        cur_group_end_index = 0
        for group_name in listdir(base):
            group_path = join(base, group_name)
            group_file_names = listdir(group_path)
            cur_group_end_index += len(group_file_names)

            # Save the ending index of the current group into "indices", which is prepared for "Cosal_Sampler".
            indices.append(cur_group_end_index)

            for file_name in group_file_names:
                file_path = join(group_path, file_name)
                file_paths.append(file_path)
                group_names.append(group_name)
                file_names.append(file_name[:str(file_name).rfind('.')])
        return file_paths, group_names, file_names, indices
    else:
        file_paths = list(map(lambda i: join(base, group_names[i], file_names[i] + suffix), range(len(file_names))))
        return file_paths

"""
random_flip:
    Flip inputs horizontally with a probability of 0.5.
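    "gt" and "sism" are allowed to be None; None entries are passed through unchanged.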
"""
def random_flip(img, gt, sism):
    datas = (img, gt, sism)
    if random.random() > 0.5:
        datas = tuple(map(lambda data: transforms.functional.hflip(data) if data is not None else None, datas))
    return datas


class ImageData(data.Dataset):
    def __init__(self, roots, request, aug_transform=None, rgb_transform=None, gray_transform=None):
        if 'img' not in request:
            raise Exception('\'img\' must be contained in \'request\'.')

        self.need_gt = 'gt' in request
        self.need_file_name = 'file_name' in request
        self.need_group_name = 'group_name' in request
        self.need_sism = 'sism' in request
        self.need_size = 'size' in request

        img_paths, group_names, file_names, indices = build_file_paths(roots['img'])
        gt_paths = build_file_paths(roots['gt'], group_names, file_names) if self.need_gt else None
        sism_paths = build_file_paths(roots['sism'], group_names, file_names) if self.need_sism else None

        self.img_paths = img_paths
        self.gt_paths = gt_paths
        self.sism_paths = sism_paths
        self.file_names = file_names
        self.group_names = group_names
        self.indices = indices
        self.aug_transform = aug_transform
        self.rgb_transform = rgb_transform
        self.gray_transform = gray_transform

    def __getitem__(self, item):
        img = Image.open(self.img_paths[item]).convert('RGB')
        W, H = img.size
        gt = Image.open(self.gt_paths[item]).convert('L') if self.need_gt else None
        sism = Image.open(self.sism_paths[item]).convert('L') if self.need_sism else None
        group_name = self.group_names[item] if self.need_group_name else None
        file_name = self.file_names[item] if self.need_file_name else None

        if self.aug_transform is not None:
            img, gt, sism = self.aug_transform(img, gt, sism)

        if self.rgb_transform is not None:
            img = self.rgb_transform(img)
        if self.gray_transform is not None and self.need_gt:
            gt = self.gray_transform(gt)
        if self.gray_transform is not None and self.need_sism:
            sism = self.gray_transform(sism)

        data_item = {}
        data_item['img'] = img
        if self.need_gt: data_item['gt'] = gt
        if self.need_sism: data_item['sism'] = sism
        if self.need_file_name: data_item['file_name'] = file_name
        if self.need_group_name: data_item['group_name'] = group_name
        if self.need_size: data_item['size'] = (H, W)
        return data_item

    def __len__(self):
        return len(self.img_paths)


"""
Cosal_Sampler:
    Provide the indices of each batch, ensuring that each batch is extracted from the same image group (i.e., shares the same category).
"""
class Cosal_Sampler(data.Sampler):
    def __init__(self, indices, shuffle, batch_size):
        self.indices = indices
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.len = None
        self.batches_indices = None
        self.reset_batches_indices()

    def reset_batches_indices(self):
        batches_indices = []
        start_idx = 0
        # For each image group (with the same category):
        for end_idx in self.indices:
            # Initialize "group_indices".
            group_indices = list(range(start_idx, end_idx))

            # Shuffle "group_indices" if needed.
            if self.shuffle:
                np.random.shuffle(group_indices)

            # Get the size of the current image group.
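            # For example, with "indices == [5, 12]" (two groups containing 5 and 7
            # images) and "batch_size == 4", the first group [0, 1, 2, 3, 4] is split
            # into the batches [0, 1, 2, 3] and [4], and the second group into
            # [5, 6, 7, 8] and [9, 10, 11] (when "shuffle == False").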
            num = end_idx - start_idx

            # Split "group_indices" into multiple batches according to "self.batch_size",
            # then append the split indices ("batch_indices") to "batches_indices".
            # Note that, when "self.batch_size == None", each image group is regarded as one batch ("batch_size = num").
            idx = 0
            while idx < num:
                batch_size = num if self.batch_size is None else self.batch_size
                batch_indices = group_indices[idx:idx + batch_size]
                batches_indices.append(batch_indices)
                idx += batch_size
            start_idx = end_idx

        # Each entry of "batches_indices" is a list indicating the indices of a specific batch,
        # but neighbouring entries basically belong to the same image group (with the same category).
        # Thus, shuffle "batches_indices" if needed.
        if self.shuffle:
            np.random.shuffle(batches_indices)

        self.len = len(batches_indices)
        self.batches_indices = batches_indices

    def __iter__(self):
        if self.shuffle:
            self.reset_batches_indices()
        return iter(self.batches_indices)

    def __len__(self):
        return self.len


def get_loader(roots, request, batch_size, data_aug, shuffle, num_thread=4, pin=True):
    aug_transform = random_flip if data_aug else None
    rgb_transform = transforms.Compose([
        transforms.Resize([224, 224]),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    gray_transform = transforms.Compose([
        transforms.Resize([224, 224]),
        transforms.ToTensor()
    ])
    dataset = ImageData(roots, request, aug_transform=aug_transform, rgb_transform=rgb_transform, gray_transform=gray_transform)
    cosal_sampler = Cosal_Sampler(indices=dataset.indices, shuffle=shuffle, batch_size=batch_size)
    data_loader = data.DataLoader(dataset=dataset, batch_sampler=cosal_sampler, num_workers=num_thread, pin_memory=pin)
    return data_loader

--------------------------------------------------------------------------------
/ICNet/evaluate.py:
--------------------------------------------------------------------------------
import os
from evaluator.evaluator import evaluate_dataset
from utils import write_doc

"""
* Note:
    The evaluation codes in "./evaluator/" are implemented in PyTorch (GPU version) for acceleration.

    Since the original sizes of some GTs (e.g., in the "Cosal2015" dataset) are too large to evaluate on a GPU
    with limited memory (our "TITAN Xp" runs out of its 12G memory when computing the F-measure), the input
    prediction map and the corresponding GT are resized to 224*224 by our evaluation codes before the metrics
    are computed.
"""

"""
evaluate:
    Given predictions, compute multiple metrics (max F-measure, S-measure and MAE).
    The evaluation results are saved in "doc_path".
"""
def evaluate(roots, doc_path, num_thread, pin):
    datasets = roots.keys()
    for dataset in datasets:
        # Evaluate the predictions of "dataset".
        results = evaluate_dataset(roots=roots[dataset],
                                   dataset=dataset,
                                   batch_size=1,
                                   num_thread=num_thread,
                                   demical=True,
                                   suffixes={'gt': '.png', 'pred': '.png'},
                                   pin=pin)

        # Save the evaluation results.
        content = '{}:\n'.format(dataset)
        content += 'max-Fmeasure={}'.format(results['max_f'])
        content += ' '
        content += 'Smeasure={}'.format(results['s'])
        content += ' '
        content += 'MAE={}\n'.format(results['mae'])
        write_doc(doc_path, content)
    content = '\n'
    write_doc(doc_path, content)

"""
Evaluation settings (used for "evaluate.py"):

eval_device:
    Index of the GPU used for evaluation.

eval_doc_path:
    Path of the file (".txt") used to save the evaluation results.

eval_roots:
    A dictionary including multiple sub-dictionaries;
    each sub-dictionary contains the GT and prediction folder paths of a specific dataset.
    Format:
    eval_roots = {
        name of dataset_1: {
            'gt': GT folder path of dataset_1,
            'pred': prediction folder path of dataset_1
        },
        name of dataset_2: {
            'gt': GT folder path of dataset_2,
            'pred': prediction folder path of dataset_2
        }
        .
        .
        .
    }
"""

eval_device = '0'
eval_doc_path = './evaluation.txt'
eval_num_thread = 4

# An example to build "eval_roots".
eval_roots = dict()
datasets = ['MSRC', 'iCoSeg', 'CoSal2015', 'CoSOD3k', 'CoCA']

for dataset in datasets:
    roots = {'gt': '/mnt/jwd/data/{}/gt_bilinear_224/'.format(dataset),
             'pred': './pred/{}/'.format(dataset)}
    eval_roots[dataset] = roots
# ------------- end -------------

if __name__ == "__main__":
    os.environ['CUDA_VISIBLE_DEVICES'] = eval_device
    evaluate(roots=eval_roots,
             doc_path=eval_doc_path,
             num_thread=eval_num_thread,
             pin=False)

--------------------------------------------------------------------------------
/ICNet/evaluator/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blanclist/ICNet/f0d5e463839caa2e39a6fc042da22284b7d4c9d9/ICNet/evaluator/__init__.py

--------------------------------------------------------------------------------
/ICNet/evaluator/dataset.py:
--------------------------------------------------------------------------------
from os import listdir
from os.path import join
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
from torch.utils import data
import torchvision.transforms as transforms

def build_file_paths(roots, suffixes):
    pred_base = roots['pred']
    gt_base = roots['gt']
    pred_suffix = suffixes['pred']
    gt_suffix = suffixes['gt']

    pred_paths = []
    gt_paths = []
    group_names = listdir(pred_base)
    for group_name in group_names:
        group_pred_names = list(filter(lambda name: name.endswith(pred_suffix), listdir(join(pred_base, group_name))))
        pred_paths += list(map(lambda pred_name: join(pred_base, group_name, pred_name), group_pred_names))
        gt_paths += list(map(lambda pred_name: join(gt_base, group_name, pred_name[:-len(pred_suffix)] + gt_suffix), group_pred_names))
    return gt_paths, pred_paths


class ImageData(data.Dataset):
    def __init__(self, roots, suffixes):
        gt_paths, pred_paths = build_file_paths(roots, suffixes)

        self.gt_paths = gt_paths
        self.pred_paths = pred_paths

    def __getitem__(self, item):
        gt = Image.open(self.gt_paths[item]).convert('L')
        pred = Image.open(self.pred_paths[item]).convert('L')

        transform = transforms.Compose([
            transforms.Resize([224, 224]),
            transforms.ToTensor()
        ])
        gt, pred = transform(gt), transform(pred)

        data_item = {}
        data_item['pred'] = pred
        data_item['gt'] = gt
        return data_item

    def __len__(self):
        return len(self.pred_paths)


def get_loader(roots, suffixes, batch_size, num_thread, pin=True):
    dataset = ImageData(roots, suffixes)
    data_loader = data.DataLoader(dataset=dataset, shuffle=False, batch_size=batch_size, num_workers=num_thread, pin_memory=pin)
    return data_loader

--------------------------------------------------------------------------------
/ICNet/evaluator/evaluator.py:
--------------------------------------------------------------------------------
from .dataset import get_loader
from .smeasure import calc_smeasure
from .fmeasure import calc_p_r_fmeasure
from .mae import calc_mae
import numpy as np
import torch
from decimal import Decimal

def tf(data):
    return float(data)

def tn(data):
    return np.array(data.cpu())

def td(data):
    return Decimal(data).quantize(Decimal('0.000'))

def get_n(gt, pred, n_mask):
    H, W = gt.shape
    HW = H * W
    n_gt = gt.view(1, HW).repeat(255, 1)      # [255, HW]
    n_pred = pred.view(1, HW).repeat(255, 1)  # [255, HW]
    n_pred = torch.where(n_pred <= n_mask, torch.zeros_like(n_pred), torch.ones_like(n_pred))
    return n_gt, n_pred

def evaluate_dataset(roots, dataset, batch_size, num_thread, demical, suffixes, pin):
    with torch.no_grad():
        dataloader = get_loader(roots, suffixes, batch_size, num_thread, pin=pin)
        p = np.zeros(255)
        r = np.zeros(255)
        s = 0.0
        f = np.zeros(255)
        mae = 0.0
        n_mask = torch.FloatTensor(np.array(range(255)) / 255.0).view(255, 1).repeat(1, 224 * 224).cuda()  # [255, HW]
        for batch in dataloader:
            gt, pred = batch['gt'].cuda().view(224, 224), batch['pred'].cuda().view(224, 224)

            _s = calc_smeasure(gt, pred)
            _mae = calc_mae(gt, pred)
            n_gt, n_pred = get_n(gt, pred, n_mask)
            _p, _r, _f = calc_p_r_fmeasure(n_gt, n_pred, n_mask)
            _mean_f = torch.mean(_f)
            _max_f = torch.max(_f)

            _s = tf(_s)
            _p = tn(_p)
            _r = tn(_r)
            _f = tn(_f)
            _mae = tf(_mae)
            _mean_f = tf(_mean_f)
            _max_f = tf(_max_f)

            p += _p
            r += _r
            s += _s
            f += _f
            mae += _mae
        num = len(dataloader)
        p /= num
        r /= num
        f /= num
        s, mae, mean_f, max_f = s / num, mae / num, np.mean(f), np.max(f)
        if demical:
            s, mae, mean_f, max_f = td(s), td(mae), td(mean_f), td(max_f)

        results = {'s': s, 'p': p, 'r': r, 'f': f,
                   'mae': mae,
                   'mean_f': mean_f, 'max_f': max_f}
        return results

--------------------------------------------------------------------------------
/ICNet/evaluator/fmeasure.py:
--------------------------------------------------------------------------------
import torch

def calc_p_r_fmeasure(n_gt, n_pred, n_mask):
    tp = torch.sum(n_pred * n_gt, dim=1)   # [255]
    tp_plus_fp = torch.sum(n_pred, dim=1)  # [255]
    temp = torch.ones_like(tp_plus_fp)
    tp_plus_fp = torch.where(tp_plus_fp == 0.0, temp, tp_plus_fp)
    tp_plus_fn = torch.sum(n_gt, dim=1)    # [255]
    tp_plus_fn = torch.where(tp_plus_fn == 0.0, temp, tp_plus_fn)
    precision = tp / tp_plus_fp
    recall = tp / tp_plus_fn
    a = 1.3 * precision * recall
    b = 0.3 * precision + recall
    temp = torch.ones_like(b) * 1e31
    b = torch.where(b == 0.0, temp, b)
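    # The division below evaluates the F-measure with beta^2 = 0.3 (the common
    # setting in salient object detection) at all 255 thresholds:
    #     F_beta = (1 + beta^2) * P * R / (beta^2 * P + R),
    # where the 1e31 guard above keeps the denominator non-zero.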
    fBetaScore = a / b
    return precision, recall, fBetaScore

--------------------------------------------------------------------------------
/ICNet/evaluator/mae.py:
--------------------------------------------------------------------------------
import torch

def calc_mae(gt, pred):
    return torch.mean(torch.abs(gt - pred))

--------------------------------------------------------------------------------
/ICNet/evaluator/smeasure.py:
--------------------------------------------------------------------------------
import torch
import numpy as np

def calc_smeasure(gt, pred):
    def mean(x):
        return torch.mean(x)

    def cov(x, y):
        mean_x = mean(x)
        mean_y = mean(y)
        return torch.mean((x - mean_x) * (y - mean_y))

    def ssim(x, y):
        mean_x = mean(x)
        mean_y = mean(y)
        cov_x_x = cov(x, x)
        cov_y_y = cov(y, y)
        cov_x_y = cov(x, y)
        a = 4.0 * mean_x * mean_y * cov_x_y
        b = (mean_x ** 2 + mean_y ** 2) * (cov_x_x + cov_y_y)
        return a / (b + 1e-12)

    def O(x, mask):
        mean = torch.sum(x * mask) / (1e-12 + torch.sum(mask))
        var = torch.sqrt(torch.sum(((x - mean) ** 2) * mask) / (1e-12 + torch.sum(mask)))
        return mean * 2.0 / (1.0 + mean ** 2 + var)

    def centroid(y):
        h, w = y.shape
        total = 1e-12 + torch.sum(y)
        hw = int(torch.round(torch.sum(torch.sum(y, axis=0) * torch.from_numpy(np.array(range(1, 1 + w))).cuda()) / total))
        hh = int(torch.round(torch.sum(torch.sum(y, axis=1) * torch.from_numpy(np.array(range(1, 1 + h))).cuda()) / total))

        area = h * w
        w1 = hh * hw / area
        w2 = hh * (w - hw) / area
        w3 = (h - hh) * hw / area
        w4 = 1.0 - w1 - w2 - w3
        return hh, hw, h, w, w1, w2, w3, w4

    def seg(x, hh, hw, h, w):
        x1 = x[0:hh, 0:hw]
        x2 = x[0:hh, hw:w]
        x3 = x[hh:h, 0:hw]
        x4 = x[hh:h, hw:w]
        return x1, x2, x3, x4

    def Sr(x, y):
        hh, hw, h, w, w1, w2, w3, w4 = centroid(y)
        x1, x2, x3, x4 = seg(x, hh, hw, h, w)
        y1, y2, y3, y4 = seg(y, hh, hw, h, w)
        return ssim(x1, y1) * w1 + ssim(x2, y2) * w2 + ssim(x3, y3) * w3 + ssim(x4, y4) * w4

    def So(x, y):
        mu = mean(y)
        return O(x, y) * mu + O(1.0 - x, 1.0 - y) * (1.0 - mu)

    def Sm(x, y):
        return Sr(x, y) * 0.5 + So(x, y) * 0.5

    return Sm(pred, gt)

--------------------------------------------------------------------------------
/ICNet/loss.py:
--------------------------------------------------------------------------------
import torch

"""
IoU_loss:
    Compute the IoU loss between predictions and ground-truths for training [Equation 3].
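    For predictions "p" and ground-truths "g" taking values in [0, 1], the loss computed below is
        loss = 1 - mean_{N, C}( sum_{H, W} min(p, g) / sum_{H, W} max(p, g) ),
    i.e., one minus the soft IoU, averaged over the samples and the prediction maps.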
"""
def IoU_loss(preds_list, gt):
    preds = torch.cat(preds_list, dim=1)
    N, C, H, W = preds.shape
    min_tensor = torch.where(preds < gt, preds, gt)    # shape=[N, C, H, W]
    max_tensor = torch.where(preds > gt, preds, gt)    # shape=[N, C, H, W]
    min_sum = min_tensor.view(N, C, H * W).sum(dim=2)  # shape=[N, C]
    max_sum = max_tensor.view(N, C, H * W).sum(dim=2)  # shape=[N, C]
    loss = 1 - (min_sum / max_sum).mean()
    return loss

--------------------------------------------------------------------------------
/ICNet/network.py:
--------------------------------------------------------------------------------
import torch
import time
import numpy as np
import torch.nn.functional as F
from torch import nn
from torch.nn import init
from os.path import join
np.set_printoptions(suppress=True, threshold=1e5)

"""
resize:
    Resize a tensor (shape=[N, C, H, W]) to the target size (default: 224*224).
"""
def resize(input, target_size=(224, 224)):
    return F.interpolate(input, (target_size[0], target_size[1]), mode='bilinear', align_corners=True)

"""
weights_init:
    Weights initialization.
"""
def weights_init(module):
    if isinstance(module, nn.Conv2d):
        init.normal_(module.weight, 0, 0.01)
        if module.bias is not None:
            init.constant_(module.bias, 0)
    elif isinstance(module, nn.BatchNorm2d):
        init.constant_(module.weight, 1)
        init.constant_(module.bias, 0)


"""
VGG16:
    The VGG16 backbone.
"""
class VGG16(nn.Module):
    def __init__(self):
        super(VGG16, self).__init__()
        layers = []
        in_channel = 3
        vgg_out_channels = (64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M')
        for out_channel in vgg_out_channels:
            if out_channel == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                conv2d = nn.Conv2d(in_channel, out_channel, 3, 1, 1)
                layers += [conv2d, nn.ReLU(inplace=True)]
                in_channel = out_channel
        self.vgg = nn.ModuleList(layers)
        self.table = {'conv1_1': 0, 'conv1_2': 2, 'conv1_2_mp': 4,
                      'conv2_1': 5, 'conv2_2': 7, 'conv2_2_mp': 9,
                      'conv3_1': 10, 'conv3_2': 12, 'conv3_3': 14, 'conv3_3_mp': 16,
                      'conv4_1': 17, 'conv4_2': 19, 'conv4_3': 21, 'conv4_3_mp': 23,
                      'conv5_1': 24, 'conv5_2': 26, 'conv5_3': 28, 'conv5_3_mp': 30, 'final': 31}

    def forward(self, feats, start_layer_name, end_layer_name):
        start_idx = self.table[start_layer_name]
        end_idx = self.table[end_layer_name]
        for idx in range(start_idx, end_idx):
            feats = self.vgg[idx](feats)
        return feats


"""
Prediction:
    Compress the channels of input features to 1, then predict maps with the sigmoid function.
"""
class Prediction(nn.Module):
    def __init__(self, in_channel):
        super(Prediction, self).__init__()
        self.pred = nn.Sequential(nn.Conv2d(in_channel, 1, 1), nn.Sigmoid())

    def forward(self, feats):
        pred = self.pred(feats)
        return pred


"""
Res:
    Two convolutional layers with a residual connection.
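    The output is "relu(feats + conv(feats))", keeping the channel number unchanged.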
"""
class Res(nn.Module):
    def __init__(self, in_channel):
        super(Res, self).__init__()
        self.conv = nn.Sequential(nn.Conv2d(in_channel, in_channel, 3, 1, 1),
                                  nn.BatchNorm2d(in_channel), nn.ReLU(inplace=True),
                                  nn.Conv2d(in_channel, in_channel, 3, 1, 1))

    def forward(self, feats):
        feats = feats + self.conv(feats)
        feats = F.relu(feats, inplace=True)
        return feats

"""
Cosal_Module:
    Given features extracted from the VGG16 backbone,
    exploit SISMs to build intra cues and inter cues.
"""
class Cosal_Module(nn.Module):
    def __init__(self, H, W):
        super(Cosal_Module, self).__init__()
        self.cosal_feat = Cosal_Sub_Module(H, W)
        self.conv = nn.Sequential(nn.Conv2d(256, 128, 1), Res(128))

    def forward(self, feats, SISMs):
        # Get foreground co-saliency features.
        fore_cosal_feats = self.cosal_feat(feats, SISMs)

        # Get background co-saliency features.
        back_cosal_feats = self.cosal_feat(feats, 1.0 - SISMs)

        # Fuse the foreground and background co-saliency features
        # to generate co-saliency enhanced features.
        cosal_enhanced_feats = self.conv(torch.cat([fore_cosal_feats, back_cosal_feats], dim=1))
        return cosal_enhanced_feats

"""
Cosal_Sub_Module:
    * The kernel module of ICNet.
    Generate foreground/background co-salient features by using SISMs.
"""
class Cosal_Sub_Module(nn.Module):
    def __init__(self, H, W):
        super(Cosal_Sub_Module, self).__init__()
        channel = H * W
        self.conv = nn.Sequential(nn.Conv2d(channel, 128, 1), Res(128))

    def forward(self, feats, SISMs):
        N, C, H, W = feats.shape
        HW = H * W

        # Resize SISMs to the same spatial size as the input feats.
        SISMs = resize(SISMs, [H, W])  # shape=[N, 1, H, W]

        # NFs: L2-normalized features.
        NFs = F.normalize(feats, dim=1)  # shape=[N, C, H, W]

        def CFM(SIVs, NFs):
            # Compute correlation maps [Figure 4] between SIVs and the pixel-wise feature vectors in NFs by inner product.
            # We implement this process by "F.conv2d()", which takes SIVs as 1*1 kernels to convolve NFs.
            correlation_maps = F.conv2d(NFs, weight=SIVs)  # shape=[N, N, H, W]

            # Vectorize and normalize the correlation maps.
            correlation_maps = F.normalize(correlation_maps.reshape(N, N, HW), dim=2)  # shape=[N, N, HW]

            # Compute the weight vectors [Equation 2].
            correlation_matrix = torch.matmul(correlation_maps, correlation_maps.permute(0, 2, 1))  # shape=[N, N, N]
            weight_vectors = correlation_matrix.sum(dim=2).softmax(dim=1)  # shape=[N, N]

            # Fuse the correlation maps with the weight vectors to build the co-salient attention (CSA) maps.
            CSA_maps = torch.sum(correlation_maps * weight_vectors.view(N, N, 1), dim=1)  # shape=[N, HW]

            # Max-min normalize the CSA maps.
            min_value = torch.min(CSA_maps, dim=1, keepdim=True)[0]
            max_value = torch.max(CSA_maps, dim=1, keepdim=True)[0]
            CSA_maps = (CSA_maps - min_value) / (max_value - min_value + 1e-12)  # shape=[N, HW]
            CSA_maps = CSA_maps.view(N, 1, H, W)  # shape=[N, 1, H, W]
            return CSA_maps

        def get_SCFs(NFs):
            NFs = NFs.view(N, C, HW)  # shape=[N, C, HW]
            SCFs = torch.matmul(NFs.permute(0, 2, 1), NFs).view(N, -1, H, W)  # shape=[N, HW, H, W]
            return SCFs

        # Compute SIVs [Section 3.2, Equation 1].
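        # Averaging "NFs * SISMs" over the spatial dimensions pools each image's
        # salient-region features into a single C-dimensional vector; the following
        # L2 normalization makes the result insensitive to the overall scale of the SISM.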
        SIVs = F.normalize((NFs * SISMs).mean(dim=3).mean(dim=2), dim=1).view(N, C, 1, 1)  # shape=[N, C, 1, 1]

        # Compute the co-salient attention (CSA) maps [Section 3.3].
        CSA_maps = CFM(SIVs, NFs)  # shape=[N, 1, H, W]

        # Compute the self-correlation features (SCFs) [Section 3.4].
        SCFs = get_SCFs(NFs)  # shape=[N, HW, H, W]

        # Rearrange the channel order of SCFs to obtain RSCFs [Section 3.4].
        evidence = CSA_maps.view(N, HW)  # shape=[N, HW]
        indices = torch.argsort(evidence, dim=1, descending=True).view(N, HW, 1, 1).repeat(1, 1, H, W)  # shape=[N, HW, H, W]
        RSCFs = torch.gather(SCFs, dim=1, index=indices)  # shape=[N, HW, H, W]
        cosal_feat = self.conv(RSCFs * CSA_maps)  # shape=[N, 128, H, W]
        return cosal_feat

"""
Decoder_Block:
    A U-Net-like decoder block that fuses co-saliency features and low-level features for upsampling.
"""
class Decoder_Block(nn.Module):
    def __init__(self, in_channel):
        super(Decoder_Block, self).__init__()
        self.cmprs = nn.Conv2d(in_channel, 32, 1)
        self.merge_conv = nn.Sequential(nn.Conv2d(96, 96, 3, 1, 1), nn.BatchNorm2d(96), nn.ReLU(inplace=True),
                                        nn.Conv2d(96, 32, 3, 1, 1), nn.BatchNorm2d(32), nn.ReLU(inplace=True))
        self.pred = Prediction(32)

    def forward(self, low_level_feats, cosal_map, SISMs, old_feats):
        _, _, H, W = low_level_feats.shape
        # Adjust cosal_map, SISMs and old_feats to the same spatial size as low_level_feats.
        cosal_map = resize(cosal_map, [H, W])
        SISMs = resize(SISMs, [H, W])
        old_feats = resize(old_feats, [H, W])

        # Predict co-saliency maps of size H*W.
        cmprs = self.cmprs(low_level_feats)
        new_feats = self.merge_conv(torch.cat([cmprs * cosal_map,
                                               cmprs * SISMs,
                                               old_feats], dim=1))
        new_cosal_map = self.pred(new_feats)
        return new_feats, new_cosal_map


"""
ICNet:
    The entire ICNet.
    Given a group of images and the corresponding SISMs, ICNet outputs a group of co-saliency maps (predictions) at once.
"""
class ICNet(nn.Module):
    def __init__(self):
        super(ICNet, self).__init__()
        self.vgg = VGG16()
        self.Co6 = Cosal_Module(7, 7)
        self.Co5 = Cosal_Module(14, 14)
        self.Co4 = Cosal_Module(28, 28)
        self.conv6_cmprs = nn.Sequential(nn.MaxPool2d(2, 2), nn.Conv2d(512, 128, 1),
                                         nn.Conv2d(128, 128, 3, 1, 1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
                                         nn.Conv2d(128, 128, 3, 1, 1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
                                         nn.Conv2d(128, 128, 3, 1, 1))
        self.conv5_cmprs = nn.Conv2d(512, 256, 1)
        self.conv4_cmprs = nn.Conv2d(512, 256, 1)

        self.merge_co_56 = Res(128)
        self.merge_co_45 = nn.Sequential(Res(128), nn.Conv2d(128, 32, 1))
        self.get_pred_4 = Prediction(32)
        self.refine_3 = Decoder_Block(256)
        self.refine_2 = Decoder_Block(128)
        self.refine_1 = Decoder_Block(64)

    def forward(self, image_group, SISMs, is_training):
        # Extract features from the VGG16 backbone.
        conv1_2 = self.vgg(image_group, 'conv1_1', 'conv1_2_mp')  # shape=[N, 64, 224, 224]
        conv2_2 = self.vgg(conv1_2, 'conv1_2_mp', 'conv2_2_mp')   # shape=[N, 128, 112, 112]
        conv3_3 = self.vgg(conv2_2, 'conv2_2_mp', 'conv3_3_mp')   # shape=[N, 256, 56, 56]
        conv4_3 = self.vgg(conv3_3, 'conv3_3_mp', 'conv4_3_mp')   # shape=[N, 512, 28, 28]
        conv5_3 = self.vgg(conv4_3, 'conv4_3_mp', 'conv5_3_mp')   # shape=[N, 512, 14, 14]

        # Compress the channels of the high-level features.
        conv6_cmprs = self.conv6_cmprs(conv5_3)  # shape=[N, 128, 7, 7]
        conv5_cmprs = self.conv5_cmprs(conv5_3)  # shape=[N, 256, 14, 14]
        conv4_cmprs = self.conv4_cmprs(conv4_3)  # shape=[N, 256, 28, 28]

        # Obtain co-saliency features.
        cosal_feat_6 = self.Co6(conv6_cmprs, SISMs)  # shape=[N, 128, 7, 7]
        cosal_feat_5 = self.Co5(conv5_cmprs, SISMs)  # shape=[N, 128, 14, 14]
        cosal_feat_4 = self.Co4(conv4_cmprs, SISMs)  # shape=[N, 128, 28, 28]

        # Merge the co-saliency features and predict co-saliency maps of size 28*28 (i.e., "cosal_map_4").
        feat_56 = self.merge_co_56(cosal_feat_5 + resize(cosal_feat_6, [14, 14]))  # shape=[N, 128, 14, 14]
        feat_45 = self.merge_co_45(cosal_feat_4 + resize(feat_56, [28, 28]))       # shape=[N, 32, 28, 28]
        cosal_map_4 = self.get_pred_4(feat_45)  # shape=[N, 1, 28, 28]

        # Obtain co-saliency maps of size 224*224 (i.e., "cosal_map_1") by progressive upsampling.
        feat_34, cosal_map_3 = self.refine_3(conv3_3, cosal_map_4, SISMs, feat_45)
        feat_23, cosal_map_2 = self.refine_2(conv2_2, cosal_map_4, SISMs, feat_34)
        _, cosal_map_1 = self.refine_1(conv1_2, cosal_map_4, SISMs, feat_23)  # shape=[N, 1, 224, 224]

        # Return the predicted co-saliency maps.
        if is_training:
            preds_list = [resize(cosal_map_4), resize(cosal_map_3), resize(cosal_map_2), cosal_map_1]
            return preds_list
        else:
            preds = cosal_map_1
            return preds

--------------------------------------------------------------------------------
/ICNet/solver.py:
--------------------------------------------------------------------------------
import torch
from torch.optim import Adam
import network
from loss import IoU_loss
import numpy as np
import cv2
from dataset import get_loader
from os.path import join
import random
from utils import mkdir, write_doc, get_time


class Solver(object):
    def __init__(self):
        self.ICNet = network.ICNet().cuda()

    def train(self, roots, init_epoch, end_epoch, learning_rate, batch_size, weight_decay, ckpt_root, doc_path, num_thread, pin, vgg_path=None):
        # Define the Adam optimizer.
        optimizer = Adam(self.ICNet.parameters(),
                         lr=learning_rate,
                         weight_decay=weight_decay)

        # Load a ".pth" file to initialize the model.
        if init_epoch == 0:
            # From pre-trained VGG16.
            self.ICNet.apply(network.weights_init)
            self.ICNet.vgg.vgg.load_state_dict(torch.load(vgg_path))
        else:
            # From an existing checkpoint file.
            ckpt = torch.load(join(ckpt_root, 'Weights_{}.pth'.format(init_epoch)))
            self.ICNet.load_state_dict(ckpt['state_dict'])
            optimizer.load_state_dict(ckpt['optimizer'])

        # Define the training dataloader.
        train_dataloader = get_loader(roots=roots,
                                      request=('img', 'gt', 'sism'),
                                      shuffle=True,
                                      batch_size=batch_size,
                                      data_aug=True,
                                      num_thread=num_thread,
                                      pin=pin)

        # Train.
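        # Note: "Cosal_Sampler" (see "dataset.py") guarantees that every batch comes
        # from a single image group, so each forward pass of ICNet below processes a
        # group of images sharing the same category.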
        self.ICNet.train()
        for epoch in range(init_epoch + 1, end_epoch):
            start_time = get_time()
            loss_sum = 0.0

            for data_batch in train_dataloader:
                self.ICNet.zero_grad()

                # Obtain a batch of data.
                img, gt, sism = data_batch['img'], data_batch['gt'], data_batch['sism']
                img, gt, sism = img.cuda(), gt.cuda(), sism.cuda()

                if len(img) == 1:
                    # Skip this iteration when the training batch size is 1, due to Batch Normalization.
                    continue

                # Forward.
                preds_list = self.ICNet(image_group=img,
                                        SISMs=sism,
                                        is_training=True)

                # Compute the IoU loss.
                loss = IoU_loss(preds_list, gt)

                # Backward.
                loss.backward()
                optimizer.step()
                loss_sum = loss_sum + loss.detach().item()

            # Save a checkpoint file (".pth") after each epoch.
            mkdir(ckpt_root)
            torch.save({'optimizer': optimizer.state_dict(),
                        'state_dict': self.ICNet.state_dict()}, join(ckpt_root, 'Weights_{}.pth'.format(epoch)))

            # Compute the approximate average loss over the training dataset.
            loss_mean = loss_sum / len(train_dataloader)
            end_time = get_time()

            # Record the training information (".txt").
            content = 'CkptIndex={}: TrainLoss={} LR={} Time={}\n'.format(epoch, loss_mean, learning_rate, end_time - start_time)
            write_doc(doc_path, content)

    def test(self, roots, ckpt_path, pred_root, num_thread, batch_size, original_size, pin):
        with torch.no_grad():
            # Load the specified checkpoint file (".pth").
            state_dict = torch.load(ckpt_path)['state_dict']
            self.ICNet.load_state_dict(state_dict)
            self.ICNet.eval()

            # Get the names of the test datasets.
            datasets = roots.keys()

            # Test ICNet on each dataset.
            for dataset in datasets:
                # Define the test dataloader for the current test dataset.
                test_dataloader = get_loader(roots=roots[dataset],
                                             request=('img', 'sism', 'file_name', 'group_name', 'size'),
                                             shuffle=False,
                                             data_aug=False,
                                             num_thread=num_thread,
                                             batch_size=batch_size,
                                             pin=pin)

                # Create a folder for the current test dataset to save its predictions.
                mkdir(pred_root)
                cur_dataset_pred_root = join(pred_root, dataset)
                mkdir(cur_dataset_pred_root)

                for data_batch in test_dataloader:
                    # Obtain a batch of data.
                    img, sism = data_batch['img'].cuda(), data_batch['sism'].cuda()

                    # Forward.
                    preds = self.ICNet(image_group=img,
                                       SISMs=sism,
                                       is_training=False)

                    # Create a folder for the current batch according to its "group_name" to save the predictions.
                    group_name = data_batch['group_name'][0]
                    cur_group_pred_root = join(cur_dataset_pred_root, group_name)
                    mkdir(cur_group_pred_root)

                    # preds.shape: [N, 1, H, W] -> [N, H, W, 1]
                    preds = preds.permute(0, 2, 3, 1).cpu().numpy()

                    # Make the paths where the predictions will be saved.
                    pred_paths = list(map(lambda file_name: join(cur_group_pred_root, file_name + '.png'), data_batch['file_name']))

                    # For each prediction:
                    for i, pred_path in enumerate(pred_paths):
                        # Resize the prediction to its original size when "original_size == True".
                        H, W = data_batch['size'][0][i], data_batch['size'][1][i]
                        pred = cv2.resize(preds[i], (W, H)) if original_size else preds[i]

                        # Save the prediction.
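                        # "pred" lies in [0, 1] (the output of a sigmoid), so "pred * 255"
                        # maps it to grayscale intensities for the ".png" file.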
                        cv2.imwrite(pred_path, np.array(pred * 255))

--------------------------------------------------------------------------------
/ICNet/test.py:
--------------------------------------------------------------------------------
import os
from solver import Solver

"""
Test settings (used for "test.py"):

test_device:
    Index of the GPU used for the test.

test_batch_size:
    Test batch size.
    * When "test_batch_size == None", the dataloader takes a whole image group as one batch to
      perform the test (regardless of the size of the image group). If your GPU does not have enough memory,
      you are suggested to set "test_batch_size" to a small number (e.g., test_batch_size = 10).

pred_root:
    Folder path for saving the predictions (co-saliency maps).

ckpt_path:
    Path of the checkpoint file (".pth") loaded for the test.

original_size:
    When "original_size == True", the predictions (224*224) of ICNet will be resized to the original sizes.

test_roots:
    A dictionary including multiple sub-dictionaries;
    each sub-dictionary contains the image and SISM folder paths of a specific test dataset.
    Format:
    test_roots = {
        name of dataset_1: {
            'img': image folder path of dataset_1,
            'sism': SISM folder path of dataset_1
        },
        name of dataset_2: {
            'img': image folder path of dataset_2,
            'sism': SISM folder path of dataset_2
        }
        .
        .
        .
    }
"""

test_device = '0'
test_batch_size = None
pred_root = './pred/'
ckpt_path = './ICNet_vgg16.pth'
original_size = False
test_num_thread = 4

# An example to build "test_roots".
test_roots = dict()
datasets = ['MSRC', 'iCoSeg', 'CoSal2015', 'CoSOD3k', 'CoCA']

for dataset in datasets:
    roots = {'img': '/mnt/jwd/data/{}/img_bilinear_224/'.format(dataset),
             'sism': '/mnt/jwd/data/EGNet-SISMs/{}/'.format(dataset)}
    test_roots[dataset] = roots
# ------------- end -------------

if __name__ == '__main__':
    os.environ['CUDA_VISIBLE_DEVICES'] = test_device
    solver = Solver()
    solver.test(roots=test_roots,
                ckpt_path=ckpt_path,
                pred_root=pred_root,
                num_thread=test_num_thread,
                batch_size=test_batch_size,
                original_size=original_size,
                pin=False)

--------------------------------------------------------------------------------
/ICNet/train.py:
--------------------------------------------------------------------------------
import os
from solver import Solver

"""
Training settings (used for "train.py"):

vgg_path:
    Path of the pre-trained VGG16 (".pth") used to initialize ICNet at the start of training.

ckpt_root:
    Folder path where the checkpoint files (".pth") are saved.
    After the i-th training epoch, the checkpoint file is saved to "ckpt_root/Weights_{}.pth".format(i).

train_init_epoch:
    The starting epoch of training.
    When "train_init_epoch == 0", ICNet is initialized with the pre-trained VGG16;
    otherwise, ICNet loads the checkpoint file "ckpt_root/Weights_{}.pth".format(train_init_epoch) for initialization.

train_end_epoch:
    The ending epoch of training.
    We recommend training ICNet for 50~60 epochs.

train_device:
    Index of the GPU used for training.

train_doc_path:
    Path of the file (".txt") used to save the training information.

train_roots:
    A dictionary containing the image, GT and SISM folder paths of the training dataset.
    train_roots = {'img': image folder path of the training dataset,
                   'gt': GT folder path of the training dataset,
                   'sism': SISM folder path of the training dataset}
"""

vgg_path = './vgg16_feat.pth'
ckpt_root = './ckpt/'
train_init_epoch = 0
train_end_epoch = 61
train_device = '0'
train_doc_path = './training.txt'
learning_rate = 1e-5
weight_decay = 1e-4
train_batch_size = 10
train_num_thread = 4

# An example to build "train_roots".
train_roots = {'img': '/mnt/jwd/data/COCO9213/img_bilinear_224/',
               'gt': '/mnt/jwd/data/COCO9213/gt_bilinear_224/',
               'sism': '/mnt/jwd/data/EGNet-SISMs/COCO9213/'}
# ------------- end -------------

if __name__ == '__main__':
    os.environ['CUDA_VISIBLE_DEVICES'] = train_device
    solver = Solver()
    solver.train(roots=train_roots,
                 vgg_path=vgg_path,
                 init_epoch=train_init_epoch,
                 end_epoch=train_end_epoch,
                 learning_rate=learning_rate,
                 batch_size=train_batch_size,
                 weight_decay=weight_decay,
                 ckpt_root=ckpt_root,
                 doc_path=train_doc_path,
                 num_thread=train_num_thread,
                 pin=False)

--------------------------------------------------------------------------------
/ICNet/utils.py:
--------------------------------------------------------------------------------
import os
import torch
import time

"""
mkdir:
    Create a folder if "path" does not exist.
"""
def mkdir(path):
    if not os.path.exists(path):
        os.makedirs(path)

"""
write_doc:
    Append "content" to the file (".txt") at "path".
"""
def write_doc(path, content):
    with open(path, 'a') as file:
        file.write(content)

"""
get_time:
    Obtain the current time (after synchronizing CUDA kernels for accurate timing).
"""
def get_time():
    torch.cuda.synchronize()
    return time.time()

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2020 blanclist

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# **ICNet: Intra-saliency Correlation Network for Co-Saliency Detection**

This repository is the official PyTorch implementation of our *NeurIPS(2020)* paper.

You can switch the branch to "CN" to view the README.md (Chinese version) and obtain the codes with Chinese comments.

(您可以将 branch 切换到 "CN",以查看中文版 README.md 并获取带有中文注释的代码)

<br>

## Training Datasets

**Our training set is a subset of the *COCO* dataset, containing 9213 images.**

* ***COCO9213-os.zip*** (images with original sizes, 4.53GB), [GoogleDrive](https://drive.google.com/file/d/1fOfSX_CtWizDapB0OeTJxAydL2yDOP5H/view?usp=sharing) | [BaiduYun](https://pan.baidu.com/s/1wOxdP6EQEqMwjg3_v1z2-A) (fetch code: 5183).

* ***COCO9213.zip*** (images resized to 224*224, 943MB), [GoogleDrive](https://drive.google.com/file/d/1GbA_WKvJm04Z1tR8pTSzBdYVQ75avg4f/view?usp=sharing) | [BaiduYun](https://pan.baidu.com/s/1r-qCLeG3L6i-OrBfKrXANg) (fetch code: 8d7z).

## Test Datasets

### Used in our paper:

* ***MSRC*** (7 groups, 233 images) ''Object Categorization by Learned Universal Visual Dictionary, *ICCV(2005)*''

* ***iCoseg*** (38 groups, 643 images) ''iCoseg: Interactive Co-segmentation with Intelligent Scribble Guidance, *CVPR(2010)*''

* ***Cosal2015*** (50 groups, 2015 images) ''Detection of Co-salient Objects by Looking Deep and Wide, *IJCV(2016)*''

You can download them from:

* ***test-datasets*** (resized to 224*224, 77MB), [GoogleDrive](https://drive.google.com/drive/folders/1bjI2msek72dOejmK796tXyjFPIE27267?usp=sharing) | [BaiduYun](https://pan.baidu.com/s/1KX7m0g9mgACoTMgkbIjRvw) (fetch code: oq5w).

* ***test-datasets-os*** (original sizes, 142MB), [GoogleDrive](https://drive.google.com/drive/folders/1p--uTLIF-2hRIJk9Xmys9ftTdXrWYslS?usp=sharing) | [BaiduYun](https://pan.baidu.com/s/1kDv7icEDT5pPwQQJkHkgpA) (fetch code: ujdl).

### Released recently:

* **[*CoSOD3k*](http://dpfan.net/CoSOD3K/)** (160 groups, 3316 images) ''Taking a Deeper Look at the Co-salient Object Detection, *CVPR(2020)*''

* **[*CoCA*](http://zhaozhang.net/coca.html)** (80 groups, 1295 images) ''Gradient-Induced Co-Saliency Detection, *ECCV(2020)*''

## Pre-trained Model

We provide the pre-trained ICNet based on SISMs produced by the pre-trained [EGNet](https://github.com/JXingZhao/EGNet) (VGG16-based).

* ***ICNet_vgg16.pth*** (70MB), [GoogleDrive](https://drive.google.com/file/d/1wcT_XmwlshbLqCiJetmzQwi1ZNAzxiSU/view?usp=sharing) | [BaiduYun](https://pan.baidu.com/s/1__iiBcAI2S-Ns9MZnZwp8g) (fetch code: nkj9).

## Prediction Results

We release the co-saliency maps (predictions) generated by our ICNet on 5 benchmark datasets:

***MSRC***, ***iCoseg***, ***Cosal2015***, ***CoCA***, and ***CoSOD3k***.

* ***cosal-maps.zip*** (results of size 224*224, 20MB), [GoogleDrive](https://drive.google.com/file/d/1q9CAzPf5U3VPa_DGxzUGI_DANCuw_WEk/view?usp=sharing) | [BaiduYun](https://pan.baidu.com/s/1qbPJKMTiVStqjSGYWuqSgQ) (fetch code: du5e).

* ***cosal-maps-os.zip*** (results resized to the original sizes, 62MB), [GoogleDrive](https://drive.google.com/file/d/1px4tPVWAgbBPMt6Rp23oNwWz8Ulj6pmX/view?usp=sharing) | [BaiduYun](https://pan.baidu.com/s/1WFQxeIOjOiByiFYHLpuytA) (fetch code: xwcv).

## Training and Test

### Prepare SISMs

ICNet can be trained and tested with SISMs produced by any off-the-shelf SOD method, but we suggest using the **same** SOD method to generate SISMs in the training and test phases to keep them consistent.
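Whichever SOD method you use, the dataloaders in **"./ICNet/dataset.py"** read images, GTs and SISMs group by group, so the three folders are expected to mirror each other, with matching group folder names and file names (GTs and SISMs stored as ".png"). A sketch of the assumed layout (the group and file names below are only placeholders):

```
img_root/
├── group_1/
│   ├── image_1.jpg
│   └── image_2.jpg
└── group_2/
    └── ...

gt_root/          # mirrors img_root, with ".png" files
└── group_1/
    ├── image_1.png
    └── image_2.png

sism_root/        # mirrors img_root, with ".png" files
└── group_1/
    ├── image_1.png
    └── image_2.png
```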

In our paper, we choose the pre-trained [EGNet](https://github.com/JXingZhao/EGNet) (VGG16-based) as the basic SOD method to produce SISMs. You can download these SISMs directly from:

* ***EGNet-SISMs*** (resized to 224*224, 125MB), [GoogleDrive](https://drive.google.com/drive/folders/1cGtXQI2U8pH37-mgSw3otnMsRi36QwBp?usp=sharing) | [BaiduYun](https://pan.baidu.com/s/19Izo6i7A9DfUMzsNbVec6g) (fetch code: ae6a).

### Training

1. Download the pre-trained VGG16 from:

   * ***vgg16_feat.pth*** (56MB), [GoogleDrive](https://drive.google.com/file/d/1ej5ngj2NYH-R-0GfYUDfuM-DNLuFolED/view?usp=sharing) | [BaiduYun](https://pan.baidu.com/s/1kAh7FAUPuVLI5cvtBsxh-A) (fetch code: j0zq).

2. Follow the instructions in **"./ICNet/train.py"** to modify the training settings.

3. Run:

```
python ./ICNet/train.py
```

### Test

1. * To test the **pre-trained** ICNet:

     Download the pre-trained ICNet ***"ICNet_vgg16.pth"*** (the download link is given above).

   * To test an ICNet **trained by yourself**:

     Choose the checkpoint file ***"Weights_i.pth"*** (saved automatically after the i-th epoch) you want to load for the test.

2. Follow the instructions in **"./ICNet/test.py"** to modify the test settings.

3. Run:

```
python ./ICNet/test.py
```

## Evaluation

The folder "./ICNet/evaluator/" contains the evaluation codes implemented in PyTorch (GPU version); the metrics include **max F-measure**, **S-measure**, and **MAE**.

1. Follow the instructions in **"./ICNet/evaluate.py"** to modify the evaluation settings.

2. Run:

```
python ./ICNet/evaluate.py
```

## Compared Methods

We compare our ICNet with 7 state-of-the-art Co-SOD methods:

* ***CBCS*** ''Cluster-Based Co-Saliency Detection, *TIP(2013)*''

* ***CSHS*** ''Co-Saliency Detection Based on Hierarchical Segmentation, *SPL(2014)*''

* ***CoDW*** ''Detection of Co-salient Objects by Looking Deep and Wide, *IJCV(2016)*''

* ***UCSG*** ''Unsupervised CNN-based Co-Saliency Detection with Graphical Optimization, *ECCV(2018)*''

* ***CSMG*** ''Co-saliency Detection via Mask-guided Fully Convolutional Networks with Multi-scale Label Smoothing, *CVPR(2019)*''

* ***MGLCN*** ''A Unified Multiple Graph Learning and Convolutional Network Model for Co-saliency Estimation, *ACM MM(2019)*''

* ***GICD*** ''Gradient-Induced Co-Saliency Detection, *ECCV(2020)*''

You can download the predictions of these methods from:

* ***compared_methods*** (original sizes, 445MB), [GoogleDrive](https://drive.google.com/drive/folders/1qdXWZQ-fF-WaCF-rat0Da7vFrAIYsj09?usp=sharing) | [BaiduYun](https://pan.baidu.com/s/10vpubz39atkg2lz095QvSQ) (fetch code: s7pr).

## Citation

*To be updated.*

## Contact

If you have any questions, feel free to contact me (Wen-Da Jin) at jwd331@126.com. I will reply as soon as possible.

--------------------------------------------------------------------------------
/thumbnail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blanclist/ICNet/f0d5e463839caa2e39a6fc042da22284b7d4c9d9/thumbnail.png
--------------------------------------------------------------------------------