├── .gitignore ├── o_O_solution_report.pdf ├── metrics.py ├── README.md ├── config.py ├── main.py ├── train.py ├── model.py └── data_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | traits.md 3 | *.pt -------------------------------------------------------------------------------- /o_O_solution_report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YijinHuang/pytorch-DR/HEAD/o_O_solution_report.pdf -------------------------------------------------------------------------------- /metrics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def classify(predict, thresholds=[0, 0.5, 1.5, 2.5, 3.5]): 6 | predict = max(predict, thresholds[0]) 7 | for i in reversed(range(len(thresholds))): 8 | if predict >= thresholds[i]: 9 | return i 10 | 11 | 12 | def quadratic_weighted_kappa(conf_mat): 13 | assert conf_mat.shape[0] == conf_mat.shape[1] 14 | cate_num = conf_mat.shape[0] 15 | 16 | # Quadratic weighted matrix 17 | weighted_matrix = np.zeros((cate_num, cate_num)) 18 | for i in range(cate_num): 19 | for j in range(cate_num): 20 | weighted_matrix[i][j] = 1 - float(((i - j)**2) / ((cate_num - 1)**2)) 21 | 22 | # Expected matrix 23 | ground_truth_count = np.sum(conf_mat, axis=1) 24 | pred_count = np.sum(conf_mat, axis=0) 25 | expected_matrix = np.outer(ground_truth_count, pred_count) 26 | 27 | # Normalization 28 | conf_mat = conf_mat / conf_mat.sum() 29 | expected_matrix = expected_matrix / expected_matrix.sum() 30 | 31 | observed = (conf_mat * weighted_matrix).sum() 32 | expected = (expected_matrix * weighted_matrix).sum() 33 | return (observed - expected) / (1 - expected) 34 | 35 | 36 | def accuracy(predictions, targets, c_matrix=None): 37 | predictions = predictions.data 38 | targets = targets.data 39 | 40 | # avoid modifying the original predictions 41 | predicted = torch.tensor( 42 | [classify(p.item()) for p in predictions] 43 | ).cuda().float() 44 | 45 | # update confusion matrix 46 | if c_matrix is not None: 47 | for i, p in enumerate(predicted): 48 | c_matrix[int(targets[i])][int(p.item())] += 1 49 | 50 | correct = (predicted == targets).sum().item() 51 | return correct / len(predicted) 52 | 53 | 54 | if __name__ == "__main__": 55 | conf_mat = np.array([ 56 | [37, 8, 5, 0, 0], 57 | [8, 32, 8, 2, 0], 58 | [6, 6, 31, 5, 2], 59 | [1, 1, 5, 39, 4], 60 | [1, 1, 4, 10, 34] 61 | ]) 62 | print(quadratic_weighted_kappa(conf_mat)) 63 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## pytorch-DR 2 | 3 | - A PyTorch implementation of the team o_O solution for the Kaggle Diabetic Retinopathy Detection Challenge. 4 | - [Solution summary](https://www.kaggle.com/c/diabetic-retinopathy-detection/discussion/15617#latest-373487) 5 | 6 | ### Branches 7 | 8 | - Branch "master" applies new techniques to improve the team o_O solution. 9 | - Branch "reimplement" is the reimplementation of the team o_O solution. 10 | - Branch "two_cates" is used for binary classification (Normal/DR). 11 | 12 | ### How to use 13 | 14 | #### Data directory 15 | 16 | You should organize your dataset as follows: 17 | 18 | ``` 19 | ├── your_data_dir 20 | ├── train 21 | ├── 1 22 | ├── a.jpg 23 | ├── b.jpg 24 | ├── ... 25 | ├── 2 26 | ├── x.jpg 27 | ├── y.jpg 28 | ├── ... 
29 | ├── 3 30 | ├── 4 31 | ├── 5 32 | ├── val 33 | ├── test 34 | ``` 35 | 36 | Here, the `val` and `test` directories have the same structure as `train`. The names of the directories 1-5 indicate the severity of the disease. 37 | 38 | #### Run 39 | 40 | You can run this code by: 41 | 42 | ```shell 43 | $ python main.py 44 | ``` 45 | 46 | Most of the hyperparameters and configurations are in `config.py`. You should choose *SMALL_NET_CONFIG*, *MEDIUM_NET_CONFIG* or *LARGE_NET_CONFIG* as *STEM_CONFIG* in the `main` function of `main.py`. The `stem` function trains a single inference network, and the `blend` function trains an ensemble network, which is optional. 47 | 48 | ```python 49 | def main(): 50 | # network config, choose SMALL_NET_CONFIG, MEDIUM_NET_CONFIG or LARGE_NET_CONFIG 51 | STEM_CONFIG = SMALL_NET_CONFIG 52 | stem(STEM_CONFIG) 53 | 54 | # blend step config 55 | # BLEND_CONFIG = BLEND_NET_CONFIG 56 | # blend(BLEND_CONFIG, STEM_CONFIG) 57 | ``` 58 | 59 | Moreover, if you want to obtain the final large network, you should train the small and medium networks first. More details are in [o_O_solution_report.pdf](https://github.com/YijinHuang/pytorch-DR/blob/reimplement/o_O_solution_report.pdf). 60 | 61 | ### Result 62 | 63 | This project is still in progress. Currently, a single large network achieves 79.84% on the EyePACS test set, which is close to the 80% that the author reports. You can use the full ensemble method that the author designed to get a better result, but it has not been implemented here yet. -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | SMALL_NET_CONFIG = { 2 | 'NET_SIZE': 'small', 3 | 'DATA_PATH': '../../dataset/train_data_full_512', 4 | 'SAVE_PATH': '../../result/o_O_result/o_O_small.pt', 5 | 'RECORD_PATH': '../../result/o_O_result/o_O_small.rec', 6 | 'PRETRAINED_PATH': None, 7 | 'LEARNING_RATE': 3e-3, 8 | 'INPUT_SIZE': 112, 9 | 'FEATURE_DIM': 512, 10 | 'BATCH_SIZE': 128, 11 | 'EPOCHS': 200, 12 | 'DATA_AUGMENTATION': { 13 | 'scale': (1 / 1.15, 1.15), 14 | 'stretch_ratio': (0.7561, 1.3225), # (1/(1.15*1.15) and 1.15*1.15) 15 | 'ratation': (-180, 180), 16 | 'translation_ratio': (40 / 112, 40 / 112), # 40 pixel in the report 17 | 'sigma': 0.5 18 | }, 19 | "NUM_WORKERS": 32 20 | } 21 | 22 | MEDIUM_NET_CONFIG = { 23 | 'NET_SIZE': 'medium', 24 | 'DATA_PATH': '../../dataset/train_data_full_512', 25 | 'SAVE_PATH': '../../result/o_O_result/o_O_medium.pt', 26 | 'RECORD_PATH': '../../result/o_O_result/o_O_medium.rec', 27 | 'PRETRAINED_PATH': '../../result/o_O_result/o_O_small.pt', 28 | 'LEARNING_RATE': 3e-3, 29 | 'INPUT_SIZE': 224, 30 | 'FEATURE_DIM': 1024, 31 | 'BATCH_SIZE': 128, 32 | 'EPOCHS': 200, 33 | 'DATA_AUGMENTATION': { 34 | 'scale': (1 / 1.15, 1.15), 35 | 'stretch_ratio': (0.7561, 1.3225), # (1/(1.15*1.15) and 1.15*1.15) 36 | 'ratation': (-180, 180), 37 | 'translation_ratio': (40 / 224, 40 / 224), # 40 pixel in the report 38 | 'sigma': 0.5 39 | }, 40 | "NUM_WORKERS": 32 41 | } 42 | 43 | LARGE_NET_CONFIG = { 44 | 'NET_SIZE': 'large', 45 | 'DATA_PATH': '../../dataset/train_data_full_512', 46 | 'SAVE_PATH': '../../result/o_O_result/o_O_large.pt', 47 | 'RECORD_PATH': '../../result/o_O_result/o_O_large.rec', 48 | 'PRETRAINED_PATH': '../../result/o_O_result/o_O_medium.pt', 49 | 'LEARNING_RATE': 3e-3, 50 | 'INPUT_SIZE': 448, 51 | 'FEATURE_DIM': 2048, 52 | 'BATCH_SIZE': 48, 53 | 'EPOCHS': 250, 54 | 'DATA_AUGMENTATION': { 55 | 'scale': (1 / 1.15, 1.15), 56 | 'stretch_ratio': (0.7561, 1.3225), # (1/(1.15*1.15) and 1.15*1.15) 57 | 'ratation': (-180, 180), 58 | 
'translation_ratio': (40 / 448, 40 / 448), # 40 pixel in the report 59 | 'sigma': 0.5 60 | }, 61 | "NUM_WORKERS": 32 62 | } 63 | 64 | BLEND_NET_CONFIG = { 65 | 'MODEL_PATH': '../../models/o_O_large.pt', 66 | 'SOURCE_PATH': '/home/asus/Disk/yijin/ophthalmology/grade/kaggle/train_data_full_512', 67 | 'TARGET_PATH': '/home/asus/Disk/yijin/ophthalmology/grade/kaggle/train_data_full_512_blend_feature_50', 68 | 'AUGMENTATION_TIMES': 50, 69 | 'SAVE_PATH': './test.pt', 70 | 'LEARNING_RATE': 5e-4, 71 | 'FEATURE_DIM': 4096, 72 | 'BATCH_SIZE': 128, 73 | 'EPOCHS': 100 74 | } 75 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pickle 4 | import torch 5 | import numpy as np 6 | 7 | from config import * 8 | from model import o_ONet, BlendModel 9 | from train import train_stem, train_blend, evaluate 10 | from data_utils import generate_stem_dataset, generate_blend_dataset, create_blend_features 11 | 12 | 13 | def main(): 14 | # network config 15 | STEM_CONFIG = SMALL_NET_CONFIG 16 | stem(STEM_CONFIG) 17 | 18 | # blend step config 19 | # BLEND_CONFIG = BLEND_NET_CONFIG 20 | # blend(BLEND_CONFIG, STEM_CONFIG) 21 | 22 | 23 | def stem(STEM_CONFIG): 24 | # create save path 25 | save_dir = os.path.split(STEM_CONFIG['SAVE_PATH'])[0] 26 | if not os.path.exists(save_dir): 27 | os.makedirs(save_dir) 28 | 29 | # load dataset 30 | train_dataset, test_dataset, val_dataset = generate_stem_dataset( 31 | STEM_CONFIG['DATA_PATH'], 32 | STEM_CONFIG['INPUT_SIZE'], 33 | STEM_CONFIG['DATA_AUGMENTATION'] 34 | ) 35 | 36 | # train 37 | model, record_epochs, accs, losses = train_stem( 38 | net=o_ONet, 39 | train_dataset=train_dataset, 40 | val_dataset=val_dataset, 41 | net_size=STEM_CONFIG['NET_SIZE'], 42 | input_size=STEM_CONFIG['INPUT_SIZE'], 43 | feature_dim=STEM_CONFIG['FEATURE_DIM'], 44 | epochs=STEM_CONFIG['EPOCHS'], 45 | learning_rate=STEM_CONFIG['LEARNING_RATE'], 46 | batch_size=STEM_CONFIG['BATCH_SIZE'], 47 | save_path=STEM_CONFIG['SAVE_PATH'], 48 | pretrained_model=STEM_CONFIG['PRETRAINED_PATH'], 49 | num_workers=STEM_CONFIG['NUM_WORKERS'] 50 | ) 51 | pickle.dump( 52 | (record_epochs, accs, losses), 53 | open(STEM_CONFIG['RECORD_PATH'], 'wb') 54 | ) 55 | 56 | # test the stem network 57 | evaluate(STEM_CONFIG['SAVE_PATH'], test_dataset, STEM_CONFIG['NUM_WORKERS']) 58 | 59 | 60 | def blend(BLEND_CONFIG, STEM_CONFIG): 61 | # create save path 62 | save_dir = os.path.split(BLEND_CONFIG['SAVE_PATH'])[0] 63 | if not os.path.exists(save_dir): 64 | os.makedirs(save_dir) 65 | 66 | # create features with different data augmentation 67 | create_blend_features( 68 | BLEND_CONFIG['MODEL_PATH'], 69 | BLEND_CONFIG['SOURCE_PATH'], 70 | BLEND_CONFIG['TARGET_PATH'], 71 | STEM_CONFIG['INPUT_SIZE'], 72 | STEM_CONFIG['DATA_AUGMENTATION'], 73 | BLEND_CONFIG['AUGMENTATION_TIMES'] 74 | ) 75 | # generate dataset 76 | train_dataset, test_dataset, val_dataset = generate_blend_dataset(BLEND_CONFIG['TARGET_PATH']) 77 | 78 | train_blend( 79 | BlendModel, 80 | train_dataset, 81 | val_dataset, 82 | BLEND_CONFIG['FEATURE_DIM'], 83 | BLEND_CONFIG['EPOCHS'], 84 | BLEND_CONFIG['LEARNING_RATE'], 85 | BLEND_CONFIG['BATCH_SIZE'], 86 | BLEND_CONFIG['SAVE_PATH'] 87 | ) 88 | 89 | # test the stem network 90 | evaluate(BLEND_CONFIG['SAVE_PATH'], test_dataset) 91 | 92 | 93 | if __name__ == '__main__': 94 | main() 95 | -------------------------------------------------------------------------------- /train.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from tqdm import tqdm 4 | from torch import nn 5 | from torch.utils.data import DataLoader 6 | 7 | from metrics import classify, accuracy, quadratic_weighted_kappa 8 | from data_utils import ScheduledWeightedSampler, PeculiarSampler, EvaluationTransformer 9 | 10 | 11 | def train_stem(net, train_dataset, val_dataset, net_size, input_size, feature_dim, epochs, 12 | learning_rate, batch_size, save_path, pretrained_model=None, num_workers=8): 13 | # create dataloader 14 | train_targets = [sampler[1] for sampler in train_dataset.samples] 15 | weighted_sampler = ScheduledWeightedSampler(len(train_dataset), train_targets, replacement=True) 16 | train_loader = DataLoader( 17 | train_dataset, 18 | batch_size=batch_size, 19 | sampler=weighted_sampler, 20 | num_workers=num_workers, 21 | drop_last=True 22 | ) 23 | 24 | # avoid IndexError when use multiple gpus 25 | val_batch_size = batch_size if len(val_dataset) % batch_size >= 2 * torch.cuda.device_count() else batch_size - 2 26 | val_loader = DataLoader( 27 | val_dataset, 28 | batch_size=val_batch_size, 29 | num_workers=num_workers, 30 | shuffle=False 31 | ) 32 | 33 | # define model 34 | model = net(net_size, input_size, feature_dim).cuda() 35 | gpu_num = torch.cuda.device_count() 36 | if gpu_num > 1: 37 | model = torch.nn.DataParallel(model) 38 | print_msg('Use {} gpus to train.'.format(gpu_num)) 39 | module = model.module if isinstance(model, torch.nn.DataParallel) else model 40 | 41 | # print model config 42 | print_msg('Trainable layers: ', ['{}\t{}'.format(k, v) for k, v in module.layer_configs()]) 43 | 44 | # load pretrained weights 45 | if pretrained_model: 46 | loaded_dict = module.load_weights(pretrained_model, ['fc', 'dense']) 47 | print_msg('Loaded weights from {}: '.format(pretrained_model), sorted(loaded_dict.keys())) 48 | 49 | # define loss and optimizier 50 | MSELoss = torch.nn.MSELoss() 51 | optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, nesterov=True, weight_decay=0.0005) 52 | 53 | # learning rate decay 54 | milestones = [150, 220] 55 | lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=0.1) 56 | 57 | # train 58 | record_epochs, accs, losses = train( 59 | model, 60 | train_loader, 61 | val_loader, 62 | MSELoss, 63 | optimizer, 64 | epochs, 65 | save_path, 66 | weighted_sampler, 67 | lr_scheduler 68 | ) 69 | return model, record_epochs, accs, losses 70 | 71 | 72 | def train_blend(net, train_dataset, val_dataset, feature_dim, 73 | epochs, learning_rate, batch_size, save_path): 74 | # create dataloader 75 | train_targets = [sampler[1] for sampler in train_dataset.samples] 76 | weighted_sampler = PeculiarSampler(len(train_dataset), train_targets, batch_size, replacement=False) 77 | train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=weighted_sampler, drop_last=True) 78 | 79 | # avoid IndexError when use multiple gpus 80 | val_batch_size = batch_size if len(val_dataset) % batch_size >= 2 * torch.cuda.device_count() else batch_size - 2 81 | val_loader = DataLoader(val_dataset, batch_size=val_batch_size, shuffle=False) 82 | 83 | # define model 84 | model = net(feature_dim).cuda() 85 | 86 | # define loss and optimizier 87 | MSELoss = torch.nn.MSELoss() 88 | optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.005) 89 | 90 | def regularize_loss(model, X, y_pred, y): 91 | loss = 0 92 | for param in 
list(model.children())[0].parameters(): 93 | loss += 2e-5 * torch.sum(torch.abs(param)) 94 | return loss 95 | 96 | # learning rate decay 97 | milestones = [60, 80, 90] 98 | lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=0.1) 99 | 100 | # train 101 | record_epochs, accs, losses = train( 102 | model, 103 | train_loader, 104 | val_loader, 105 | MSELoss, 106 | optimizer, 107 | epochs, 108 | save_path, 109 | weighted_sampler, 110 | lr_scheduler, 111 | regularize_loss 112 | ) 113 | return model, record_epochs, accs, losses 114 | 115 | 116 | def train(model, train_loader, val_loader, loss_function, optimizer, epochs, save_path, 117 | weighted_sampler=None, lr_scheduler=None, extra_loss=None): 118 | max_kappa = 0 119 | record_epochs, accs, losses = [], [], [] 120 | model.train() 121 | for epoch in range(1, epochs + 1): 122 | epoch_loss = 0 123 | correct = 0 124 | total = 0 125 | progress = tqdm(enumerate(train_loader)) 126 | for step, train_data in progress: 127 | X, y = train_data 128 | X, y = X.cuda(), y.float().cuda() 129 | 130 | # forward 131 | y_pred = model(X) 132 | loss = loss_function(y_pred, y) 133 | if extra_loss: 134 | loss += extra_loss(model, X, y_pred, y) 135 | 136 | # backward 137 | optimizer.zero_grad() 138 | loss.backward() 139 | optimizer.step() 140 | 141 | # metrics 142 | epoch_loss += loss.item() 143 | total += y.size(0) 144 | correct += accuracy(y_pred, y) * y.size(0) 145 | avg_loss = epoch_loss / (step + 1) 146 | avg_acc = correct / total 147 | progress.set_description( 148 | 'epoch: {}, loss: {:.6f}, acc: {:.4f}' 149 | .format(epoch, avg_loss, avg_acc) 150 | ) 151 | 152 | # validate and keep the model with the best quadratic weighted kappa 153 | c_matrix = np.zeros((5, 5), dtype=int) 154 | acc = _eval(model, val_loader, c_matrix) 155 | kappa = quadratic_weighted_kappa(c_matrix) 156 | print('validation accuracy: {}, kappa: {}'.format(acc, kappa)) 157 | if kappa > max_kappa: 158 | torch.save(model, save_path) 159 | max_kappa = kappa 160 | print_msg('Model saved at {}'.format(save_path)) 161 | 162 | # record 163 | record_epochs.append(epoch) 164 | accs.append(acc) 165 | losses.append(avg_loss) 166 | 167 | # resampling weight update 168 | if weighted_sampler: 169 | weighted_sampler.step() 170 | 171 | # learning rate update 172 | if lr_scheduler: 173 | lr_scheduler.step() 174 | if epoch in lr_scheduler.milestones: 175 | print_msg('Learning rate decayed to {}'.format(lr_scheduler.get_lr()[0])) 176 | 177 | print('Best validation kappa: {}'.format(max_kappa)) 178 | return record_epochs, accs, losses 179 | 180 | 181 | def evaluate(model_path, test_dataset, num_workers=8): 182 | c_matrix = np.zeros((5, 5), dtype=int) 183 | 184 | trained_model = torch.load(model_path).cuda() 185 | test_loader = DataLoader(test_dataset, batch_size=32, num_workers=num_workers, shuffle=False) 186 | test_acc = _eval(trained_model, test_loader, c_matrix) 187 | print('========================================') 188 | print('Finished! 
test acc: {}'.format(test_acc)) 189 | print('Confusion Matrix:') 190 | print(c_matrix) 191 | print('quadratic kappa: {}'.format(quadratic_weighted_kappa(c_matrix))) 192 | print('========================================') 193 | 194 | 195 | def _eval(model, dataloader, c_matrix=None): 196 | model.eval() 197 | torch.set_grad_enabled(False) 198 | 199 | correct = 0 200 | total = 0 201 | for test_data in dataloader: 202 | X, y = test_data 203 | X, y = X.cuda(), y.float().cuda() 204 | 205 | y_pred = model(X) 206 | total += y.size(0) 207 | correct += accuracy(y_pred, y, c_matrix) * y.size(0) 208 | acc = round(correct / total, 4) 209 | 210 | model.train() 211 | torch.set_grad_enabled(True) 212 | return acc 213 | 214 | 215 | def print_msg(msg, appendixs=[]): 216 | max_len = len(max([msg, *appendixs], key=len)) 217 | print('=' * max_len) 218 | print(msg) 219 | for appendix in appendixs: 220 | print(appendix) 221 | print('=' * max_len) 222 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class o_ONet(nn.Module): 7 | def __init__(self, net_size, input_size, feature_dim): 8 | super(o_ONet, self).__init__() 9 | 10 | # require inputs width and height in each layer because of the using of untied biases. 11 | sizes = self.cal_sizes(net_size, input_size) 12 | 13 | # named layers 14 | self.conv = nn.Sequential() 15 | if net_size in ['small', 'medium', 'large']: 16 | # 1-11 layers 17 | small_conv = nn.Sequential( 18 | self.basic_conv2d(3, 32, sizes[0], sizes[0], kernel_size=5, stride=2, padding=2), 19 | self.basic_conv2d(32, 32, sizes[0], sizes[0], kernel_size=3, stride=1, padding=1), 20 | nn.MaxPool2d(kernel_size=3, stride=2, padding=0), 21 | self.basic_conv2d(32, 64, sizes[1], sizes[1], kernel_size=5, stride=2, padding=2), 22 | self.basic_conv2d(64, 64, sizes[1], sizes[1], kernel_size=3, stride=1, padding=1), 23 | self.basic_conv2d(64, 64, sizes[1], sizes[1], kernel_size=3, stride=1, padding=1), 24 | nn.MaxPool2d(kernel_size=3, stride=2, padding=0), 25 | self.basic_conv2d(64, 128, sizes[2], sizes[2], kernel_size=3, stride=1, padding=1), 26 | self.basic_conv2d(128, 128, sizes[2], sizes[2], kernel_size=3, stride=1, padding=1), 27 | self.basic_conv2d(128, 128, sizes[2], sizes[2], kernel_size=3, stride=1, padding=1), 28 | ) 29 | self.conv.add_module('small_conv', small_conv) 30 | 31 | if net_size in ['medium', 'large']: 32 | # 12-15 layers 33 | medium_conv = nn.Sequential( 34 | nn.MaxPool2d(kernel_size=3, stride=2, padding=0), 35 | self.basic_conv2d(128, 256, sizes[3], sizes[3], kernel_size=3, stride=1, padding=1), 36 | self.basic_conv2d(256, 256, sizes[3], sizes[3], kernel_size=3, stride=1, padding=1), 37 | self.basic_conv2d(256, 256, sizes[3], sizes[3], kernel_size=3, stride=1, padding=1), 38 | ) 39 | self.conv.add_module('medium_conv', medium_conv) 40 | 41 | if net_size in ['large']: 42 | # 16-18 layers 43 | large_conv = nn.Sequential( 44 | nn.MaxPool2d(kernel_size=3, stride=2, padding=0), 45 | self.basic_conv2d(256, 512, sizes[4], sizes[4], kernel_size=3, stride=1, padding=1), 46 | self.basic_conv2d(512, 512, sizes[4], sizes[4], kernel_size=3, stride=1, padding=1), 47 | ) 48 | self.conv.add_module('large_conv', large_conv) 49 | 50 | # RMSPooling layer 51 | self.conv.add_module('rmspool', RMSPool(3, 3)) 52 | 53 | # regression part 54 | self.fc = nn.Sequential( 55 | nn.Dropout(p=0.5), 56 | 
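# note: each Linear(..., 1024) below is followed by MaxPool1d(kernel_size=2, stride=2),
# which takes the max over adjacent pairs of the 1024 activations (maxout-style feature
# pooling) and halves them to 512, hence the 512-dimensional inputs of the later Linear layers.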
nn.Linear(feature_dim, 1024), 57 | nn.MaxPool1d(kernel_size=2, stride=2), 58 | nn.LeakyReLU(negative_slope=0.01), 59 | nn.Dropout(p=0.5), 60 | nn.Linear(512, 1024), 61 | nn.MaxPool1d(kernel_size=2, stride=2), 62 | nn.LeakyReLU(negative_slope=0.01), 63 | nn.Linear(512, 1) 64 | ) 65 | 66 | # initial parameters 67 | for m in self.modules(): 68 | if isinstance(m, Conv2dUntiedBias) or isinstance(m, nn.Linear): 69 | nn.init.orthogonal_(m.weight, 1) 70 | nn.init.constant_(m.bias, 0.05) 71 | 72 | def basic_conv2d(self, in_channels, out_channels, height, width, kernel_size, stride, padding): 73 | return nn.Sequential( 74 | Conv2dUntiedBias(in_channels, out_channels, height, width, kernel_size, stride, padding), 75 | nn.LeakyReLU(negative_slope=0.01) 76 | ) 77 | 78 | def forward(self, x): 79 | features = self.conv(x) 80 | # reshape to satisify maxpool1d input shape requirement 81 | features = features.view(features.size(0), 1, -1) 82 | predict = self.fc(features) 83 | predict = torch.squeeze(predict) 84 | return predict 85 | 86 | # load part of pretrained_model like o_O solution \ 87 | # using multi-scale image to train model by setting type to part \ 88 | # or load full weights by setting type to full. 89 | def load_weights(self, pretrained_model_path, exclude=[]): 90 | pretrained_model = torch.load(pretrained_model_path) 91 | pretrained_dict = pretrained_model.state_dict() 92 | if isinstance(pretrained_model, nn.DataParallel): 93 | pretrained_dict = {key[7:]: value for key, value in pretrained_dict.items()} 94 | model_dict = self.state_dict() 95 | 96 | # exclude 97 | for name in list(pretrained_dict.keys()): 98 | # using untied biases will make it unable to reload. 99 | if name in model_dict.keys() and pretrained_dict[name].shape != model_dict[name].shape: 100 | pretrained_dict.pop(name) 101 | continue 102 | for e in exclude: 103 | if e in name: 104 | pretrained_dict.pop(name) 105 | break 106 | 107 | # load weights 108 | model_dict.update(pretrained_dict) 109 | self.load_state_dict(model_dict) 110 | 111 | return pretrained_dict 112 | 113 | def layer_configs(self): 114 | model_dict = self.state_dict() 115 | return [(tensor, model_dict[tensor].size()) for tensor in model_dict] 116 | 117 | def cal_sizes(self, net_size, input_size): 118 | sizes = [] 119 | if net_size in ['small', 'medium', 'large']: 120 | sizes.append(self._reduce_size(input_size, 5, 2, 2)) 121 | after_maxpool = self._reduce_size(sizes[-1], 3, 0, 2) 122 | sizes.append(self._reduce_size(after_maxpool, 5, 2, 2)) 123 | after_maxpool = self._reduce_size(sizes[-1], 3, 0, 2) 124 | sizes.append(self._reduce_size(after_maxpool, 3, 1, 1)) 125 | if net_size in ['medium', 'large']: 126 | after_maxpool = self._reduce_size(sizes[-1], 3, 0, 2) 127 | sizes.append(self._reduce_size(after_maxpool, 3, 1, 1)) 128 | if net_size in ['large']: 129 | after_maxpool = self._reduce_size(sizes[-1], 3, 0, 2) 130 | sizes.append(self._reduce_size(after_maxpool, 3, 1, 1)) 131 | 132 | return sizes 133 | 134 | def _reduce_size(self, input_size, kernel_size, padding, stride): 135 | return (input_size + (2 * padding) - (kernel_size - 1) - 1) // stride + 1 136 | 137 | 138 | class BlendModel(nn.Module): 139 | def __init__(self, feature_dim): 140 | super(BlendModel, self).__init__() 141 | 142 | # regression 143 | self.dense_1 = nn.Linear(feature_dim, 32) 144 | self.dense_2 = nn.Linear(16, 32) 145 | self.dense_3 = nn.Linear(16, 1) 146 | self.max_pool = nn.MaxPool1d(kernel_size=2, stride=2) 147 | self.relu = nn.ReLU() 148 | 149 | # initial parameters 150 | for m in 
self.modules(): 151 | if isinstance(m, Conv2dUntiedBias) or isinstance(m, nn.Linear): 152 | nn.init.orthogonal_(m.weight, 1) 153 | nn.init.constant_(m.bias, 0.01) 154 | 155 | def forward(self, x): 156 | # reshape to satisify requirement of max pooing api 157 | x = x.view(x.size(0), 1, -1) 158 | 159 | x = self.dense_1(x) 160 | x = self.max_pool(x) 161 | x = self.relu(x) 162 | 163 | x = self.dense_2(x) 164 | x = self.max_pool(x) 165 | x = self.relu(x) 166 | 167 | predict = self.dense_3(x) 168 | return predict.squeeze() 169 | 170 | def layer_configs(self): 171 | model_dict = self.state_dict() 172 | return [(tensor, model_dict[tensor].size()) for tensor in model_dict] 173 | 174 | 175 | class RMSPool(nn.Module): 176 | def __init__(self, kernel_size, stride): 177 | super(RMSPool, self).__init__() 178 | self.kernel_size = kernel_size 179 | self.stride = stride 180 | 181 | def forward(self, x): 182 | x = torch.pow(x, 2) 183 | x = F.avg_pool2d(x, kernel_size=self.kernel_size, stride=self.stride) 184 | x = torch.sqrt(x) 185 | return x 186 | 187 | 188 | class Conv2dUntiedBias(nn.Module): 189 | def __init__(self, in_channels, out_channels, height, width, kernel_size, stride=1, padding=0): 190 | super(Conv2dUntiedBias, self).__init__() 191 | 192 | self.in_channels = in_channels 193 | self.out_channels = out_channels 194 | self.kernel_size = (kernel_size, kernel_size) 195 | self.stride = (stride, stride) 196 | self.padding = (padding, padding) 197 | self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels, kernel_size, kernel_size)) 198 | self.bias = nn.Parameter(torch.Tensor(out_channels, height, width)) 199 | 200 | def forward(self, x): 201 | output = F.conv2d(x, self.weight, None, self.stride, self.padding) 202 | output += self.bias.unsqueeze(0).repeat(x.size(0), 1, 1, 1) 203 | return output 204 | -------------------------------------------------------------------------------- /data_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import random 4 | 5 | import torch 6 | import numpy as np 7 | from torch import nn 8 | from PIL import Image 9 | from tqdm import tqdm 10 | from torch.utils.data import Dataset 11 | from torch.utils.data.sampler import Sampler 12 | from torchvision import transforms, datasets 13 | from torchvision.transforms import functional as F 14 | 15 | 16 | # channel means and standard deviations of kaggle dataset, computed by origin author 17 | MEAN = [108.64628601 / 255, 75.86886597 / 255, 54.34005737 / 255] 18 | STD = [70.53946096 / 255, 51.71475228 / 255, 43.03428563 / 255] 19 | 20 | # for color augmentation, computed by origin author 21 | U = torch.tensor([[-0.56543481, 0.71983482, 0.40240142], 22 | [-0.5989477, -0.02304967, -0.80036049], 23 | [-0.56694071, -0.6935729, 0.44423429]], dtype=torch.float32) 24 | EV = torch.tensor([1.65513492, 0.48450358, 0.1565086], dtype=torch.float32) 25 | 26 | # set of resampling weights that yields balanced classes, computed by origin author 27 | BALANCE_WEIGHTS = torch.tensor([1.3609453700116234, 14.378223495702006, 28 | 6.637566137566138, 40.235967926689575, 29 | 49.612994350282484], dtype=torch.double) 30 | FINAL_WEIGHTS = torch.as_tensor([1, 2, 2, 2, 2], dtype=torch.double) 31 | 32 | 33 | def generate_stem_dataset(data_path, input_size, data_aug): 34 | train_transform = transforms.Compose([ 35 | transforms.RandomResizedCrop( 36 | size=input_size, 37 | scale=data_aug['scale'], 38 | ratio=data_aug['stretch_ratio'] 39 | ), 40 | transforms.RandomAffine( 41 | 
degrees=data_aug['ratation'], 42 | translate=data_aug['translation_ratio'], 43 | scale=None, 44 | shear=None 45 | ), 46 | transforms.RandomHorizontalFlip(), 47 | transforms.RandomVerticalFlip(), 48 | transforms.ToTensor(), 49 | transforms.Normalize(tuple(MEAN), tuple(STD)), 50 | KrizhevskyColorAugmentation(sigma=data_aug['sigma']) 51 | ]) 52 | 53 | test_transform = transforms.Compose([ 54 | transforms.Resize((input_size,input_size)), 55 | transforms.ToTensor(), 56 | transforms.Normalize(tuple(MEAN), tuple(STD)) 57 | ]) 58 | 59 | def load_image(x): 60 | return Image.open(x) 61 | 62 | return generate_dataset(data_path, load_image, ('jpg', 'jpeg'), train_transform, test_transform) 63 | 64 | 65 | def generate_blend_dataset(data_path): 66 | def load_tensor(x): 67 | return torch.load(x) 68 | 69 | return generate_dataset(data_path, load_tensor, ('pt',), None, None) 70 | 71 | 72 | def generate_dataset(data_path, loader, extensions, train_transform, test_transform): 73 | train_path = os.path.join(data_path, 'train') 74 | test_path = os.path.join(data_path, 'test') 75 | val_path = os.path.join(data_path, 'val') 76 | 77 | train_dataset = datasets.DatasetFolder(train_path, loader, extensions, transform=train_transform) 78 | test_dataset = datasets.DatasetFolder(test_path, loader, extensions, transform=test_transform) 79 | val_dataset = datasets.DatasetFolder(val_path, loader, extensions, transform=test_transform) 80 | 81 | return train_dataset, test_dataset, val_dataset 82 | 83 | 84 | def create_blend_features(model_path, source_path, target_path, input_size, data_aug, aug_times): 85 | trained_model = torch.load(model_path).cuda() 86 | torch.set_grad_enabled(False) 87 | 88 | # feature extractor before dense layers 89 | feature_extractor = nn.Sequential(list(trained_model.children())[0]) 90 | feature_extractor.eval() 91 | 92 | # random data augmentation 93 | transformer = EvaluationTransformer(input_size, data_aug, aug_times) 94 | transformer.create_transform_params() 95 | 96 | dataloaders = generate_dataset(source_path, lambda x: x, ('jpg', 'jpeg'), None, None) 97 | for dataloader in dataloaders: 98 | for sample in tqdm(dataloader): 99 | filepath, y = sample 100 | X = transformer.transform(filepath).cuda() 101 | 102 | feature_mean = feature_extractor(X).mean(dim=0) 103 | feature_std = feature_extractor(X).std(dim=0) 104 | blend_feature = torch.stack((feature_mean, feature_std)) 105 | blend_feature = blend_feature.view(1, -1) 106 | 107 | new_filepath = filepath.replace(source_path, target_path, 1) 108 | new_filepath = os.path.splitext(new_filepath)[0] + '.pt' 109 | target_dir = os.path.split(new_filepath)[0] 110 | if not os.path.exists(target_dir): 111 | os.makedirs(target_dir) 112 | torch.save(blend_feature, new_filepath) 113 | 114 | 115 | class EvaluationTransformer(): 116 | def __init__(self, input_size, data_aug, aug_times): 117 | self.input_size = input_size if isinstance(input_size, tuple) else (input_size, input_size) 118 | self.data_aug = data_aug 119 | self.aug_times = aug_times 120 | self.transform_params = { 121 | 'Crop': [], 122 | 'Affine': [], 123 | 'Horizontal_Flip': [], 124 | 'Vertical_Flip': [], 125 | 'ColorAugmentation': [] 126 | } 127 | 128 | def transform(self, filepath): 129 | transform_params = self.transform_params 130 | imgs = [] 131 | 132 | source = Image.open(filepath) 133 | for i in range(self.aug_times): 134 | img = F.resized_crop(source, *transform_params['Crop'][i], self.input_size, Image.BILINEAR) 135 | img = F.affine(img, *transform_params['Affine'][i], resample=False, 
fillcolor=0) 136 | if transform_params['Horizontal_Flip'][i]: 137 | img = F.hflip(img) 138 | if transform_params['Vertical_Flip'][i]: 139 | img = F.vflip(img) 140 | img = transforms.ToTensor()(img) 141 | img = transforms.Normalize(tuple(MEAN), tuple(STD))(img) 142 | img = KrizhevskyColorAugmentation()(img, transform_params['ColorAugmentation'][i]) 143 | imgs.append(img) 144 | 145 | return torch.stack(imgs) 146 | 147 | def multi_transform(self, filepaths): 148 | imgs = [] 149 | for filepath in filepaths: 150 | imgs.append(self.transform(filepath)) 151 | 152 | return imgs 153 | 154 | def create_transform_params(self): 155 | input_size = self.input_size 156 | data_aug = self.data_aug 157 | aug_times = self.aug_times 158 | transform_params = self.transform_params 159 | 160 | for _ in range(aug_times): 161 | # crop 162 | i, j, h, w = self.create_crop_params( 163 | input_size, 164 | data_aug['scale'], 165 | data_aug['stretch_ratio'] 166 | ) 167 | transform_params['Crop'].append((i, j, h, w)) 168 | 169 | # affine 170 | angle, translations, scale, shear = self.create_affine_params( 171 | data_aug['ratation'], 172 | data_aug['translation_ratio'], 173 | None, 174 | None 175 | ) 176 | transform_params['Affine'].append((angle, translations, scale, shear)) 177 | 178 | # horizontal flip 179 | hflip = random.random() < 0.5 180 | transform_params['Vertical_Flip'].append(hflip) 181 | 182 | # vertical flip 183 | vflip = random.random() < 0.5 184 | transform_params['Horizontal_Flip'].append(vflip) 185 | 186 | # color augmentation 187 | mean = torch.tensor([0.0]) 188 | deviation = torch.tensor([0.5]) 189 | color_vector = torch.distributions.Normal(mean, deviation).sample((3,)).squeeze() 190 | transform_params['ColorAugmentation'].append(color_vector) 191 | 192 | def create_crop_params(self, input_size, scale, ratio): 193 | area = input_size[0] * input_size[1] 194 | 195 | for attempt in range(10): 196 | target_area = random.uniform(*scale) * area 197 | log_ratio = (math.log(ratio[0]), math.log(ratio[1])) 198 | aspect_ratio = math.exp(random.uniform(*log_ratio)) 199 | 200 | w = int(round(math.sqrt(target_area * aspect_ratio))) 201 | h = int(round(math.sqrt(target_area / aspect_ratio))) 202 | 203 | if w <= input_size[0] and h <= input_size[1]: 204 | i = random.randint(0, input_size[1] - h) 205 | j = random.randint(0, input_size[0] - w) 206 | return i, j, h, w 207 | 208 | # Fallback to central crop 209 | in_ratio = input_size[0] / input_size[1] 210 | if (in_ratio < min(ratio)): 211 | w = input_size[0] 212 | h = w / min(ratio) 213 | elif (in_ratio > max(ratio)): 214 | h = input_size[1] 215 | w = h * max(ratio) 216 | else: # whole image 217 | w = input_size[0] 218 | h = input_size[1] 219 | i = (input_size[1] - h) // 2 220 | j = (input_size[0] - w) // 2 221 | return i, j, h, w 222 | 223 | def create_affine_params(self, degrees, translate, scale_ranges, shears): 224 | angle = random.uniform(degrees[0], degrees[1]) 225 | if translate is not None: 226 | max_dx = translate[0] * self.input_size[0] 227 | max_dy = translate[1] * self.input_size[1] 228 | translations = (np.round(random.uniform(-max_dx, max_dx)), 229 | np.round(random.uniform(-max_dy, max_dy))) 230 | else: 231 | translations = (0, 0) 232 | 233 | if scale_ranges is not None: 234 | scale = random.uniform(scale_ranges[0], scale_ranges[1]) 235 | else: 236 | scale = 1.0 237 | 238 | if shears is not None: 239 | shear = random.uniform(shears[0], shears[1]) 240 | else: 241 | shear = 0.0 242 | 243 | return angle, translations, scale, shear 244 | 245 | 246 | class 
ScheduledWeightedSampler(Sampler): 247 | def __init__(self, num_samples, train_targets, initial_weight=BALANCE_WEIGHTS, 248 | final_weight=FINAL_WEIGHTS, replacement=True): 249 | self.num_samples = num_samples 250 | self.train_targets = train_targets 251 | self.replacement = replacement 252 | 253 | self.epoch = 0 254 | self.w0 = initial_weight 255 | self.wf = final_weight 256 | self.train_sample_weight = torch.zeros(len(train_targets), dtype=torch.double) 257 | self.assign_weight(initial_weight) 258 | 259 | def step(self): 260 | self.epoch += 1 261 | factor = 0.975**(self.epoch - 1) 262 | self.weights = factor * self.w0 + (1 - factor) * self.wf 263 | self.assign_weight(self.weights) 264 | 265 | def assign_weight(self, weights): 266 | for i, _class in enumerate(self.train_targets): 267 | self.train_sample_weight[i] = weights[_class] 268 | 269 | def __iter__(self): 270 | return iter(torch.multinomial(self.train_sample_weight, self.num_samples, self.replacement).tolist()) 271 | 272 | def __len__(self): 273 | return self.num_samples 274 | 275 | 276 | class PeculiarSampler(Sampler): 277 | def __init__(self, num_samples, train_targets, batch_size, balance_weight=BALANCE_WEIGHTS, replacement=True): 278 | self.num_samples = num_samples 279 | self.train_targets = train_targets 280 | self.batch_size = batch_size 281 | self.replacement = replacement 282 | 283 | self.epoch = 0 284 | self.args = list(range(num_samples)) 285 | self.train_sample_weight = torch.zeros(len(train_targets), dtype=torch.double) 286 | for i, _class in enumerate(self.train_targets): 287 | self.train_sample_weight[i] = balance_weight[_class] 288 | 289 | self.epoch_samples = [] 290 | 291 | def step(self): 292 | self.epoch_samples = [] 293 | 294 | batch_size = self.batch_size 295 | batch_num = self.num_samples // self.batch_size 296 | for i in range(batch_num): 297 | r = random.random() 298 | if r < 0.2: 299 | self.epoch_samples += torch.multinomial(self.train_sample_weight, batch_size, self.replacement).tolist() 300 | elif r < 0.5: 301 | self.epoch_samples += random.sample(self.args, batch_size) 302 | else: 303 | self.epoch_samples += list(range(i * batch_size, (i + 1) * batch_size)) 304 | 305 | def __iter__(self): 306 | return iter(self.epoch_samples) 307 | 308 | def __len__(self): 309 | return self.num_samples 310 | 311 | 312 | class KrizhevskyColorAugmentation(object): 313 | def __init__(self, sigma=0.5): 314 | self.sigma = sigma 315 | self.mean = torch.tensor([0.0]) 316 | self.deviation = torch.tensor([sigma]) 317 | 318 | def __call__(self, img, color_vec=None): 319 | sigma = self.sigma 320 | if color_vec is None: 321 | if not sigma > 0.0: 322 | color_vec = torch.zeros(3, dtype=torch.float32) 323 | else: 324 | color_vec = torch.distributions.Normal(self.mean, self.deviation).sample((3,)) 325 | color_vec = color_vec.squeeze() 326 | 327 | alpha = color_vec * EV 328 | noise = torch.matmul(U, alpha.t()) 329 | noise = noise.view((3, 1, 1)) 330 | return img + noise 331 | 332 | def __repr__(self): 333 | return self.__class__.__name__ + '(sigma={})'.format(self.sigma) 334 | 335 | 336 | if __name__ == "__main__": 337 | from config import * 338 | CONFIG = LARGE_NET_CONFIG 339 | transformer = EvaluationTransformer( 340 | CONFIG['INPUT_SIZE'], 341 | CONFIG['DATA_AUGMENTATION'], 342 | 20 343 | ) 344 | transformer.create_transform_params() 345 | imgs = transformer.transform('./36_right.jpeg') 346 | transforms.ToPILImage()(imgs[0]).show() 347 | --------------------------------------------------------------------------------
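As a minimal usage sketch of how the helpers in `metrics.py` fit together (the predictions and labels below are made up for illustration): continuous model outputs are thresholded into the five severity grades by `classify`, accumulated into a confusion matrix, and then scored with `quadratic_weighted_kappa`.

```python
import numpy as np

from metrics import classify, quadratic_weighted_kappa

# made-up continuous regression outputs and their ground-truth grades (0-4)
predictions = [0.2, 1.7, 3.9, 2.4, 0.8, 1.2]
targets = [0, 2, 4, 2, 1, 2]

# accumulate a 5x5 confusion matrix indexed as [true grade][predicted grade]
conf_mat = np.zeros((5, 5), dtype=int)
for pred, target in zip(predictions, targets):
    conf_mat[target][classify(pred)] += 1

print(quadratic_weighted_kappa(conf_mat))
```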