├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── combine_folds.py ├── model.py ├── nnet.py ├── predict.py ├── run.sh ├── train.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | .idea/** 3 | *.pyc 4 | *.csv 5 | *.pkl 6 | *.h5 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 abhishek thakur 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | To run: 2 | 3 | 4 | ``` 5 | sh run.sh 6 | ``` 7 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhishekkrthakur/imet-collection/10d04c1c178a40ef7fecee826f86ffa687093ac9/__init__.py -------------------------------------------------------------------------------- /combine_folds.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import division 3 | import torch 4 | import numpy as np 5 | import pandas as pd 6 | import os 7 | from tqdm import tqdm 8 | import joblib 9 | 10 | device = torch.device("cuda:0") 11 | folds = [0, 1, 2, 3, 4] 12 | NUM_CLASSES = 1103 13 | 14 | def fbeta_score(y_pred, y_true, thresh, device, beta=2, eps=1e-9, sigmoid=True): 15 | "Computes the f_beta between `preds` and `targets`" 16 | y_pred = torch.from_numpy(y_pred).float().to(device) 17 | y_true = torch.from_numpy(y_true).float().to(device) 18 | beta2 = beta ** 2 19 | if sigmoid: y_pred = y_pred.sigmoid() 20 | y_pred = (y_pred>thresh).float() 21 | y_true = y_true.float() 22 | TP = (y_pred*y_true).sum(dim=1) 23 | prec = TP/(y_pred.sum(dim=1)+eps) 24 | rec = TP/(y_true.sum(dim=1)+eps) 25 | res = (prec*rec)/(prec*beta2+rec+eps)*(1+beta2) 26 | return res.mean() 27 | 28 | 29 | def find_best_fixed_threshold(preds, targs, device): 30 | score = [] 31 | thrs = np.arange(0, 0.5, 0.01) 32 | for thr in tqdm(thrs): 33 | score.append(fbeta_score(preds, targs, thresh=thr, device=device)) 34 | score = np.array(score) 35 | pm = score.argmax() 36 | best_thr, best_score = thrs[pm], score[pm].item() 37 | print('thr={} F2={}'.format(best_thr, best_score)) 38 | return best_thr, 
best_score 39 | 40 | valid_preds = [] 41 | valid_labels = [] 42 | test_preds = [] 43 | for i in folds: 44 | if i == 0: 45 | valid_preds = joblib.load("fold{}/valid_preds.pkl".format(i)) 46 | valid_labels = joblib.load("fold{}/valid_labels.pkl".format(i)) 47 | test_preds = joblib.load("fold{}/test_preds.pkl".format(i)) 48 | else: 49 | valid_preds = np.vstack((valid_preds, joblib.load("fold{}/valid_preds.pkl".format(i)))) 50 | valid_labels = np.vstack((valid_labels, joblib.load("fold{}/valid_labels.pkl".format(i)))) 51 | test_preds += joblib.load("fold{}/test_preds.pkl".format(i)) 52 | 53 | test_preds /= len(folds) 54 | 55 | best_thr, best_score = find_best_fixed_threshold(valid_preds, valid_labels, device=device) 56 | 57 | sample = pd.read_csv("../input/sample_submission.csv") 58 | predicted = [] 59 | for i, name in tqdm(enumerate(sample['id'])): 60 | score_predict = test_preds[i, :].ravel() 61 | label_predict = np.arange(NUM_CLASSES)[score_predict >= best_thr] 62 | str_predict_label = ' '.join(str(l) for l in label_predict) 63 | predicted.append(str_predict_label) 64 | 65 | sample['attribute_ids'] = predicted 66 | sample.to_csv(os.path.join('combined_submission.csv'), index=False) 67 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import copy 4 | import torch 5 | import numpy as np 6 | from tqdm import tqdm 7 | import torch.nn as nn 8 | from apex import amp 9 | 10 | NUM_CLASSES = int(os.environ["NUM_CLASSES"]) 11 | 12 | 13 | def fbeta_score(y_pred, y_true, thresh, device, beta=2, eps=1e-9, sigmoid=True): 14 | "Computes the f_beta between `preds` and `targets`" 15 | y_pred = torch.from_numpy(y_pred).float().to(device) 16 | y_true = torch.from_numpy(y_true).float().to(device) 17 | beta2 = beta ** 2 18 | if sigmoid: y_pred = y_pred.sigmoid() 19 | y_pred = (y_pred > thresh).float() 20 | y_true = y_true.float() 
def fbeta_score(y_pred, y_true, thresh, device, beta=2, eps=1e-9, sigmoid=True):
    """Compute the mean F-beta score between predictions and targets.

    Args:
        y_pred: (N, C) numpy array of scores (logits when ``sigmoid`` is True).
        y_true: (N, C) numpy array of 0/1 multi-label targets.
        thresh: probability threshold used to binarize ``y_pred``.
        device: torch device the computation runs on.
        beta: recall weight; beta=2 gives the competition F2 metric.
        eps: numerical-stability constant.
        sigmoid: whether to apply a sigmoid to ``y_pred`` first.

    Returns:
        A 0-dim torch tensor holding the score averaged over the N samples.
    """
    y_pred = torch.from_numpy(y_pred).float().to(device)
    y_true = torch.from_numpy(y_true).float().to(device)
    beta2 = beta ** 2
    if sigmoid:
        y_pred = y_pred.sigmoid()
    y_pred = (y_pred > thresh).float()
    y_true = y_true.float()
    TP = (y_pred * y_true).sum(dim=1)
    prec = TP / (y_pred.sum(dim=1) + eps)
    rec = TP / (y_true.sum(dim=1) + eps)
    res = (prec * rec) / (prec * beta2 + rec + eps) * (1 + beta2)
    return res.mean()


def find_best_fixed_threshold(preds, targs, device):
    """Grid-search thresholds in [0, 0.5) and return ``(best_thr, best_score)``."""
    score = []
    thrs = np.arange(0, 0.5, 0.01)
    for thr in tqdm(thrs):
        # BUG FIX: .item() — fbeta_score returns a (possibly CUDA) tensor,
        # and np.array() below cannot convert CUDA tensors to numpy.
        score.append(fbeta_score(preds, targs, thresh=thr, device=device).item())
    score = np.array(score)
    pm = score.argmax()
    best_thr, best_score = thrs[pm], score[pm].item()
    print('thr={} F2={}'.format(best_thr, best_score))
    return best_thr, best_score


def train_model(model, data_loader, dataset_sizes, device, optimizer, scheduler,
                num_epochs, fold_name, use_amp=True):
    """Train ``model`` with BCE-with-logits loss, checkpointing the F2-best weights.

    Args:
        model: network to train (already moved to ``device``).
        data_loader: dict with "train" and "val" DataLoaders.
        dataset_sizes: dict with "train" and "val" dataset lengths.
        device: torch device used for inputs and labels.
        optimizer: optimizer (already wrapped by apex.amp when ``use_amp``).
        scheduler: ReduceLROnPlateau-style scheduler stepped with the best F2.
        num_epochs: maximum number of epochs.
        fold_name: directory where the "model.bin" checkpoint is written.
        use_amp: backpropagate through apex.amp's scaled loss.

    Returns:
        The model with the best-F2 weights loaded.
    """
    since = time.time()
    criterion = nn.BCEWithLogitsLoss()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = np.inf
    all_scores = []
    best_score = -np.inf
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Per-epoch buffers for the validation phase.
        valid_preds = np.zeros((dataset_sizes["val"], NUM_CLASSES))
        valid_labels = np.zeros((dataset_sizes["val"], NUM_CLASSES))
        val_bs = data_loader["val"].batch_size
        for phase in ['train', 'val']:
            if phase == 'train':
                # NOTE(review): stepped before training with last epoch's best
                # score; the very first call sees -inf, which
                # ReduceLROnPlateau tolerates.
                scheduler.step(best_score)
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0

            # Iterate over data.
            tk0 = tqdm(data_loader[phase], total=int(dataset_sizes[phase] / data_loader[phase].batch_size))
            counter = 0
            for bi, d in enumerate(tk0):
                inputs = d["image"].to(device, dtype=torch.float)
                labels = d["labels"].to(device, dtype=torch.float)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        if use_amp is True:
                            with amp.scale_loss(loss, optimizer) as scaled_loss:
                                scaled_loss.backward()
                        else:
                            loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                counter += 1
                tk0.set_postfix(loss=(running_loss / (counter * data_loader[phase].batch_size)))

                if phase == "val":
                    valid_labels[bi * val_bs:(bi + 1) * val_bs, :] = labels.detach().cpu().squeeze().numpy()
                    valid_preds[bi * val_bs:(bi + 1) * val_bs, :] = outputs.detach().cpu().squeeze().numpy()

            epoch_loss = running_loss / dataset_sizes[phase]
            if phase == "val":
                # BUG FIX: best_loss was never updated, so the final
                # "Best val loss" report printed its sentinel value.
                best_loss = min(best_loss, epoch_loss)
                best_thr, score = find_best_fixed_threshold(valid_preds, valid_labels, device)
                all_scores.append(score)
                if score > best_score:
                    best_score = score
                    best_model_wts = copy.deepcopy(model.state_dict())
                    torch.save(model.state_dict(), os.path.join(fold_name, "model.bin"))

            print('{} Loss: {:.4f}'.format(phase, epoch_loss))

        # Early stopping, evaluated only once five epochs have been scored.
        # NOTE(review): the plateau checks below must stay inside this guard;
        # unguarded, len(np.unique(all_scores)) == 1 fires after epoch 0.
        if len(all_scores[-5:]) == 5:
            # best score fell out of the last-5 window: no recent improvement
            if best_score not in all_scores[-5:]:
                break
            # score never changed at all
            if len(np.unique(all_scores)) == 1:
                break
            # last-5 scores within 0.001 of each other: plateau
            if abs(min(all_scores[-5:]) - max(all_scores[-5:])) < 0.001:
                break
        print(all_scores[-5:])

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val loss: {:4f}'.format(best_loss))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
{:.0f}s'.format(time_elapsed // 60, time_elapsed % 60)) 121 | print('Best val loss: {:4f}'.format(best_loss)) 122 | 123 | # load best model weights 124 | model.load_state_dict(best_model_wts) 125 | return model 126 | -------------------------------------------------------------------------------- /nnet.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import division 3 | import torch 4 | import torch.nn as nn 5 | import pretrainedmodels 6 | import os 7 | 8 | 9 | MODEL_NAME = os.environ["MODEL_NAME"] 10 | TRAINING_BATCH_SIZE = int(os.environ["TRAINING_BATCH_SIZE"]) 11 | TEST_BATCH_SIZE = int(os.environ["TEST_BATCH_SIZE"]) 12 | NUM_CLASSES = int(os.environ["NUM_CLASSES"]) 13 | IMAGE_SIZE = int(os.environ["IMAGE_SIZE"]) 14 | EPOCHS = int(os.environ["EPOCHS"]) 15 | 16 | device = torch.device("cuda:0") 17 | model_ft = pretrainedmodels.__dict__[MODEL_NAME](pretrained='imagenet') 18 | 19 | model_ft.avg_pool = nn.AdaptiveAvgPool2d(1) 20 | model_ft.last_linear = nn.Sequential( 21 | nn.BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), 22 | nn.Dropout(p=0.25), 23 | nn.Linear(in_features=2048, out_features=2048, bias=True), 24 | nn.ReLU(), 25 | nn.BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), 26 | nn.Dropout(p=0.5), 27 | nn.Linear(in_features=2048, out_features=NUM_CLASSES, bias=True), 28 | ) 29 | 30 | print(model_ft) 31 | -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import division 3 | import torch 4 | import numpy as np 5 | import pandas as pd 6 | from torchvision import transforms 7 | import os 8 | from tqdm import tqdm 9 | import joblib 10 | from utils import CollectionsDataset, CollectionsDatasetTest 11 | from model 
import find_best_fixed_threshold 12 | import argparse 13 | from nnet import model_ft 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument("--fold", default=-1) 17 | args = parser.parse_args() 18 | 19 | BASE_DIR = "../input/" 20 | FOLD = int(args.fold) 21 | if FOLD == -1: 22 | FOLD = 0 23 | 24 | MODEL_NAME = os.environ["MODEL_NAME"] 25 | TRAINING_BATCH_SIZE = int(os.environ["TRAINING_BATCH_SIZE"]) 26 | TEST_BATCH_SIZE = int(os.environ["TEST_BATCH_SIZE"]) 27 | NUM_CLASSES = int(os.environ["NUM_CLASSES"]) 28 | IMAGE_SIZE = int(os.environ["IMAGE_SIZE"]) 29 | FOLD_NAME = "fold{0}".format(FOLD) 30 | 31 | if FOLD == 0: 32 | training_folds = [1, 2, 3, 4] 33 | val_folds = [0] 34 | elif FOLD == 1: 35 | training_folds = [0, 2, 3, 4] 36 | val_folds = [1] 37 | elif FOLD == 2: 38 | training_folds = [0, 1, 3, 4] 39 | val_folds = [2] 40 | elif FOLD == 3: 41 | training_folds = [0, 1, 2, 4] 42 | val_folds = [3] 43 | else: 44 | training_folds = [0, 1, 2, 3] 45 | val_folds = [4] 46 | 47 | device = torch.device("cuda:0") 48 | IMG_MEAN = model_ft.mean 49 | IMG_STD = model_ft.std 50 | 51 | 52 | test_transform=transforms.Compose([ 53 | transforms.Resize(IMAGE_SIZE), 54 | transforms.CenterCrop(IMAGE_SIZE), 55 | transforms.ToTensor(), 56 | transforms.Normalize(IMG_MEAN,IMG_STD) 57 | ]) 58 | 59 | val_transform = transforms.Compose([ 60 | transforms.Resize(IMAGE_SIZE), 61 | transforms.CenterCrop(IMAGE_SIZE), 62 | transforms.ToTensor(), 63 | transforms.Normalize(IMG_MEAN, IMG_STD) 64 | ]) 65 | 66 | valid_dataset = CollectionsDataset(csv_file='../input/folds.csv', 67 | root_dir='../input/train/', 68 | num_classes=NUM_CLASSES, 69 | image_size=IMAGE_SIZE, 70 | folds=val_folds, 71 | transform=val_transform) 72 | 73 | 74 | valid_dataset_loader = torch.utils.data.DataLoader(valid_dataset, 75 | batch_size=TEST_BATCH_SIZE, 76 | shuffle=False, 77 | num_workers=4) 78 | 79 | test_dataset = CollectionsDatasetTest(csv_file='../input/sample_submission.csv', 80 | root_dir='../input/test/', 
81 | image_size=IMAGE_SIZE, 82 | transform=test_transform) 83 | 84 | test_dataset_loader = torch.utils.data.DataLoader(test_dataset, 85 | batch_size=TEST_BATCH_SIZE, 86 | shuffle=False, 87 | num_workers=4) 88 | 89 | 90 | model_ft.load_state_dict(torch.load(os.path.join(FOLD_NAME, "model.bin"))) 91 | model_ft = model_ft.to(device) 92 | 93 | for param in model_ft.parameters(): 94 | param.requires_grad = False 95 | 96 | 97 | model_ft.eval() 98 | valid_preds = np.zeros((len(valid_dataset), NUM_CLASSES)) 99 | valid_labels = np.zeros((len(valid_dataset), NUM_CLASSES)) 100 | tk0 = tqdm(valid_dataset_loader) 101 | for i, _batch in enumerate(tk0): 102 | x_batch = _batch["image"] 103 | y_batch = _batch["labels"] 104 | pred = model_ft(x_batch.to(device)) 105 | valid_labels[i * TEST_BATCH_SIZE:(i + 1) * TEST_BATCH_SIZE, :] = y_batch.detach().cpu().squeeze().numpy() 106 | valid_preds[i * TEST_BATCH_SIZE:(i + 1) * TEST_BATCH_SIZE, :] = pred.detach().cpu().squeeze().numpy() 107 | 108 | best_thr, best_score = find_best_fixed_threshold(valid_preds, valid_labels, device=device) 109 | 110 | test_preds = np.zeros((len(test_dataset), NUM_CLASSES)) 111 | tk0 = tqdm(test_dataset_loader) 112 | for i, x_batch in enumerate(tk0): 113 | x_batch = x_batch["image"] 114 | pred = model_ft(x_batch.to(device)) 115 | test_preds[i * TEST_BATCH_SIZE:(i + 1) * TEST_BATCH_SIZE, :] = pred.detach().cpu().squeeze().numpy() 116 | 117 | 118 | test_preds = torch.from_numpy(test_preds).float().to(device).sigmoid() 119 | test_preds = test_preds.detach().cpu().squeeze().numpy() 120 | 121 | sample = pd.read_csv("../input/sample_submission.csv") 122 | predicted = [] 123 | for i, name in tqdm(enumerate(sample['id'])): 124 | score_predict = test_preds[i, :].ravel() 125 | label_predict = np.arange(NUM_CLASSES)[score_predict >= best_thr] 126 | str_predict_label = ' '.join(str(l) for l in label_predict) 127 | predicted.append(str_predict_label) 128 | 129 | sample['attribute_ids'] = predicted 130 | 131 | # save all the 
stuff 132 | sample.to_csv(os.path.join(FOLD_NAME, 'submission.csv'), index=False) 133 | joblib.dump(valid_preds, os.path.join(FOLD_NAME, "valid_preds.pkl")) 134 | joblib.dump(test_preds, os.path.join(FOLD_NAME, "test_preds.pkl")) 135 | joblib.dump(valid_labels, os.path.join(FOLD_NAME, "valid_labels.pkl")) 136 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export CUDA_VISIBLE_DEVICES=0 3 | export MODEL_NAME="se_resnext101_32x4d" 4 | export TRAINING_BATCH_SIZE=64 5 | export TEST_BATCH_SIZE=32 6 | export NUM_CLASSES=1103 7 | export IMAGE_SIZE=320 8 | export EPOCHS=25 9 | python3 train.py --fold 0 10 | python3 predict.py --fold 0 11 | python3 train.py --fold 1 12 | python3 predict.py --fold 1 13 | python3 train.py --fold 2 14 | python3 predict.py --fold 2 15 | python3 train.py --fold 3 16 | python3 predict.py --fold 3 17 | python3 train.py --fold 4 18 | python3 predict.py --fold 4 19 | python3 combine_folds.py 20 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import division 3 | import torch 4 | import torch.optim as optim 5 | from torchvision import transforms 6 | from torch.optim import lr_scheduler 7 | from utils import CollectionsDataset 8 | from model import train_model 9 | import os 10 | import argparse 11 | from apex import amp 12 | from nnet import model_ft 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument("--fold", default=-1) 16 | args = parser.parse_args() 17 | 18 | BASE_DIR = "../input/" 19 | FOLD = int(args.fold) 20 | if FOLD == -1: 21 | FOLD = 0 22 | 23 | if FOLD == 0: 24 | training_folds = [1, 2, 3, 4] 25 | val_folds = [0] 26 | elif FOLD == 1: 27 | training_folds = [0, 2, 3, 4] 28 | val_folds = 
[1] 29 | elif FOLD == 2: 30 | training_folds = [0, 1, 3, 4] 31 | val_folds = [2] 32 | elif FOLD == 3: 33 | training_folds = [0, 1, 2, 4] 34 | val_folds = [3] 35 | else: 36 | training_folds = [0, 1, 2, 3] 37 | val_folds = [4] 38 | 39 | FOLD_NAME = "fold{0}".format(FOLD) 40 | 41 | MODEL_NAME = os.environ["MODEL_NAME"] 42 | TRAINING_BATCH_SIZE = int(os.environ["TRAINING_BATCH_SIZE"]) 43 | TEST_BATCH_SIZE = int(os.environ["TEST_BATCH_SIZE"]) 44 | NUM_CLASSES = int(os.environ["NUM_CLASSES"]) 45 | IMAGE_SIZE = int(os.environ["IMAGE_SIZE"]) 46 | EPOCHS = int(os.environ["EPOCHS"]) 47 | 48 | if not os.path.exists(FOLD_NAME): 49 | os.makedirs(FOLD_NAME) 50 | 51 | device = torch.device("cuda:0") 52 | IMG_MEAN = model_ft.mean 53 | IMG_STD = model_ft.std 54 | 55 | train_transform = transforms.Compose([ 56 | transforms.RandomResizedCrop((IMAGE_SIZE, IMAGE_SIZE)), 57 | transforms.RandomHorizontalFlip(), 58 | transforms.ToTensor(), 59 | transforms.Normalize(IMG_MEAN, IMG_STD) 60 | ]) 61 | 62 | val_transform = transforms.Compose([ 63 | transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)), 64 | transforms.ToTensor(), 65 | transforms.Normalize(IMG_MEAN, IMG_STD) 66 | ]) 67 | 68 | train_dataset = CollectionsDataset(csv_file='../input/folds.csv', 69 | root_dir='../input/train/', 70 | num_classes=NUM_CLASSES, 71 | image_size=IMAGE_SIZE, 72 | folds=training_folds, 73 | transform=train_transform) 74 | 75 | valid_dataset = CollectionsDataset(csv_file='../input/folds.csv', 76 | root_dir='../input/train/', 77 | num_classes=NUM_CLASSES, 78 | image_size=IMAGE_SIZE, 79 | folds=val_folds, 80 | transform=val_transform) 81 | 82 | train_dataset_loader = torch.utils.data.DataLoader(train_dataset, 83 | batch_size=TRAINING_BATCH_SIZE, 84 | shuffle=True, 85 | num_workers=4) 86 | 87 | valid_dataset_loader = torch.utils.data.DataLoader(valid_dataset, 88 | batch_size=TEST_BATCH_SIZE, 89 | shuffle=False, 90 | num_workers=4) 91 | 92 | model_ft = model_ft.to(device) 93 | 94 | lr_min = 1e-4 95 | lr_max = 1e-3 96 | 97 
import os

import pandas as pd
import torch
from PIL import Image, ImageFile
from torch.utils.data import Dataset

# Let PIL load partially-downloaded/truncated image files instead of raising.
ImageFile.LOAD_TRUNCATED_IMAGES = True


class CollectionsDataset(Dataset):
    """Train/validation dataset: images plus multi-hot attribute labels.

    Reads a folds CSV with columns 'id', 'attribute_ids' (space-separated
    label indices) and 'fold'; keeps only rows whose fold is in ``folds``
    (all rows when ``folds`` is empty or None).
    """

    def __init__(self, csv_file, root_dir, num_classes, image_size, folds=None, transform=None):
        if folds is None:
            folds = []
        self.data = pd.read_csv(csv_file)
        if len(folds) > 0:
            self.data = self.data[self.data.fold.isin(folds)].reset_index(drop=True)
        self.root_dir = root_dir
        self.transform = transform
        self.num_classes = num_classes
        # NOTE(review): kept for interface parity; resizing is actually done
        # by `transform`, image_size is never read here.
        self.image_size = image_size

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.data.loc[idx, 'id'] + '.png')
        # BUG FIX: force 3-channel RGB defensively — palette/greyscale/RGBA
        # PNGs would otherwise reach Normalize() with the wrong channel
        # count; images already in RGB are unaffected.
        image = Image.open(img_name).convert('RGB')

        # Multi-hot encode the space-separated label indices.
        label_tensor = torch.zeros(self.num_classes)
        for attr in self.data.loc[idx, 'attribute_ids'].split():
            label_tensor[int(attr)] = 1

        if self.transform:
            image = self.transform(image)

        return {'image': image,
                'labels': label_tensor
                }


class CollectionsDatasetTest(Dataset):
    """Test-time dataset: images only (ids come from sample_submission.csv)."""

    def __init__(self, csv_file, root_dir, image_size, transform=None):
        self.data = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        # NOTE(review): unused, kept for interface parity.
        self.image_size = image_size

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.data.loc[idx, 'id'] + '.png')
        # Same defensive RGB conversion as CollectionsDataset.__getitem__.
        image = Image.open(img_name).convert('RGB')
        if self.transform:
            image = self.transform(image)

        return {'image': image}