├── media ├── agg1.jpg ├── chips.jpg ├── haze1.jpg ├── pc1.jpg ├── river.jpg ├── road.jpg ├── chipdesc.jpg ├── cloudy_1.jpg └── habitation1.jpg ├── .gitignore ├── src ├── p_logger.py ├── p_model_selection.py ├── k_model_selection.py ├── p_validation.py ├── p_training.py ├── p2_loss.py ├── p_prediction.py ├── p_dataload.py ├── p_sampler.py ├── p2_validation.py ├── p2_prediction.py ├── p2_dataload.py ├── p_metrics.py ├── p2_metrics.py ├── p3_neuroRNN.py ├── p_data_augmentation.py ├── p_neuro.py ├── _deprecated.py └── k_dataloader.py ├── compute-mean-std.py ├── adjust_prediction.py ├── baseline ├── unfinished_attempts │ ├── 000-Mxnet-ResNet-baseline-TODO.py │ ├── 002-Keras-Inception-Transfer.py │ └── 000-Mxnet-Resnet-extraction-XGBoost-MultiLabel-TODO.ipynb ├── 001-keras-baseline-0.80752.py ├── pytorch_scatter_gather_onehotencoding.ipynb ├── 003-pytorch-kernel-baseline.ipynb └── RNN_experiment_1.ipynb ├── Ideas.txt ├── README.md ├── pytorch_predict_only.py ├── main_keras.py ├── main_pytorch-baseline.py ├── main_pytorch.py └── Dual_Feed_Image_Label.ipynb /media/agg1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mratsim/Amazon-Forest-Computer-Vision/HEAD/media/agg1.jpg -------------------------------------------------------------------------------- /media/chips.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mratsim/Amazon-Forest-Computer-Vision/HEAD/media/chips.jpg -------------------------------------------------------------------------------- /media/haze1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mratsim/Amazon-Forest-Computer-Vision/HEAD/media/haze1.jpg -------------------------------------------------------------------------------- /media/pc1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mratsim/Amazon-Forest-Computer-Vision/HEAD/media/pc1.jpg -------------------------------------------------------------------------------- /media/river.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mratsim/Amazon-Forest-Computer-Vision/HEAD/media/river.jpg -------------------------------------------------------------------------------- /media/road.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mratsim/Amazon-Forest-Computer-Vision/HEAD/media/road.jpg -------------------------------------------------------------------------------- /media/chipdesc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mratsim/Amazon-Forest-Computer-Vision/HEAD/media/chipdesc.jpg -------------------------------------------------------------------------------- /media/cloudy_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mratsim/Amazon-Forest-Computer-Vision/HEAD/media/cloudy_1.jpg -------------------------------------------------------------------------------- /media/habitation1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mratsim/Amazon-Forest-Computer-Vision/HEAD/media/habitation1.jpg -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | data/ 2 | out/ 3 | core 4 | EDA/ 5 | kernels/ 6 | pretrained-models/ 7 | tmp/ 8 | .ipynb_checkpoints/ 9 | __pycache__/ 10 | snapshots/ 11 | zoo/ 12 | -------------------------------------------------------------------------------- /src/p_logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | def setup_logs(save_dir, run_name): 5 | # initialize logger 6 | logger = logging.getLogger("Planet-Amazon") 7 | logger.setLevel(logging.INFO) 8 | 9 | # create the logging file handler 10 | log_file = os.path.join(save_dir, run_name + ".log") 11 | fh = logging.FileHandler(log_file) 12 | 13 | # create the logging console handler 14 | ch = logging.StreamHandler() 15 | 16 | # format 17 | formatter = logging.Formatter("%(asctime)s - %(message)s") 18 | fh.setFormatter(formatter) 19 | 20 | # add handlers to logger object 21 | logger.addHandler(fh) 22 | logger.addHandler(ch) 23 | 24 | return logger -------------------------------------------------------------------------------- /src/p_model_selection.py: -------------------------------------------------------------------------------- 1 | import random 2 | from math import floor 3 | 4 | def train_valid_split(dataset, test_size = 0.25, shuffle = False, random_seed = 0): 5 | """ Return a list of split indices from a Dataset. 6 | Indices can be used with DataLoader to build a train and validation set. 7 | 8 | Arguments: 9 | A Dataset 10 | A test_size, as a float between 0 and 1 (percentage split) or as an int (fixed number split) 11 | Shuffling True or False 12 | Random seed 13 | """ 14 | length = dataset.__len__() 15 | indices = list(range(length)) 16 | 17 | if shuffle == True: 18 | random.seed(random_seed) 19 | random.shuffle(indices) 20 | 21 | if type(test_size) is float: 22 | split = floor(test_size * length) 23 | elif type(test_size) is int: 24 | split = test_size 25 | else: 26 | raise ValueError('%s should be an int or a float' % str(test_size)) 27 | return indices[split:], indices[:split] -------------------------------------------------------------------------------- /src/k_model_selection.py: -------------------------------------------------------------------------------- 1 | import random 2 | from math import floor 3 | 4 | def train_valid_split(dataframe, test_size = 0.25, shuffle = False, random_seed = 0): 5 | """ Return a list of split indices from a DataFrame. 6 | Indices can be used with DataLoader to build a train and validation set.
7 | 8 | Arguments: 9 | A Dataframe 10 | A test_size, as a float between 0 and 1 (percentage split) or as an int (fixed number split) 11 | Shuffling True or False 12 | Random seed 13 | """ 14 | length = len(dataframe.index) 15 | indices = list(range(length)) 16 | 17 | if shuffle == True: 18 | random.seed(random_seed) 19 | random.shuffle(indices) 20 | 21 | if type(test_size) is float: 22 | split = floor(test_size * length) 23 | elif type(test_size) is int: 24 | split = test_size 25 | else: 26 | raise ValueError('%s should be an int or a float' % str(test_size)) 27 | return indices[split:], indices[:split] -------------------------------------------------------------------------------- /compute-mean-std.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import pandas as pd 4 | from tqdm import tqdm 5 | 6 | RESOLUTION = 96 # Ideally we shouldn't be resizing but I'm lacking memory 7 | 8 | if __name__ == "__main__": 9 | data = [] 10 | df_train = pd.read_csv('./data/train.csv') 11 | 12 | for file in tqdm(df_train['image_name'], miniters=256): 13 | img = cv2.imread('./data/train-jpg/{}.jpg'.format(file)) 14 | data.append(cv2.resize(img,(RESOLUTION,RESOLUTION))) 15 | 16 | data = np.array(data, np.float32) / 255 # Must use float32 at least otherwise we get over float16 limits 17 | print("Shape: ", data.shape) 18 | 19 | means = [] 20 | stdevs = [] 21 | for i in range(3): 22 | pixels = data[:,:,:,i].ravel() 23 | means.append(np.mean(pixels)) 24 | stdevs.append(np.std(pixels)) 25 | 26 | print("means: {}".format(means)) 27 | print("stdevs: {}".format(stdevs)) 28 | print('transforms.Normalize(mean = {}, std = {})'.format(means, stdevs)) -------------------------------------------------------------------------------- /adjust_prediction.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import pandas as pd 4 | import torch 5 | from src.p_dataload import KaggleAmazonDataset 6 | 7 | 8 | ## Load MultiLabelBinarizer config 9 | X_train = KaggleAmazonDataset('./data/train.csv','./data/train-jpg/','.jpg') 10 | mlb = X_train.getLabelEncoder() 11 | 12 | ## Load sample submission: 13 | df_test = pd.read_csv('./data/sample_submission_v2.csv') 14 | 15 | ## Load raw prediction (proba): 16 | subm_proba = np.loadtxt('./out/2017-05-12_1223-resnet50-L2reg-new-data-raw-pred-0.922374050536.csv', 17 | delimiter=';') 18 | 19 | ## Load threshold: 20 | model_path = './snapshots/2017-05-12_1223-resnet50-L2reg-new-data-model_best.pth' 21 | checkpoint = torch.load(model_path) 22 | threshold = checkpoint['threshold'] 23 | 24 | ## Force single weather: TODO check if cloudy is alone 25 | weather = subm_proba[:, 0:4] 26 | indices = np.argmax(weather, axis=1) 27 | new_weather = np.eye(4)[indices] 28 | subm_proba[:,0:4] = new_weather 29 | 30 | predictions = subm_proba > threshold 31 | 32 | result = pd.DataFrame({ 33 | 'image_name': df_test['image_name'], 34 | 'tags': mlb.inverse_transform(predictions) 35 | }) 36 | result['tags'] = result['tags'].apply(lambda tags: " ".join(tags)) 37 | 38 | result_path = './out/2017-05-12_1223-resnet50-L2reg-new-data-adjusted-pred-0.922374050536.csv' 39 | result.to_csv(result_path, index=False) -------------------------------------------------------------------------------- /src/p_validation.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Variable 2 | import numpy as np 3 | import logging 4 | import
torch.nn.functional as F 5 | from tqdm import tqdm 6 | 7 | from src.p_metrics import best_f2_score 8 | 9 | ## Get the same logger from main" 10 | logger = logging.getLogger("Planet-Amazon") 11 | 12 | ################################################## 13 | #### Validate function 14 | def validate(epoch,valid_loader,model,loss_func,mlb): 15 | ## Volatile variables do not save intermediate results and build graphs for backprop, achieving massive memory savings. 16 | 17 | model.eval() 18 | total_loss = 0 19 | predictions = [] 20 | true_labels = [] 21 | 22 | logger.info("Starting Validation") 23 | for batch_idx, (data, target) in enumerate(tqdm(valid_loader)): 24 | true_labels.append(target.cpu().numpy()) 25 | 26 | data, target = data.cuda(async=True), target.cuda(async=True) 27 | data, target = Variable(data, volatile=True), Variable(target, volatile=True) 28 | 29 | pred = model(data) 30 | predictions.append(F.sigmoid(pred).data.cpu().numpy()) 31 | 32 | total_loss += loss_func(pred,target).data[0] 33 | 34 | avg_loss = total_loss / len(valid_loader) 35 | 36 | predictions = np.vstack(predictions) 37 | true_labels = np.vstack(true_labels) 38 | 39 | score, threshold = best_f2_score(true_labels, predictions) 40 | logger.info("Corresponding tags\n{}".format(mlb.classes_)) 41 | 42 | logger.info("===> Validation - Avg. loss: {:.4f}\tF2 Score: {:.4f}".format(avg_loss,score)) 43 | return score, avg_loss, threshold -------------------------------------------------------------------------------- /src/p_training.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Variable 2 | import torch 3 | import os 4 | import logging 5 | 6 | ## Get the same logger from main" 7 | logger = logging.getLogger("Planet-Amazon") 8 | 9 | 10 | def lr_scheduler(optimizer, epoch, init_lr=0.01, lr_decay_epoch=7): 11 | """Decay learning rate by a factor of 0.1 every lr_decay_epoch epochs.""" 12 | lr = init_lr * (0.1**(epoch // lr_decay_epoch)) 13 | 14 | if epoch % lr_decay_epoch == 0: 15 | logger.info('LR is set to {}'.format(lr)) 16 | 17 | for param_group in optimizer.param_groups: 18 | param_group['lr'] = lr 19 | 20 | return optimizer 21 | 22 | def train(epoch,train_loader,model,loss_func, optimizer): 23 | model.train() 24 | optimizer = lr_scheduler(optimizer, epoch) 25 | 26 | 27 | for batch_idx, (data, target) in enumerate(train_loader): 28 | data, target = data.cuda(async=True), target.cuda(async=True) 29 | data, target = Variable(data), Variable(target, requires_grad=False) 30 | optimizer.zero_grad() 31 | output = model(data) 32 | loss = loss_func(output,target) 33 | loss.backward() 34 | optimizer.step() 35 | if batch_idx % 100 == 0: 36 | logger.info('Train Epoch: {:03d} [{:05d}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 37 | epoch, batch_idx * len(data), len(train_loader) * len(data), 38 | 100. 
* batch_idx / len(train_loader), loss.data[0])) 39 | 40 | def snapshot(dir_path, run_name, is_best, state): 41 | snapshot_file = os.path.join(dir_path, 42 | run_name + '-model_best.pth') 43 | if is_best: 44 | torch.save(state, snapshot_file) 45 | logger.info("Snapshot saved to {}".format(snapshot_file)) -------------------------------------------------------------------------------- /baseline/unfinished_attempts/000-Mxnet-ResNet-baseline-TODO.py: -------------------------------------------------------------------------------- 1 | import xgboost as xgb 2 | import cv2 3 | import mxnet as mx 4 | import os 5 | import numpy as np 6 | from timeit import default_timer as timer 7 | from sklearn.model_selection import train_test_split 8 | 9 | SRC_IMAGES = '../data/train-jpg/' 10 | SRCDIR = os.listdir(SRC_IMAGES) 11 | TMPDIR = './tmp/' 12 | 13 | def get_extractor(): 14 | model = mx.model.FeedForward.load('../pretrained-models/resnet-50', 0, ctx=mx.gpu(), numpy_batch_size=1) 15 | fea_symbol = model.symbol.get_internals()["flatten0_output"] 16 | feature_extractor = mx.model.FeedForward(ctx=mx.gpu(), symbol=fea_symbol, numpy_batch_size=64, 17 | arg_params=model.arg_params, aux_params=model.aux_params, 18 | allow_extra_params=True) 19 | 20 | return feature_extractor 21 | 22 | 23 | def prepare_image_batch(image): 24 | img = SRC_IMAGES + image 25 | img = cv2.imread(img) 26 | img = 255.0 / np.amax(img) * img 27 | # img = cv2.equalizeHist(img.astype(np.uint8)) 28 | img = cv2.resize(img.astype(np.int16), (224, 224)) 29 | img = img.reshape(1,3,224,224) 30 | 31 | return img 32 | 33 | def calc_features(): 34 | net = get_extractor() 35 | n=1 36 | for image in SRCDIR: 37 | print("Doing image %s/%s: %s" % (n, len(SRCDIR), image)) 38 | img = prepare_image_batch(image) 39 | print(img.shape) 40 | feats = net.predict(img) 41 | print("Prediction features have shape:") 42 | print(feats.shape) 43 | np.save(TMPDIR+image, feats) 44 | 45 | n+=1 46 | 47 | 48 | if __name__ == '__main__': 49 | start_time = timer() 50 | calc_features() 51 | # make_submit() 52 | end_time = timer() 53 | print("Elapsed time: %s" % (end_time - start_time)) -------------------------------------------------------------------------------- /src/p2_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | from torch.nn.modules.loss import _WeightedLoss 3 | import torch 4 | from torch.autograd import Variable 5 | 6 | # If needed to code the categorical cross entropy from scratch: https://github.com/twitter/torch-autograd/blob/master/src/loss/init.lua 7 | 8 | class ConvolutedLoss(_WeightedLoss): 9 | """ Treat the weather as MultiClassification (only one label possible) 10 | Treat the rest as Multilabel 11 | ==> Multi-Task learning 12 | """ 13 | def __init__(self, weight=None, size_average=True): 14 | super(ConvolutedLoss, self).__init__(size_average) 15 | if weight is None: 16 | self.register_buffer('weight_weather', None) 17 | self.register_buffer('weight_other', None) 18 | else: 19 | self.register_buffer('weight_weather', weight[:4]) # Weather conditions are the first 4 20 | self.register_buffer('weight_other', weight[4:]) 21 | 22 | def forward(self, input, target): 23 | # Cross-Entropy wants categorical not one-hot 24 | # Reverse one hot 25 | weather_targets = Variable(torch.arange(0,4).expand(target.size(0),4).masked_select(target[:,:4].data.byte().cpu()).long().cuda(), requires_grad = False) 26 | 27 | loss_weather = F.cross_entropy(input[:,:4], 28 | weather_targets, 29 | 
self.weight_weather, 30 | self.size_average) 31 | loss_other = F.binary_cross_entropy(F.sigmoid(input[:,4:]), 32 | target[:,4:], 33 | self.weight_other, 34 | self.size_average) 35 | 36 | return (loss_weather * 4/17) + (loss_other * 13/17) 37 | -------------------------------------------------------------------------------- /src/p_prediction.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Variable 2 | import numpy as np 3 | import logging 4 | import os 5 | import pandas as pd 6 | import torch.nn.functional as F 7 | from tqdm import tqdm 8 | 9 | ## Get the same logger from main" 10 | logger = logging.getLogger("Planet-Amazon") 11 | 12 | ################################################## 13 | #### Prediction function 14 | def predict(test_loader, model): 15 | model.eval() 16 | predictions = [] 17 | 18 | logger.info("Starting Prediction") 19 | for batch_idx, (data, _) in enumerate(tqdm(test_loader)): 20 | data = data.cuda(async=True) 21 | data = Variable(data, volatile=True) 22 | 23 | pred = F.sigmoid(model(data)) 24 | predictions.append(pred.data.cpu().numpy()) 25 | 26 | predictions = np.vstack(predictions) 27 | 28 | logger.info("===> Raw predictions done. Here is a snippet") 29 | logger.info(predictions) 30 | return predictions 31 | 32 | def output(predictions, threshold, X_test, mlb, dir_path, run_name, accuracy): 33 | 34 | raw_pred_path = os.path.join(dir_path, run_name + '-raw-pred-'+str(accuracy)+'.csv') 35 | np.savetxt(raw_pred_path,predictions,delimiter=";") 36 | logger.info("Raw predictions saved to {}".format(raw_pred_path)) 37 | 38 | predictions = predictions > threshold 39 | 40 | result = pd.DataFrame({ 41 | 'image_name': X_test.X, 42 | 'tags': mlb.inverse_transform(predictions) 43 | }) 44 | result['tags'] = result['tags'].apply(lambda tags: " ".join(tags)) 45 | 46 | logger.info("===> Final predictions done. Here is a snippet") 47 | logger.info(result) 48 | 49 | result_path = os.path.join(dir_path, run_name + '-final-pred-'+str(accuracy)+'.csv') 50 | result.to_csv(result_path, index=False) 51 | logger.info("Final predictions saved to {}".format(result_path)) -------------------------------------------------------------------------------- /src/p_dataload.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data.dataset import Dataset 2 | from torchvision import transforms 3 | import pandas as pd 4 | import os 5 | from PIL import Image # Replace by accimage when ready 6 | from PIL.Image import FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM, ROTATE_90, ROTATE_180, ROTATE_270 7 | from PIL.ImageEnhance import Color, Contrast, Brightness, Sharpness 8 | from sklearn.preprocessing import MultiLabelBinarizer 9 | from torch import np, from_numpy # Numpy like wrapper 10 | 11 | class KaggleAmazonDataset(Dataset): 12 | """Dataset wrapping images and target labels for Kaggle - Planet Amazon from Space competition. 
13 | 14 | Arguments: 15 | A CSV file path 16 | Path to image folder 17 | Extension of images 18 | """ 19 | 20 | def __init__(self, csv_path, img_path, img_ext, transform=None): 21 | 22 | self.df = pd.read_csv(csv_path) 23 | assert self.df['image_name'].apply(lambda x: os.path.isfile(img_path + x + img_ext)).all(), \ 24 | "Some images referenced in the CSV file were not found" 25 | 26 | self.mlb = MultiLabelBinarizer() 27 | self.img_path = img_path 28 | self.img_ext = img_ext 29 | self.transform = transform 30 | 31 | self.X = self.df['image_name'] 32 | self.y = self.mlb.fit_transform(self.df['tags'].str.split()).astype(np.float32) 33 | 34 | def X(self): 35 | return self.X 36 | 37 | def __getitem__(self, index): 38 | img = Image.open(self.img_path + self.X[index] + self.img_ext) 39 | img = img.convert('RGB') 40 | if self.transform is not None: 41 | img = self.transform(img) 42 | 43 | label = from_numpy(self.y[index]) 44 | return img, label 45 | 46 | def __len__(self): 47 | return len(self.df.index) 48 | 49 | def getLabelEncoder(self): 50 | return self.mlb 51 | 52 | def getDF(self): 53 | return self.df -------------------------------------------------------------------------------- /src/p_sampler.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data.sampler import Sampler 2 | import numpy as np 3 | import pandas as pd 4 | 5 | class SubsetSampler(Sampler): 6 | """Samples elements from a given list of indices. 7 | 8 | Arguments: 9 | indices (list): a list of indices 10 | """ 11 | 12 | def __init__(self, indices): 13 | self.num_samples = len(indices) 14 | self.indices = indices 15 | 16 | def __iter__(self): 17 | return iter(self.indices) 18 | 19 | def __len__(self): 20 | return self.num_samples 21 | 22 | def balance_weights(df_source, col_target, mlb): 23 | """ Compute balanced weights from a Multilabel dataframe 24 | 25 | Arguments: 26 | Dataframe 27 | The name of the column with the target labels 28 | A MultiLabelBinarizer to one-hot-encode/decode the label column 29 | 30 | Returns: 31 | A Pandas Series with balanced weights 32 | """ 33 | 34 | # Create a working copy of the dataframe 35 | df = df_source.copy(deep=True) 36 | 37 | df_labels = mlb.transform(df[col_target].str.split(" ")) 38 | 39 | ## Next 4 lines won't be needed when axis argument is added to np.unique in NumPy 1.13 40 | ncols = df_labels.shape[1] 41 | dtype = df_labels.dtype.descr * ncols 42 | struct = df_labels.view(dtype) 43 | uniq_labels, uniq_counts = np.unique(struct, return_counts=True) 44 | 45 | uniq_labels = uniq_labels.view(df_labels.dtype).reshape(-1, ncols) 46 | 47 | ## We convert the One-Hot-Encoded labels as string to store them in a dataframe and join on them 48 | df_stats = pd.DataFrame({ 49 | 'target':np.apply_along_axis(np.array_str, 1, uniq_labels), 50 | 'freq':uniq_counts 51 | }) 52 | 53 | df['target'] = np.apply_along_axis(np.array_str, 1, df_labels) 54 | 55 | ## Join the dataframe to add frequency 56 | df = df.merge(df_stats,how='left',on='target') 57 | 58 | ## Compute balanced weights 59 | weights = 1 / df['freq'].astype(np.float) 60 | 61 | return weights -------------------------------------------------------------------------------- /src/p2_validation.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Variable 2 | import numpy as np 3 | import logging 4 | import torch.nn.functional as F 5 | from tqdm import tqdm 6 | import torch 7 | 8 | from src.p_metrics import best_f2_score 9 | 10 | 
## Get the same logger from main 11 | logger = logging.getLogger("Planet-Amazon") 12 | 13 | ################################################## 14 | #### Validate function 15 | def validate(epoch,valid_loader,model,loss_func,mlb): 16 | ## Volatile variables neither save intermediate results nor build graphs for backprop, achieving massive memory savings. 17 | 18 | model.eval() 19 | total_loss = 0 20 | predictions = [] 21 | true_labels = [] 22 | 23 | logger.info("Starting Validation") 24 | for batch_idx, (data, target) in enumerate(tqdm(valid_loader)): 25 | true_labels.append(target.cpu().numpy()) 26 | 27 | data, target = data.cuda(async=True), target.cuda(async=True) 28 | data, target = Variable(data, volatile=True), Variable(target, volatile=True) 29 | 30 | raw_pred = model(data) 31 | # Even though we use softmax for training, it doesn't give good results here 32 | # However the activated neurons for weather will give a much larger response, making thresholding much easier 33 | # pred = torch.cat( 34 | # ( 35 | # F.softmax(raw_pred[:4]), 36 | # F.sigmoid(raw_pred[4:]) 37 | # ), 0 38 | # ) 39 | pred = F.sigmoid(raw_pred) 40 | predictions.append(pred.data.cpu().numpy()) 41 | 42 | total_loss += loss_func(raw_pred,target).data[0] 43 | 44 | avg_loss = total_loss / len(valid_loader) 45 | 46 | predictions = np.vstack(predictions) 47 | true_labels = np.vstack(true_labels) 48 | 49 | score, threshold = best_f2_score(true_labels, predictions) 50 | logger.info("Corresponding tags\n{}".format(mlb.classes_)) 51 | 52 | logger.info("===> Validation - Avg. loss: {:.4f}\tF2 Score: {:.4f}".format(avg_loss,score)) 53 | return score, avg_loss, threshold -------------------------------------------------------------------------------- /src/p2_prediction.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Variable 2 | import numpy as np 3 | import logging 4 | import os 5 | import pandas as pd 6 | import torch.nn.functional as F 7 | from tqdm import tqdm 8 | import torch 9 | 10 | ## Get the same logger from main 11 | logger = logging.getLogger("Planet-Amazon") 12 | 13 | ################################################## 14 | #### Prediction function 15 | def predict(test_loader, model): 16 | model.eval() 17 | predictions = [] 18 | 19 | logger.info("Starting Prediction") 20 | for batch_idx, (data, _) in enumerate(tqdm(test_loader)): 21 | data = data.cuda(async=True) 22 | data = Variable(data, volatile=True) 23 | 24 | raw_pred = model(data) 25 | # Even though we use softmax for training, it doesn't give good results here 26 | # However the activated neurons for weather will give a much larger response, making thresholding much easier 27 | # pred = torch.cat( 28 | # ( 29 | # F.softmax(raw_pred[:4]), 30 | # F.sigmoid(raw_pred[4:]) 31 | # ), 0 32 | # ) 33 | pred = F.sigmoid(raw_pred) 34 | predictions.append(pred.data.cpu().numpy()) 35 | 36 | predictions = np.vstack(predictions) 37 | 38 | logger.info("===> Raw predictions done.
Here is a snippet") 39 | print(predictions) 40 | return predictions 41 | 42 | def output(predictions, threshold, X_test, mlb, dir_path, run_name, accuracy): 43 | 44 | raw_pred_path = os.path.join(dir_path, run_name + '-raw-pred-'+str(accuracy)+'.csv') 45 | np.savetxt(raw_pred_path,predictions,delimiter=";") 46 | logger.info("Raw predictions saved to {}".format(raw_pred_path)) 47 | 48 | predictions = predictions > threshold 49 | 50 | result = pd.DataFrame({ 51 | 'image_name': X_test.X, 52 | 'tags': mlb.inverse_transform(predictions) 53 | }) 54 | result['tags'] = result['tags'].apply(lambda tags: " ".join(tags)) 55 | 56 | logger.info("===> Final predictions done. Here is a snippet") 57 | print(result) 58 | 59 | result_path = os.path.join(dir_path, run_name + '-final-pred-'+str(accuracy)+'.csv') 60 | result.to_csv(result_path, index=False) 61 | logger.info("Final predictions saved to {}".format(result_path)) -------------------------------------------------------------------------------- /src/p2_dataload.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data.dataset import Dataset 2 | from torchvision import transforms 3 | import pandas as pd 4 | import os 5 | from PIL import Image # Replace by accimage when ready 6 | from PIL.Image import FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM, ROTATE_90, ROTATE_180, ROTATE_270 7 | from PIL.ImageEnhance import Color, Contrast, Brightness, Sharpness 8 | from sklearn.preprocessing import MultiLabelBinarizer 9 | from torch import np, from_numpy # Numpy like wrapper 10 | 11 | class KaggleAmazonDataset(Dataset): 12 | """Dataset wrapping images and target labels for Kaggle - Planet Amazon from Space competition. 13 | 14 | Arguments: 15 | A CSV file path 16 | Path to image folder 17 | Extension of images 18 | """ 19 | 20 | def __init__(self, csv_path, img_path, img_ext, transform=None): 21 | 22 | self.df = pd.read_csv(csv_path) 23 | assert self.df['image_name'].apply(lambda x: os.path.isfile(img_path + x + img_ext)).all(), \ 24 | "Some images referenced in the CSV file were not found" 25 | 26 | # Ordering weather first 27 | self.mlb = MultiLabelBinarizer( 28 | classes = [ 29 | 'clear', 'cloudy', 'haze','partly_cloudy', 30 | 'agriculture','artisinal_mine','bare_ground','blooming', 31 | 'blow_down','conventional_mine','cultivation','habitation', 32 | 'primary','road','selective_logging','slash_burn','water' 33 | ] 34 | ) 35 | self.img_path = img_path 36 | self.img_ext = img_ext 37 | self.transform = transform 38 | 39 | self.X = self.df['image_name'] 40 | self.y = self.mlb.fit_transform(self.df['tags'].str.split()).astype(np.float32) 41 | 42 | def X(self): 43 | return self.X 44 | 45 | def __getitem__(self, index): 46 | img = Image.open(self.img_path + self.X[index] + self.img_ext) 47 | img = img.convert('RGB') 48 | if self.transform is not None: 49 | img = self.transform(img) 50 | 51 | label = from_numpy(self.y[index]) 52 | return img, label 53 | 54 | def __len__(self): 55 | return len(self.df.index) 56 | 57 | def getLabelEncoder(self): 58 | return self.mlb 59 | 60 | def getDF(self): 61 | return self.df -------------------------------------------------------------------------------- /src/p_metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import logging 3 | from sklearn.metrics import fbeta_score 4 | from scipy.optimize import fmin_l_bfgs_b, basinhopping 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from 
torch.autograd import Variable 9 | from timeit import default_timer as timer 10 | 11 | 12 | ## Get the same logger from main" 13 | logger = logging.getLogger("Planet-Amazon") 14 | 15 | def best_f2_score(true_labels, predictions): 16 | 17 | def f_neg(threshold): 18 | ## Scipy tries to minimize the function so we must get its inverse 19 | return - fbeta_score(true_labels, predictions > threshold, beta=2, average='samples') 20 | 21 | # Initialization of best threshold search 22 | thr_0 = [0.20] * 17 23 | constraints = [(0.,1.)] * 17 24 | def bounds(**kwargs): 25 | x = kwargs["x_new"] 26 | tmax = bool(np.all(x <= 1)) 27 | tmin = bool(np.all(x >= 0)) 28 | return tmax and tmin 29 | 30 | # Search using L-BFGS-B, the epsilon step must be big otherwise there is no gradient 31 | minimizer_kwargs = {"method": "L-BFGS-B", 32 | "bounds":constraints, 33 | "options":{ 34 | "eps": 0.05 35 | } 36 | } 37 | 38 | # We combine L-BFGS-B with Basinhopping for stochastic search with random steps 39 | logger.info("===> Searching optimal threshold for each label") 40 | start_time = timer() 41 | 42 | opt_output = basinhopping(f_neg, thr_0, 43 | stepsize = 0.1, 44 | minimizer_kwargs=minimizer_kwargs, 45 | niter=10, 46 | accept_test=bounds) 47 | 48 | end_time = timer() 49 | logger.info("===> Optimal threshold for each label:\n{}".format(opt_output.x)) 50 | logger.info("Threshold found in: %s seconds" % (end_time - start_time)) 51 | 52 | score = - opt_output.fun 53 | return score, opt_output.x 54 | 55 | 56 | # We use real valued F2 score for training. Input can be anything between 0 and 1. 57 | # Threshold is not differentiable so we don't use it during training 58 | # We get a smooth F2 score valid for real values and not only 0/1 59 | def torch_f2_score(y_true, y_pred): 60 | return torch_fbeta_score(y_true, y_pred, 2) 61 | 62 | def torch_fbeta_score(y_true, y_pred, beta, eps=1e-9): 63 | beta2 = beta**2 64 | 65 | y_true = y_true.float() 66 | 67 | true_positive = (y_pred * y_true).sum(dim=1) 68 | precision = true_positive.div(y_pred.sum(dim=1).add(eps)) 69 | recall = true_positive.div(y_true.sum(dim=1).add(eps)) 70 | 71 | return torch.mean( 72 | (precision*recall). 73 | div(precision.mul(beta2) + recall + eps). 
74 | mul(1 + beta2)) 75 | 76 | 77 | class SmoothF2Loss(nn.Module): 78 | def __init__(self): 79 | super(SmoothF2Loss, self).__init__() 80 | 81 | def forward(self, input, target): 82 | return 1 - torch_f2_score(target, torch.sigmoid(input)) -------------------------------------------------------------------------------- /src/p2_metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import logging 3 | from sklearn.metrics import fbeta_score 4 | from scipy.optimize import fmin_l_bfgs_b, basinhopping 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from torch.autograd import Variable 9 | from timeit import default_timer as timer 10 | 11 | 12 | ## Get the same logger from main 13 | logger = logging.getLogger("Planet-Amazon") 14 | 15 | def best_f2_score(true_labels, predictions): 16 | 17 | def f_neg(threshold): 18 | ## Scipy tries to minimize the function so we must return its negative 19 | return - fbeta_score(true_labels, predictions > threshold, beta=2, average='samples') 20 | 21 | # Initialization of best threshold search 22 | thr_0 = [0.20] * 17 23 | constraints = [(0.,1.)] * 17 24 | def bounds(**kwargs): 25 | x = kwargs["x_new"] 26 | tmax = bool(np.all(x <= 1)) 27 | tmin = bool(np.all(x >= 0)) 28 | return tmax and tmin 29 | 30 | # Search using L-BFGS-B, the epsilon step must be big otherwise there is no gradient 31 | minimizer_kwargs = {"method": "L-BFGS-B", 32 | "bounds":constraints, 33 | "options":{ 34 | "eps": 0.05 35 | } 36 | } 37 | 38 | # We combine L-BFGS-B with Basinhopping for stochastic search with random steps 39 | logger.info("===> Searching optimal threshold for each label") 40 | start_time = timer() 41 | 42 | opt_output = basinhopping(f_neg, thr_0, 43 | stepsize = 0.1, 44 | minimizer_kwargs=minimizer_kwargs, 45 | niter=10, 46 | accept_test=bounds) 47 | 48 | end_time = timer() 49 | logger.info("===> Optimal threshold for each label:\n{}".format(opt_output.x)) 50 | logger.info("Threshold found in: %s seconds" % (end_time - start_time)) 51 | 52 | score = - opt_output.fun 53 | return score, opt_output.x 54 | 55 | 56 | # We use real valued F2 score for training. Input can be anything between 0 and 1. 57 | # Threshold is not differentiable so we don't use it during training 58 | # We get a smooth F2 score valid for real values and not only 0/1 59 | def torch_f2_score(y_true, y_pred): 60 | return torch_fbeta_score(y_true, y_pred, 2) 61 | 62 | def torch_fbeta_score(y_true, y_pred, beta, eps=1e-9): 63 | beta2 = beta**2 64 | 65 | y_true = y_true.float() 66 | 67 | true_positive = (y_pred * y_true).sum(dim=1) 68 | precision = true_positive.div(y_pred.sum(dim=1).add(eps)) 69 | recall = true_positive.div(y_true.sum(dim=1).add(eps)) 70 | 71 | return torch.mean( 72 | (precision*recall). 73 | div(precision.mul(beta2) + recall + eps).
74 | mul(1 + beta2)) 75 | 76 | 77 | class SmoothF2Loss(nn.Module): 78 | def __init__(self): 79 | super(SmoothF2Loss, self).__init__() 80 | 81 | def forward(self, input, target): 82 | return 1 - torch_f2_score(target, torch.sigmoid(input)) -------------------------------------------------------------------------------- /Ideas.txt: -------------------------------------------------------------------------------- 1 | ## Data sources: 2 | https://github.com/CreativeInquiry/terrapattern 3 | https://github.com/nealjean/predicting-poverty 4 | 5 | ## Weather 6 | https://www.kaggle.com/c/planet-understanding-the-amazon-from-space/data 7 | Each chip will have one and potentially more than one atmospheric label and zero or more common and rare labels. Chips that are labeled as cloudy should have no other labels, but there may be labeling errors. 8 | 9 | Cloud Cover Labels 10 | 11 | Clouds are a major challenge for passive satellite imaging, and daily cloud cover and rain showers in the Amazon basin can significantly complicate monitoring in the area. For this reason we have chosen to include a cloud cover label for each chip. These labels closely mirror what one would see in a local weather forecast: clear, partly cloudy, cloudy, and haze. For our purposes haze is defined as any chip where atmospheric clouds are visible but they are not so opaque as to obscure the ground. Clear scenes show no evidence of clouds, and partly cloudy scenes can show opaque cloud cover over any portion of the image. Cloudy images have 90% of the chip obscured by opaque cloud cover. 12 | 13 | => Probably you can't be clear and cloudy and partly cloudy at the same time 14 | ==> Separate the output into a softmax + sigmoid activation? 15 | ==> Use an RNN to model the dependency? 16 | 17 | ## Deal with Imbalance: 18 | 19 | - Penalization: change the cost so that the NN pays more attention to underrepresented classes 20 | 21 | 22 | ## Loss function: 23 | - Which loss function for multilabel instead of BCE? 24 | - WARP loss? 25 | 26 | 27 | ## Thresholding 28 | - Remove thresholding altogether with an end-to-end learner 29 | 30 | ## Architecture 31 | Have an RNN that understands intensity/correlation "partly" 32 | 33 | 34 | - PyTorch Image captioning (Neural Talk) 35 | - RNN+CNN Multilabel classification 36 | 37 | ## Forum: 38 | - CNN-RNN implementation 39 | https://github.com/fchollet/keras/issues/5146 40 | 41 | ## Papers: 42 | - DL - Imbalanced dataset - kNN cluster + Quintuplet hinge loss 43 | https://pdfs.semanticscholar.org/69a6/8f9cf874c69e2232f47808016c2736b90c35.pdf 44 | 45 | - Multilabel ranking 46 | https://arxiv.org/abs/1312.4894 47 | 48 | - Multilabel classification for fashion search 49 | https://openreview.net/pdf?id=HyWDCXjgx 50 | 51 | - CNN+RNN Unified Arch for multilabel 52 | https://www.ics.uci.edu/~yyang8/research/cnn-rnn/cnn-rnn-cvpr2016.pdf 53 | 54 | # Overviews: 55 | - RNN + CNN combo 56 | https://wiki.tum.de/display/lfdv/Recurrent+Neural+Networks+-+Combination+of+RNN+and+CNN 57 | 58 | # Done 59 | ===> Optimize threshold with L-BFGS-B 60 | ===> search global minimum with basinhopping?
61 | ===> Resampling to deal with imbalance - To be done with care, thresholds became 0 and 1 for certain classes 62 | ===> Data augmentation with color and affine transforms - Zoom might have adverse effect 63 | 64 | # Done and not used 65 | ===> Tried using a smooth F2 based loss function (no threshold), the network performance on F2 score was reduced by 0.03 from the start to finish (0.85 --> 0.88) 66 | ===> Apparently cross-entropy have very nice properties when used together with sigmoid function and F2 score does not have those. 67 | -------------------------------------------------------------------------------- /baseline/001-keras-baseline-0.80752.py: -------------------------------------------------------------------------------- 1 | import numpy as np # linear algebra 2 | import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv) 3 | import os 4 | import gc 5 | 6 | import keras as k 7 | from keras.models import Sequential, load_model 8 | from keras.layers import Dense, Dropout, Flatten, BatchNormalization 9 | from keras.layers import Conv2D, MaxPooling2D 10 | from sklearn.preprocessing import MultiLabelBinarizer 11 | 12 | import cv2 13 | from tqdm import tqdm 14 | 15 | RESOLUTION = 128 16 | CACHE_FILE = '001-baseline-cache.h5' 17 | THRESHOLD = 0.2 18 | 19 | df_train = pd.read_csv('../data/train.csv') 20 | 21 | mlb = MultiLabelBinarizer() 22 | X_train = [] 23 | X_test = [] 24 | df_train = pd.read_csv('../data/train.csv') 25 | y_train = mlb.fit_transform(df_train['tags'].str.split()) 26 | 27 | for file in tqdm(df_train['image_name'], miniters=256): 28 | img = cv2.imread('../data/train-jpg/{}.jpg'.format(file)) 29 | X_train.append(cv2.resize(img,(RESOLUTION,RESOLUTION))) 30 | 31 | X_train = np.array(X_train, np.float16) / 255. ## TODO load per batch to avoid memory error here 32 | 33 | print(X_train.shape) 34 | print(y_train.shape) 35 | 36 | split = 15000 37 | x_train, x_valid, y_train, y_valid = X_train[:split], X_train[split:], y_train[:split], y_train[split:] 38 | 39 | model = Sequential() 40 | model.add(Conv2D(32, kernel_size=(3, 3), 41 | activation='relu', 42 | input_shape=(RESOLUTION,RESOLUTION, 3))) 43 | model.add(BatchNormalization()) 44 | model.add(Conv2D(64, (3, 3), activation='relu')) 45 | model.add(BatchNormalization()) 46 | model.add(MaxPooling2D(pool_size=(2, 2))) 47 | model.add(BatchNormalization()) 48 | model.add(Flatten()) 49 | model.add(Dense(128, activation='relu')) 50 | model.add(BatchNormalization()) 51 | model.add(Dense(17, activation='sigmoid')) 52 | 53 | model.compile(loss='binary_crossentropy', # We NEED binary here, since categorical_crossentropy l1 norms the output before calculating loss. 
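# (i.e. Keras' categorical_crossentropy rescales the predictions so that the 17 outputs sum to 1, which only makes sense for mutually exclusive classes; binary_crossentropy scores each sigmoid output independently, which is what multi-label tagging needs.)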
54 | optimizer='adam', 55 | metrics=['accuracy']) 56 | 57 | 58 | 59 | if os.path.isfile(CACHE_FILE): 60 | print('####### Loading model from cache ######') 61 | model = load_model(CACHE_FILE) 62 | 63 | else: 64 | print('####### Cache not found, building from scratch ######') 65 | model.fit(x_train, y_train, 66 | batch_size=64, 67 | epochs=6, # Should implement early stopping 68 | verbose=1, 69 | validation_data=(x_valid, y_valid)) 70 | model.save(CACHE_FILE) 71 | 72 | from sklearn.metrics import fbeta_score 73 | 74 | p_valid = model.predict(x_valid, batch_size=128) 75 | print(y_valid) 76 | print(p_valid) 77 | print(fbeta_score(y_valid, np.array(p_valid) > THRESHOLD, beta=2, average='samples')) 78 | 79 | 80 | ######## Prediction ######## 81 | 82 | df_test = pd.read_csv('../data/sample_submission.csv') 83 | 84 | for file in tqdm(df_test['image_name'], miniters=256): 85 | img = cv2.imread('../data/test-jpg/{}.jpg'.format(file)) 86 | X_test.append(cv2.resize(img,(RESOLUTION,RESOLUTION))) 87 | 88 | 89 | X_test = np.array(X_test, np.float16) / 255. 90 | 91 | y_pred = model.predict(X_test, batch_size=128) 92 | # np.savetxt("pred-baseline.csv", y_pred, delimiter=";") 93 | 94 | df_submission = pd.DataFrame() 95 | df_submission['image_name'] = df_test['image_name'] 96 | df_submission['tags'] = [' '.join(x) for x in mlb.inverse_transform(y_pred > THRESHOLD)] 97 | 98 | df_submission.to_csv('001-baseline.csv', index=False) 99 | -------------------------------------------------------------------------------- /baseline/unfinished_attempts/002-Keras-Inception-Transfer.py: -------------------------------------------------------------------------------- 1 | from keras.applications.inception_v3 import InceptionV3 2 | 3 | from keras.models import Model, load_model 4 | from keras.layers import Dense, Flatten, Input, BatchNormalization 5 | from keras import optimizers 6 | from sklearn.model_selection import train_test_split 7 | import pandas as pd 8 | from sklearn.metrics import fbeta_score 9 | from tqdm import tqdm 10 | import cv2 11 | import numpy as np 12 | import os 13 | from sklearn.preprocessing import MultiLabelBinarizer 14 | 15 | RESOLUTION = 96 16 | CACHE_FILE = '002-inception-baseline-cache.h5' 17 | THRESHOLD = 0.2 18 | 19 | def build_model(): 20 | #Create own input format 21 | model_input = Input(shape=(RESOLUTION,RESOLUTION,3),name = 'image_input') 22 | 23 | #Load Inception v3 24 | base_model = InceptionV3(weights='imagenet', include_top=False) 25 | for layer in base_model.layers: 26 | layer.trainable = False 27 | 28 | x = base_model(model_input) 29 | feat = Flatten(name='flatten')(x) 30 | feat = Dense(128, activation='relu')(feat) 31 | feat = BatchNormalization()(feat) 32 | out = Dense(17, activation='sigmoid')(feat) 33 | model = Model(inputs=model_input, outputs=out) 34 | 35 | model.compile(loss='binary_crossentropy', 36 | optimizer=optimizers.SGD(lr=1e-4, momentum=0.9), 37 | metrics=['accuracy']) 38 | 39 | 40 | print('######## Summary ########') 41 | model.summary() 42 | print('\n\n\n######## Config ########') 43 | model.get_config() 44 | print('\n\n\n######## ###### ########') 45 | 46 | return model 47 | 48 | mlb = MultiLabelBinarizer() 49 | X_train = [] 50 | X_test = [] 51 | df_train = pd.read_csv('../data/train.csv') 52 | y_train = mlb.fit_transform(df_train['tags'].str.split()) 53 | 54 | 55 | for file in tqdm(df_train['image_name'], miniters=256): 56 | img = cv2.imread('../data/train-jpg/{}.jpg'.format(file)) 57 | X_train.append(cv2.resize(img,(RESOLUTION,RESOLUTION))) 58 | 59 | X_train = 
np.array(X_train, np.float16) / 255. ## TODO load per batch to avoid memory error here 60 | 61 | print(X_train.shape) 62 | print(y_train.shape) 63 | 64 | ######## Validation ######## 65 | x_trn, x_val, y_trn, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42) 66 | 67 | if os.path.isfile(CACHE_FILE): 68 | print('####### Loading model from cache ######') 69 | model = load_model(CACHE_FILE) 70 | 71 | else: 72 | print('####### Cache not found, building from scratch ######') 73 | model = build_model() 74 | model.fit(x_trn, y_trn, 75 | batch_size=64, 76 | epochs=15, 77 | verbose=1, 78 | validation_data=(x_val, y_val)) 79 | model.save(CACHE_FILE) 80 | 81 | 82 | p_valid = model.predict(x_val, batch_size=128) 83 | print(y_val) 84 | print(p_valid) 85 | print(fbeta_score(y_val, np.array(p_valid) > THRESHOLD, beta=2, average='samples')) 86 | 87 | ######## Prediction ######## 88 | 89 | df_test = pd.read_csv('../data/sample_submission.csv') 90 | 91 | for file in tqdm(df_test['image_name'], miniters=256): 92 | img = cv2.imread('../data/test-jpg/{}.jpg'.format(file)) 93 | X_test.append(cv2.resize(img,(RESOLUTION,RESOLUTION))) 94 | 95 | 96 | X_test = np.array(X_test, np.float16) / 255. 97 | 98 | y_pred = model.predict(X_test, batch_size=128) 99 | 100 | df_submission = pd.DataFrame() 101 | df_submission['image_name'] = df_test['image_name'] 102 | df_submission['tags'] = [' '.join(x) for x in mlb.inverse_transform(y_pred > THRESHOLD)] 103 | 104 | df_submission.to_csv('002-inception-baseline.csv', index=False) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Amazon Forest Computer Vision 2 | Satellite image tagging code using PyTorch / Keras 3 | 4 | Here is a sample of the images we had to work with: 5 | 6 | ![](media/chipdesc.jpg) 7 | ![](media/chips.jpg) 8 | ![](media/agg1.jpg) 9 | ![](media/cloudy_1.jpg) 10 | ![](media/habitation1.jpg) 11 | ![](media/haze1.jpg) 12 | ![](media/pc1.jpg) 13 | ![](media/river.jpg) 14 | ![](media/road.jpg) 15 | 16 | _Source: https://www.kaggle.com/c/planet-understanding-the-amazon-from-space/data_ 17 | 18 | > Note: the repo was developed in May 2017 on PyTorch 0.1. PyTorch was publicly announced in January 2017 and has seen tremendous changes since then. 19 | 20 | You will find: 21 | - [A script that outputs the mean and stddev of your images if you want to train from scratch](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/compute-mean-std.py#L28) 22 | 23 | - [Using a weighted loss function](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/main_pytorch.py#L61) 24 | 25 | - [Logging your experiment](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/main_pytorch.py#L89) 26 | 27 | - [Composing data augmentations](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/main_pytorch.py#L103), also [here](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/src/p_data_augmentation.py#L181). 28 | Note: use [Pillow-SIMD](https://python-pillow.org/pillow-perf/) instead of PIL/Pillow. It is even faster than OpenCV. 29 | 30 | - [Loading from a CSV that contains image paths - 61 lines, yeah](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/src/p2_dataload.py#L23) 31 | 32 | - [Equivalent in Keras - 216 lines, ugh](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/src/k_dataloader.py).
Note: so many lines were needed because, by default, Keras gives you either data augmentation with ImageDataGenerator or lazy loading of images with "flow_from_directory", and there is no flow_from_csv 33 | 34 | - [Model finetuning with custom PyCaffe weights](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/src/p_neuro.py#L139) 35 | 36 | - Train_test_split, [PyTorch version](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/src/p_model_selection.py#L4) and [Keras version](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/src/k_model_selection.py#L4) 37 | 38 | - [Weighted sampling training so that the model views rare cases more often](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/main_pytorch.py#L131-L140) 39 | 40 | - [Custom Sampler creation, example for the balanced sampler](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/src/p_sampler.py) 41 | 42 | - [Saving snapshots each epoch](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/main_pytorch.py#L171) 43 | 44 | - [Loading the best snapshot for prediction](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/pytorch_predict_only.py#L83) 45 | 46 | - [Failed word embeddings experiments](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/Embedding-RNN-Autoencoder.ipynb) to [combine image and text data](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/Dual_Feed_Image_Label.ipynb) 47 | 48 | - [Combined weighted loss function (softmax for unique weather tags, BCE for multilabel tags)](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/src/p2_loss.py#L36) 49 | 50 | - [Selecting the best F2-threshold](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/src/p2_metrics.py#L38) via stochastic search at the end of each epoch to [maximize the validation score](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/526128239a6abcbb32fbf5b34ed8cc7a3cd87c4e/src/p2_validation.py#L49). The thresholds are then saved along with the model parameters; a simplified sketch of the search is shown below.
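A minimal sketch of that per-label threshold search, condensed from `src/p2_metrics.py` (the full version also adds an `accept_test` bound check, logging and timing):

```python
import numpy as np
from sklearn.metrics import fbeta_score
from scipy.optimize import basinhopping

def best_f2_score(true_labels, predictions):
    """Find one decision threshold per label that maximizes the samples-averaged F2 score."""
    def f_neg(threshold):
        # basinhopping minimizes, so return the negated F2 score
        return -fbeta_score(true_labels, predictions > threshold, beta=2, average='samples')

    thr_0 = [0.20] * 17                            # start every label at 0.20
    minimizer_kwargs = {"method": "L-BFGS-B",
                        "bounds": [(0., 1.)] * 17,
                        "options": {"eps": 0.05}}  # large step, the metric is piecewise constant
    opt = basinhopping(f_neg, thr_0, stepsize=0.1, niter=10,
                       minimizer_kwargs=minimizer_kwargs)
    return -opt.fun, opt.x                         # best F2 score, per-label thresholds
```

The resulting thresholds are stored in the checkpoint next to the weights, so the prediction-time scripts (`pytorch_predict_only.py`, `adjust_prediction.py`) can reuse them directly.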
51 | 52 | - [CNN-RNN combination (work in progress)](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/src/p3_neuroRNN.py#L10) 53 | -------------------------------------------------------------------------------- /pytorch_predict_only.py: -------------------------------------------------------------------------------- 1 | ## Custom Imports 2 | from src.p_dataload import KaggleAmazonDataset 3 | from src.p_neuro import Net, ResNet50, DenseNet121 4 | from src.p_training import train, snapshot 5 | from src.p_validation import validate 6 | from src.p_model_selection import train_valid_split 7 | from src.p_logger import setup_logs 8 | from src.p_prediction import predict, output 9 | from src.p_data_augmentation import ColorJitter 10 | 11 | ## Utilities 12 | import random 13 | import logging 14 | import time 15 | from timeit import default_timer as timer 16 | import os 17 | 18 | ## Libraries 19 | import numpy as np 20 | 21 | ## Torch 22 | import torch.optim as optim 23 | import torch.nn.functional as F 24 | from torchvision import transforms 25 | from torch.utils.data import DataLoader 26 | from torch.utils.data.sampler import SubsetRandomSampler 27 | import torch 28 | 29 | 30 | ############################################################################ 31 | ####### CONTROL CENTER ############# STAR COMMAND ######################### 32 | 33 | # Run name 34 | run_name = "2017-05-04_1730-thresh_densenet121-predict-only" 35 | 36 | model = DenseNet121(17).cuda() 37 | batch_size = 32 38 | 39 | ## Normalization on dataset mean/std 40 | # normalize = transforms.Normalize(mean=[0.30249774, 0.34421161, 0.31507745], 41 | # std=[0.13718569, 0.14363895, 0.16695958]) 42 | 43 | ## Normalization on ImageNet mean/std for finetuning 44 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 45 | std=[0.229, 0.224, 0.225]) 46 | 47 | save_dir = './snapshots' 48 | 49 | ####### CONTROL CENTER ############# STAR COMMAND ######################### 50 | ############################################################################ 51 | 52 | if __name__ == "__main__": 53 | # Initiate timer 54 | global_timer = timer() 55 | 56 | # Setup logs 57 | logger = setup_logs(save_dir, run_name) 58 | 59 | # Setting random seeds for reproducibility. 
(Caveat, some CuDNN algorithms are non-deterministic) 60 | torch.manual_seed(1337) 61 | torch.cuda.manual_seed(1337) 62 | np.random.seed(1337) 63 | random.seed(1337) 64 | 65 | ## Normalization only for validation and test 66 | ds_transform_raw = transforms.Compose([ 67 | transforms.CenterCrop(224), 68 | transforms.ToTensor(), 69 | normalize 70 | ]) 71 | 72 | 73 | 74 | X_test = KaggleAmazonDataset('./data/sample_submission_v2.csv','./data/test-jpg/','.jpg', 75 | ds_transform_raw 76 | ) 77 | test_loader = DataLoader(X_test, 78 | batch_size=batch_size, 79 | num_workers=4, 80 | pin_memory=True) 81 | 82 | # Load model from best iteration 83 | model_path = './snapshots/2017-05-04_1730-thresh_densenet121-model_best.pth' 84 | logger.info('===> loading {} for prediction'.format(model_path)) 85 | checkpoint = torch.load(model_path) 86 | model.load_state_dict(checkpoint['state_dict']) 87 | 88 | # Predict 89 | predictions = predict(test_loader, model) # TODO load model from the best on disk 90 | 91 | # Output 92 | X_train = KaggleAmazonDataset('./data/train.csv','./data/train-jpg/','.jpg') 93 | 94 | 95 | output(predictions, 96 | checkpoint['threshold'], 97 | X_test, 98 | X_train.getLabelEncoder(), 99 | './out', 100 | '2017-05-04_1730-thresh_densenet121', 101 | checkpoint['best_score']) 102 | 103 | ########################################################## 104 | 105 | end_global_timer = timer() 106 | logger.info("################## Success #########################") 107 | logger.info("Total elapsed time: %s" % (end_global_timer - global_timer)) -------------------------------------------------------------------------------- /main_keras.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from keras.models import Sequential, load_model 5 | from keras.layers import Dense, Dropout, Flatten, BatchNormalization 6 | from keras.layers import Conv2D, MaxPooling2D 7 | 8 | from timeit import default_timer as timer 9 | from src.k_dataloader import AmazonGenerator 10 | from src.k_model_selection import train_valid_split 11 | 12 | from sklearn.metrics import fbeta_score 13 | 14 | RESOLUTION = 256 15 | 16 | if __name__ == "__main__": 17 | # Initiate timer 18 | global_timer = timer() 19 | 20 | # Setting random seeds for reproducibility. 
(Caveat, some CuDNN algorithms are non-deterministic) 21 | np.random.seed(1337) 22 | 23 | model = Sequential() 24 | model.add(Conv2D(32, kernel_size=(3, 3), 25 | activation='relu', 26 | input_shape=(RESOLUTION,RESOLUTION, 3))) 27 | model.add(BatchNormalization()) 28 | model.add(Conv2D(64, (3, 3), activation='relu')) 29 | model.add(BatchNormalization()) 30 | model.add(MaxPooling2D(pool_size=(2, 2))) 31 | model.add(BatchNormalization()) 32 | model.add(Flatten()) 33 | model.add(Dense(96, activation='relu')) 34 | model.add(BatchNormalization()) 35 | model.add(Dense(17, activation='sigmoid')) 36 | 37 | model.compile(loss='binary_crossentropy', 38 | optimizer='adam', 39 | metrics=['accuracy']) 40 | 41 | train_gen = AmazonGenerator(featurewise_center=True, 42 | featurewise_std_normalization=True, 43 | width_shift_range=0.15, 44 | horizontal_flip=True, 45 | rotation_range=15, 46 | rescale=1./255 47 | ) 48 | 49 | valid_gen = AmazonGenerator(featurewise_center=True, 50 | featurewise_std_normalization=True, 51 | rescale=1./255) 52 | 53 | # train_gen.fit_from_csv('./data/train.csv', 54 | # './data/train-jpg/', 55 | # '.jpg', 56 | # rescale=1./255, 57 | # target_size=(RESOLUTION,RESOLUTION)) 58 | 59 | # train_gen.dump_dataset_mean_std('train_256_mean.npy', 'train_256_std.npy') 60 | train_gen.load_mean_std('train_256_mean.npy', 'train_256_std.npy') 61 | valid_gen.load_mean_std('train_256_mean.npy', 'train_256_std.npy') 62 | 63 | df_train = pd.read_csv('./data/train.csv') 64 | 65 | trn_idx, val_idx = train_valid_split(df_train, 0.2) 66 | 67 | batch_size = 32 68 | 69 | x_trn = train_gen.flow_from_df(df_train.iloc[trn_idx].reset_index(), 70 | './data/train-jpg/', 71 | '.jpg', 72 | mode='fit', 73 | batch_size=batch_size) 74 | x_val = valid_gen.flow_from_df(df_train.iloc[val_idx].reset_index(), 75 | './data/train-jpg/', 76 | '.jpg', 77 | mode='predict', 78 | batch_size=batch_size) 79 | model.fit_generator(x_trn, 80 | steps_per_epoch = len(trn_idx) / batch_size, 81 | epochs=1, 82 | workers=6, pickle_safe=True 83 | ) 84 | 85 | ypreds = model.predict_generator(x_val, 86 | steps = len(val_idx)/batch_size, 87 | workers=6, pickle_safe=True 88 | ) 89 | 90 | mlb = train_gen.getLabelEncoder() 91 | predictions = ypreds > 0.2 92 | true_labels = mlb.transform(df_train['tags'].iloc[val_idx].values) 93 | 94 | score=fbeta_score(true_labels, predictions, beta=2, average='samples') 95 | 96 | end_global_timer = timer() 97 | print("################## Success #########################") 98 | print("Total elapsed time: %s" % (end_global_timer - global_timer)) -------------------------------------------------------------------------------- /src/p3_neuroRNN.py: -------------------------------------------------------------------------------- 1 | from torch import nn, ones 2 | from torch.autograd import Variable 3 | from torchvision import models 4 | from torch.nn.init import kaiming_normal 5 | from torch import np 6 | import torch 7 | import torch.nn.functional as F 8 | 9 | 10 | class GRU_ResNet50(nn.Module): 11 | ## We use ResNet weights from PyCaffe. 
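## The ResNet-50 trunk turns each image into a single 2048-dim feature vector; that vector is batch-normalized, fed as a length-1 sequence to the GRU, and the GRU output feeds the final linear classifier over the tags.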
12 | def __init__(self, num_classes, hidden_size, num_layers): 13 | super(GRU_ResNet50, self).__init__() 14 | 15 | # Loading ResNet arch from PyTorch and weights from Pycaffe 16 | original_model = models.resnet50(pretrained=False) 17 | original_model.load_state_dict(torch.load('./zoo/resnet50.pth')) 18 | 19 | # Everything except the last linear layer 20 | self.features = nn.Sequential(*list(original_model.children())[:-1]) 21 | 22 | # Get number of features of last layer 23 | num_feats = original_model.fc.in_features 24 | 25 | self.bn = nn.BatchNorm1d(num_feats, momentum=0.01) 26 | 27 | self.hidden_size = hidden_size 28 | self.rnn = nn.GRU(input_size=num_feats, 29 | hidden_size=hidden_size, 30 | num_layers=num_layers, 31 | batch_first = True) 32 | 33 | # Plug our classifier 34 | self.classifier = nn.Sequential( 35 | nn.Linear(hidden_size, num_classes) 36 | ) 37 | 38 | # Init of last layer 39 | for m in self.classifier: 40 | kaiming_normal(m.weight) 41 | self.bn.weight.data.fill_(1) 42 | self.bn.bias.data.zero_() 43 | # How to init RNN? 44 | 45 | # Freeze those weights 46 | # for p in self.features.parameters(): 47 | # p.requires_grad = False 48 | 49 | def forward(self, x, hidden=None): 50 | f = self.features(x) 51 | f = self.bn(f.view(f.size(0), -1)) 52 | f = f.unsqueeze(1) 53 | x, hidden = self.rnn(f, hidden) 54 | x = x.view(-1, self.hidden_size) 55 | y = self.classifier(x) 56 | return y 57 | 58 | class LSTM_ResNet50(nn.Module): 59 | ## We use ResNet weights from PyCaffe. 60 | def __init__(self, num_classes, hidden_size, num_layers): 61 | super(LSTM_ResNet50, self).__init__() 62 | 63 | # Loading ResNet arch from PyTorch and weights from Pycaffe 64 | original_model = models.resnet50(pretrained=False) 65 | original_model.load_state_dict(torch.load('./zoo/resnet50.pth')) 66 | 67 | # Everything except the last linear layer 68 | self.features = nn.Sequential(*list(original_model.children())[:-1]) 69 | 70 | # Get number of features of last layer 71 | num_feats = original_model.fc.in_features 72 | 73 | self.bn = nn.BatchNorm1d(num_feats, momentum=0.01) 74 | 75 | self.hidden_size = hidden_size 76 | self.rnn = nn.LSTM(input_size=num_feats, 77 | hidden_size=hidden_size, 78 | num_layers=num_layers, 79 | batch_first = True) 80 | 81 | # Plug our classifier 82 | self.classifier = nn.Sequential( 83 | nn.Linear(hidden_size, num_classes) 84 | ) 85 | 86 | # Init of last layer 87 | for m in self.classifier: 88 | kaiming_normal(m.weight) 89 | self.bn.weight.data.fill_(1) 90 | self.bn.bias.data.zero_() 91 | 92 | # How to init RNN? 93 | 94 | # Freeze those weights 95 | # for p in self.features.parameters(): 96 | # p.requires_grad = False 97 | 98 | def forward(self, x, hidden=None): 99 | f = self.features(x) 100 | f = self.bn(f.view(f.size(0), -1)) 101 | f = f.unsqueeze(1) 102 | x, hidden = self.rnn(f, hidden) 103 | x = x.view(-1, self.hidden_size) 104 | y = self.classifier(x) 105 | return y 106 | 107 | 108 | class Skip_LSTM_RN50(nn.Module): 109 | ## We use ResNet weights from PyCaffe. 
110 | def __init__(self, num_classes, hidden_size, num_layers): 111 | super(Skip_LSTM_RN50, self).__init__() 112 | 113 | # Loading ResNet arch from PyTorch and weights from Pycaffe 114 | original_model = models.resnet50(pretrained=False) 115 | original_model.load_state_dict(torch.load('./zoo/resnet50.pth')) 116 | 117 | # Everything except the last linear layer 118 | self.features = nn.Sequential(*list(original_model.children())[:-1]) 119 | 120 | # Get number of features of last layer 121 | num_feats = original_model.fc.in_features 122 | 123 | self.bn = nn.BatchNorm1d(num_feats, momentum=0.01) 124 | 125 | self.hidden_size = hidden_size 126 | self.rnn = nn.LSTM(input_size=num_feats, 127 | hidden_size=hidden_size, 128 | num_layers=num_layers, 129 | batch_first = True) 130 | 131 | # Plug our classifier 132 | self.classifier = nn.Sequential( 133 | nn.Linear(hidden_size + num_feats, num_classes) 134 | ) 135 | 136 | # Init of last layer 137 | for m in self.classifier: 138 | kaiming_normal(m.weight) 139 | self.bn.weight.data.fill_(1) 140 | self.bn.bias.data.zero_() 141 | 142 | # How to init RNN? 143 | 144 | # Freeze those weights 145 | # for p in self.features.parameters(): 146 | # p.requires_grad = False 147 | 148 | def forward(self, x, hidden=None): 149 | f = self.features(x) 150 | f = self.bn(f.view(f.size(0), -1)) 151 | x, hidden = self.rnn(f.unsqueeze(1), hidden) 152 | x = x.view(-1, self.hidden_size) 153 | c = torch.cat((x,f),1) # Skip connection to avoid the LSTM eating the whole gradients 154 | y = self.classifier(c) 155 | return y -------------------------------------------------------------------------------- /src/p_data_augmentation.py: -------------------------------------------------------------------------------- 1 | ## Additional transforms for PyTorch data augmentation 2 | ## It is very recommended to use Pillow-SIMD for speed gain in the 5x range. 
3 | ## https://python-pillow.org/pillow-perf/ 4 | ## OpenCV built with IPP and TBB is also fast but inaccurate 5 | 6 | import torch 7 | import random 8 | import PIL.ImageEnhance as ie 9 | import PIL.Image as im 10 | 11 | 12 | class Lighting(object): 13 | """Lighting noise(AlexNet - style PCA - based noise)""" 14 | 15 | def __init__(self, alphastd, eigval, eigvec): 16 | self.alphastd = alphastd 17 | self.eigval = eigval 18 | self.eigvec = eigvec 19 | 20 | def __call__(self, img): 21 | if self.alphastd == 0: 22 | return img 23 | 24 | alpha = img.new().resize_(3).normal_(0, self.alphastd) 25 | rgb = self.eigvec.type_as(img).clone()\ 26 | .mul(alpha.view(1, 3).expand(3, 3))\ 27 | .mul(self.eigval.view(1, 3).expand(3, 3))\ 28 | .sum(1).squeeze() 29 | 30 | return img.add(rgb.view(3, 1, 1).expand_as(img)) 31 | 32 | 33 | class Grayscale(object): 34 | 35 | def __call__(self, img): 36 | gs = img.clone() 37 | gs[0].mul_(0.299).add_(0.587, gs[1]).add_(0.114, gs[2]) 38 | gs[1].copy_(gs[0]) 39 | gs[2].copy_(gs[0]) 40 | return gs 41 | 42 | 43 | class Saturation(object): 44 | 45 | def __init__(self, var): 46 | self.var = var 47 | 48 | def __call__(self, img): 49 | gs = Grayscale()(img) 50 | alpha = random.uniform(0, self.var) 51 | return img.lerp(gs, alpha) 52 | 53 | 54 | class Brightness(object): 55 | 56 | def __init__(self, var): 57 | self.var = var 58 | 59 | def __call__(self, img): 60 | gs = img.new().resize_as_(img).zero_() 61 | alpha = random.uniform(0, self.var) 62 | return img.lerp(gs, alpha) 63 | 64 | 65 | class Contrast(object): 66 | 67 | def __init__(self, var): 68 | self.var = var 69 | 70 | def __call__(self, img): 71 | gs = Grayscale()(img) 72 | gs.fill_(gs.mean()) 73 | alpha = random.uniform(0, self.var) 74 | return img.lerp(gs, alpha) 75 | 76 | 77 | class RandomOrder(object): 78 | """ Composes several transforms together in random order. 
79 | """ 80 | 81 | def __init__(self, transforms): 82 | self.transforms = transforms 83 | 84 | def __call__(self, img): 85 | if self.transforms is None: 86 | return img 87 | order = torch.randperm(len(self.transforms)) 88 | for i in order: 89 | img = self.transforms[i](img) 90 | return img 91 | 92 | 93 | class ColorJitter(RandomOrder): 94 | 95 | def __init__(self, brightness=0.4, contrast=0.4, saturation=0.4): 96 | self.transforms = [] 97 | if brightness != 0: 98 | self.transforms.append(Brightness(brightness)) 99 | if contrast != 0: 100 | self.transforms.append(Contrast(contrast)) 101 | if saturation != 0: 102 | self.transforms.append(Saturation(saturation)) 103 | 104 | class RandomFlip(object): 105 | """Randomly flips the given PIL.Image with a probability of 0.25 horizontal, 106 | 0.25 vertical, 107 | 0.5 as is 108 | """ 109 | 110 | def __call__(self, img): 111 | dispatcher = { 112 | 0: img, 113 | 1: img, 114 | 2: img.transpose(im.FLIP_LEFT_RIGHT), 115 | 3: img.transpose(im.FLIP_TOP_BOTTOM) 116 | } 117 | 118 | return dispatcher[random.randint(0,3)] #randint is inclusive 119 | 120 | class RandomRotate(object): 121 | """Randomly rotate the given PIL.Image with a probability of 1/6 90°, 122 | 1/6 180°, 123 | 1/6 270°, 124 | 1/2 as is 125 | """ 126 | 127 | def __call__(self, img): 128 | dispatcher = { 129 | 0: img, 130 | 1: img, 131 | 2: img, 132 | 3: img.transpose(im.ROTATE_90), 133 | 4: img.transpose(im.ROTATE_180), 134 | 5: img.transpose(im.ROTATE_270) 135 | } 136 | 137 | return dispatcher[random.randint(0,5)] #randint is inclusive 138 | 139 | class PILColorBalance(object): 140 | 141 | def __init__(self, var): 142 | self.var = var 143 | 144 | def __call__(self, img): 145 | alpha = random.uniform(1 - self.var, 1 + self.var) 146 | return ie.Color(img).enhance(alpha) 147 | 148 | class PILContrast(object): 149 | 150 | def __init__(self, var): 151 | self.var = var 152 | 153 | def __call__(self, img): 154 | alpha = random.uniform(1 - self.var, 1 + self.var) 155 | return ie.Contrast(img).enhance(alpha) 156 | 157 | 158 | class PILBrightness(object): 159 | 160 | def __init__(self, var): 161 | self.var = var 162 | 163 | def __call__(self, img): 164 | alpha = random.uniform(1 - self.var, 1 + self.var) 165 | return ie.Brightness(img).enhance(alpha) 166 | 167 | class PILSharpness(object): 168 | 169 | def __init__(self, var): 170 | self.var = var 171 | 172 | def __call__(self, img): 173 | alpha = random.uniform(1 - self.var, 1 + self.var) 174 | return ie.Sharpness(img).enhance(alpha) 175 | 176 | 177 | # Check ImageEnhancer effect: https://www.youtube.com/watch?v=_7iDTpTop04 178 | # Not documented but all enhancements can go beyond 1.0 to 2 179 | # Image must be RGB 180 | # Use Pillow-SIMD because Pillow is too slow 181 | class PowerPIL(RandomOrder): 182 | def __init__(self, rotate=True, 183 | flip=True, 184 | colorbalance=0.4, 185 | contrast=0.4, 186 | brightness=0.4, 187 | sharpness=0.4): 188 | self.transforms = [] 189 | if rotate: 190 | self.transforms.append(RandomRotate()) 191 | if flip: 192 | self.transforms.append(RandomFlip()) 193 | if brightness != 0: 194 | self.transforms.append(PILBrightness(brightness)) 195 | if contrast != 0: 196 | self.transforms.append(PILContrast(contrast)) 197 | if colorbalance != 0: 198 | self.transforms.append(PILColorBalance(colorbalance)) 199 | if sharpness != 0: 200 | self.transforms.append(PILSharpness(sharpness)) -------------------------------------------------------------------------------- /src/p_neuro.py: 
-------------------------------------------------------------------------------- 1 | from torch import nn, ones 2 | from torchvision import models 3 | from torch.nn.init import kaiming_normal 4 | from torch import np 5 | import torch 6 | import torch.nn.functional as F 7 | 8 | 9 | ## Custom baseline 10 | class Net(nn.Module): 11 | def __init__(self, input_size=(3,224,224), nb_classes=17): 12 | 13 | super(Net, self).__init__() 14 | 15 | self.features = nn.Sequential( 16 | nn.Conv2d(3,32,3), 17 | nn.BatchNorm2d(32), 18 | nn.ReLU(), 19 | nn.Conv2d(32,64,3), 20 | nn.BatchNorm2d(64), 21 | nn.ReLU(), 22 | nn.MaxPool2d((3,3)) 23 | ) 24 | 25 | ## Compute linear layer size 26 | self.flat_feats = self._get_flat_feats(input_size, self.features) 27 | 28 | self.classifier = nn.Sequential( 29 | nn.Linear(self.flat_feats, 256), 30 | nn.BatchNorm1d(256), 31 | nn.ReLU(), 32 | nn.Dropout(p=0.15), 33 | nn.Linear(256, 64), 34 | nn.BatchNorm1d(64), 35 | nn.ReLU(), 36 | nn.Dropout(p=0.10), 37 | nn.Linear(64, nb_classes) 38 | ) 39 | 40 | ## Weights initialization 41 | def _weights_init(m): 42 | if isinstance(m, nn.Conv2d or nn.Linear): 43 | kaiming_normal(m.weight) 44 | elif isinstance(m, nn.BatchNorm2d or BatchNorm1d): 45 | m.weight.data.fill_(1) 46 | m.bias.data.zero_() 47 | 48 | self.apply(_weights_init) 49 | 50 | def _get_flat_feats(self, in_size, feats): 51 | f = feats(Variable(ones(1,*in_size))) 52 | return int(np.prod(f.size()[1:])) 53 | 54 | 55 | 56 | def forward(self, x): 57 | feats = self.features(x) 58 | flat_feats = feats.view(-1, self.flat_feats) 59 | out = self.classifier(flat_feats) 60 | return out 61 | 62 | 63 | ## ResNet fine-tuning 64 | class ResNet50(nn.Module): 65 | ## We use ResNet weights from PyCaffe. 66 | def __init__(self, num_classes): 67 | super(ResNet50, self).__init__() 68 | 69 | # Loading ResNet arch from PyTorch and weights from Pycaffe 70 | original_model = models.resnet50(pretrained=False) 71 | original_model.load_state_dict(torch.load('./zoo/resnet50.pth')) 72 | 73 | # Everything except the last linear layer 74 | self.features = nn.Sequential(*list(original_model.children())[:-1]) 75 | 76 | # Get number of features of last layer 77 | num_feats = original_model.fc.in_features 78 | 79 | # Plug our classifier 80 | self.classifier = nn.Sequential( 81 | nn.Linear(num_feats, num_classes) 82 | ) 83 | 84 | # Init of last layer 85 | for m in self.classifier: 86 | kaiming_normal(m.weight) 87 | 88 | # Freeze those weights 89 | # for p in self.features.parameters(): 90 | # p.requires_grad = False 91 | 92 | def forward(self, x): 93 | f = self.features(x) 94 | f = f.view(f.size(0), -1) 95 | y = self.classifier(f) 96 | return y 97 | 98 | class ResNet101(nn.Module): 99 | ## We use ResNet weights from PyCaffe. 
100 | def __init__(self, num_classes): 101 | super(ResNet101, self).__init__() 102 | 103 | # Loading ResNet arch from PyTorch and weights from Pycaffe 104 | original_model = models.resnet101(pretrained=False) 105 | original_model.load_state_dict(torch.load('./zoo/resnet101.pth')) 106 | 107 | # Everything except the last linear layer 108 | self.features = nn.Sequential(*list(original_model.children())[:-1]) 109 | 110 | # Get number of features of last layer 111 | num_feats = original_model.fc.in_features 112 | 113 | # Plug our classifier 114 | self.classifier = nn.Sequential( 115 | nn.Linear(num_feats, num_classes) 116 | ) 117 | 118 | # Init of last layer 119 | for m in self.classifier: 120 | kaiming_normal(m.weight) 121 | 122 | # Freeze those weights 123 | # for p in self.features.parameters(): 124 | # p.requires_grad = False 125 | 126 | def forward(self, x): 127 | f = self.features(x) 128 | f = f.view(f.size(0), -1) 129 | y = self.classifier(f) 130 | return y 131 | 132 | class ResNet152(nn.Module): 133 | ## We use ResNet weights from PyCaffe. 134 | def __init__(self, num_classes): 135 | super(ResNet152, self).__init__() 136 | 137 | # Loading ResNet arch from PyTorch and weights from Pycaffe 138 | original_model = models.resnet152(pretrained=False) 139 | original_model.load_state_dict(torch.load('./zoo/resnet152.pth')) 140 | 141 | # Everything except the last linear layer 142 | self.features = nn.Sequential(*list(original_model.children())[:-1]) 143 | 144 | # Get number of features of last layer 145 | num_feats = original_model.fc.in_features 146 | 147 | # Plug our classifier 148 | self.classifier = nn.Sequential( 149 | nn.Linear(num_feats, num_classes) 150 | ) 151 | 152 | # Init of last layer 153 | for m in self.classifier: 154 | kaiming_normal(m.weight) 155 | 156 | # Freeze those weights 157 | # for p in self.features.parameters(): 158 | # p.requires_grad = False 159 | 160 | def forward(self, x): 161 | f = self.features(x) 162 | f = f.view(f.size(0), -1) 163 | y = self.classifier(f) 164 | return y 165 | 166 | ## VGG fine-tuning 167 | class VGG16(nn.Module): 168 | def __init__(self, nb_classes=17): 169 | super(VGG16, self).__init__() 170 | original_model = models.vgg16(pretrained=False) 171 | self.features = original_model.features 172 | self.classifier = nn.Sequential( 173 | nn.Dropout(), 174 | nn.Linear(25088, 4096), 175 | nn.ReLU(inplace=True), 176 | nn.Dropout(), 177 | nn.Linear(4096, 4096), 178 | nn.ReLU(inplace=True), 179 | nn.Linear(4096, num_classes), 180 | ) 181 | 182 | # Freeze Convolutional weights 183 | for p in self.features.parameters(): 184 | p.requires_grad = False 185 | 186 | def forward(self, x): 187 | f = self.features(x) 188 | f = f.view(f.size(0), -1) 189 | y = self.classifier(f) 190 | return y 191 | 192 | class DenseNet121(nn.Module): 193 | def __init__(self, num_classes): 194 | super(DenseNet121, self).__init__() 195 | 196 | original_model = models.densenet121(pretrained=True) 197 | 198 | # Everything except the last linear layer 199 | self.features = nn.Sequential(*list(original_model.children())[:-1]) 200 | 201 | # Get number of features of last layer 202 | num_feats = original_model.classifier.in_features 203 | 204 | # Plug our classifier 205 | self.classifier = nn.Sequential( 206 | nn.Linear(num_feats, num_classes) 207 | ) 208 | 209 | # Init of last layer 210 | for m in self.classifier: 211 | kaiming_normal(m.weight) 212 | 213 | # Freeze weights 214 | # for p in self.features.parameters(): 215 | # p.requires_grad = False 216 | 217 | def forward(self, x): 218 | f 
= self.features(x) 219 | out = F.relu(f, inplace=True) 220 | out = F.avg_pool2d(out, kernel_size=7).view(f.size(0), -1) 221 | out = self.classifier(out) 222 | return out -------------------------------------------------------------------------------- /main_pytorch-baseline.py: -------------------------------------------------------------------------------- 1 | ## Custom Imports 2 | from src.p_dataload import KaggleAmazonDataset 3 | from src.p_neuro import Net, ResNet50, ResNet101, DenseNet121 4 | from src.p_training import train, snapshot 5 | from src.p_validation import validate 6 | from src.p_model_selection import train_valid_split 7 | from src.p_logger import setup_logs 8 | from src.p_prediction import predict, output 9 | from src.p_data_augmentation import ColorJitter 10 | # from src.p_metrics import SmoothF2Loss 11 | from src.p_sampler import SubsetSampler, balance_weights 12 | 13 | ## Utilities 14 | import random 15 | import logging 16 | import time 17 | from timeit import default_timer as timer 18 | import os 19 | 20 | ## Libraries 21 | import numpy as np 22 | import math 23 | 24 | ## Torch 25 | import torch.optim as optim 26 | import torch.nn.functional as F 27 | from torchvision import transforms 28 | from torch.utils.data import DataLoader 29 | import torch 30 | from torchsample.transforms import Affine 31 | from torch.utils.data.sampler import WeightedRandomSampler, SubsetRandomSampler 32 | 33 | ############################################################################ 34 | ####### CONTROL CENTER ############# STAR COMMAND ######################### 35 | ## Variables setup 36 | model = ResNet50(17).cuda() 37 | # model = Net().cuda() 38 | # model = WideResNet(16, 17, 4, 0.3) 39 | # model = ResNet101(17).cuda() 40 | # model = DenseNet121(17).cuda() # Note: Until May 5 19:12 CEST DenseNet121 was actually ResNet50 :/ 41 | 42 | epochs = 30 43 | batch_size = 16 44 | 45 | # Run name 46 | run_name = time.strftime("%Y-%m-%d_%H%M-") + "BASELINE" 47 | 48 | ## Normalization on dataset mean/std 49 | # normalize = transforms.Normalize(mean=[0.30249774, 0.34421161, 0.31507745], 50 | # std=[0.13718569, 0.14363895, 0.16695958]) 51 | 52 | ## Normalization on ImageNet mean/std for finetuning 53 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 54 | std=[0.229, 0.224, 0.225]) 55 | 56 | # Note, p_training has lr_decay automated 57 | # optimizer = optim.Adam(model.parameters(), lr=0.1) # From scratch # Don't use Weight Decay with PReLU 58 | # optimizer = optim.SGD(model.parameters(), lr=1e-1, momentum=0.9, weight_decay=1e-4) # From scratch 59 | optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0.9) # Finetuning whole model 60 | 61 | criterion = torch.nn.MultiLabelSoftMarginLoss() 62 | # criterion = SmoothF2Loss() # Using F2 directly as a cost function does 0.88 as a final cross validation. This is probably explained because cross-enropy is very efficient for sigmoid outputs (turning it into a convex problem). So keep Sigmoid + Cross entropy or something else + SmoothF2 63 | 64 | save_dir = './snapshots' 65 | 66 | ####### CONTROL CENTER ############# STAR COMMAND ######################### 67 | ############################################################################ 68 | 69 | if __name__ == "__main__": 70 | # Initiate timer 71 | global_timer = timer() 72 | 73 | # Setup logs 74 | logger = setup_logs(save_dir, run_name) 75 | 76 | # Setting random seeds for reproducibility. 
(Caveat, some CuDNN algorithms are non-deterministic) 77 | torch.manual_seed(1337) 78 | torch.cuda.manual_seed(1337) 79 | np.random.seed(1337) 80 | random.seed(1337) 81 | 82 | ############################################################## 83 | ## Loading the dataset 84 | 85 | ## Augmentation + Normalization for full training 86 | ds_transform_augmented = transforms.Compose([ 87 | transforms.RandomSizedCrop(224), 88 | transforms.RandomHorizontalFlip(), 89 | transforms.ToTensor(), 90 | ColorJitter(), 91 | normalize 92 | # Affine( 93 | # rotation_range = 15, 94 | # translation_range = (0.2,0.2), 95 | # shear_range = math.pi/6, 96 | # zoom_range=(0.7,1.4) 97 | # ) 98 | ]) 99 | 100 | ## Normalization only for validation and test 101 | ds_transform_raw = transforms.Compose([ 102 | transforms.Scale(224), 103 | transforms.ToTensor(), 104 | normalize 105 | ]) 106 | 107 | #### ######### ######## ########### ##### 108 | 109 | X_train = KaggleAmazonDataset('./data/train.csv','./data/train-jpg/','.jpg', 110 | ds_transform_augmented 111 | ) 112 | X_val = KaggleAmazonDataset('./data/train.csv','./data/train-jpg/','.jpg', 113 | ds_transform_raw 114 | ) 115 | 116 | # Resample the dataset 117 | # weights = balance_weights(X_train.getDF(), 'tags', X_train.getLabelEncoder()) 118 | # weights = np.clip(weights,0.02,0.2) # We need to let the net view the most common classes or learning is too slow 119 | 120 | # Creating a validation split 121 | train_idx, valid_idx = train_valid_split(X_train, 0.2) 122 | 123 | # weights[valid_idx] = 0 124 | 125 | # train_sampler = WeightedRandomSampler(weights, len(train_idx)) 126 | train_sampler = SubsetRandomSampler(train_idx) 127 | valid_sampler = SubsetSampler(valid_idx) 128 | 129 | ###### ########## ########## ######## ######### 130 | 131 | # Both dataloader loads from the same dataset but with different indices 132 | train_loader = DataLoader(X_train, 133 | batch_size=batch_size, 134 | sampler=train_sampler, 135 | num_workers=4, 136 | pin_memory=True) 137 | 138 | valid_loader = DataLoader(X_val, 139 | batch_size=batch_size, 140 | sampler=valid_sampler, 141 | num_workers=4, 142 | pin_memory=True) 143 | 144 | ########################################################### 145 | ## Start training 146 | best_score = 0. 
147 | for epoch in range(epochs): 148 | epoch_timer = timer() 149 | 150 | # Train and validate 151 | train(epoch, train_loader, model, criterion, optimizer) 152 | score, loss, threshold = validate(epoch, valid_loader, model, criterion, X_train.getLabelEncoder()) 153 | # Save 154 | is_best = score > best_score 155 | best_score = max(score, best_score) 156 | snapshot(save_dir, run_name, is_best,{ 157 | 'epoch': epoch + 1, 158 | 'state_dict': model.state_dict(), 159 | 'best_score': best_score, 160 | 'optimizer': optimizer.state_dict(), 161 | 'threshold': threshold, 162 | 'val_loss': loss 163 | }) 164 | 165 | end_epoch_timer = timer() 166 | logger.info("#### End epoch {}, elapsed time: {}".format(epoch, end_epoch_timer - epoch_timer)) 167 | 168 | ########################################################### 169 | ## Prediction 170 | X_test = KaggleAmazonDataset('./data/sample_submission.csv','./data/test-jpg/','.jpg', 171 | ds_transform_raw 172 | ) 173 | test_loader = DataLoader(X_test, 174 | batch_size=batch_size, 175 | num_workers=4, 176 | pin_memory=True) 177 | 178 | # Load model from best iteration 179 | logger.info('===> loading best model for prediction') 180 | checkpoint = torch.load(os.path.join(save_dir, 181 | run_name + '-model_best.pth' 182 | ) 183 | ) 184 | model.load_state_dict(checkpoint['state_dict']) 185 | 186 | # Predict 187 | predictions = predict(test_loader, model) # TODO load model from the best on disk 188 | 189 | output(predictions, 190 | checkpoint['threshold'], 191 | X_test, 192 | X_train.getLabelEncoder(), 193 | './out', 194 | run_name, 195 | checkpoint['best_score']) # TODO early_stopping and use best_score 196 | 197 | ########################################################## 198 | 199 | end_global_timer = timer() 200 | logger.info("################## Success #########################") 201 | logger.info("Total elapsed time: %s" % (end_global_timer - global_timer)) 202 | -------------------------------------------------------------------------------- /main_pytorch.py: -------------------------------------------------------------------------------- 1 | ## Custom Imports 2 | from src.p2_dataload import KaggleAmazonDataset 3 | from src.p_neuro import Net, ResNet50, ResNet101, ResNet152, DenseNet121 4 | from src.p3_neuroRNN import GRU_ResNet50, LSTM_ResNet50, Skip_LSTM_RN50 5 | from src.p_training import train, snapshot 6 | #from src.p2_validation import validate 7 | from src.p_validation import validate 8 | from src.p_model_selection import train_valid_split 9 | from src.p_logger import setup_logs 10 | #from src.p2_prediction import predict, output 11 | from src.p_prediction import predict, output 12 | from src.p_data_augmentation import ColorJitter, PowerPIL 13 | # from src.p_metrics import SmoothF2Loss 14 | from src.p2_loss import ConvolutedLoss 15 | from src.p_sampler import SubsetSampler, balance_weights 16 | 17 | ## Utilities 18 | import random 19 | import logging 20 | import time 21 | from timeit import default_timer as timer 22 | import os 23 | 24 | ## Libraries 25 | import numpy as np 26 | import math 27 | 28 | ## Torch 29 | import torch.optim as optim 30 | import torch.nn.functional as F 31 | from torchvision import transforms 32 | from torch.utils.data import DataLoader 33 | import torch 34 | from torchsample.transforms import Affine 35 | from torch.utils.data.sampler import WeightedRandomSampler, SubsetRandomSampler 36 | 37 | ############################################################################ 38 | ####### CONTROL CENTER ############# STAR COMMAND 
######################### 39 | ## Variables setup 40 | model = ResNet50(17).cuda() 41 | # model = ResNet152(17).cuda() 42 | 43 | # model = GRU_ResNet50(17, 128, 2).cuda() 44 | # model = LSTM_ResNet50(17, 128, 2).cuda() 45 | # model = Skip_LSTM_RN50(17, 128, 2).cuda() 46 | 47 | epochs = 16 48 | batch_size = 64 49 | 50 | # Run name 51 | run_name = time.strftime("%Y-%m-%d_%H%M-") + "resnet50-L2reg-new-data" 52 | ## Normalization on ImageNet mean/std for finetuning 53 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 54 | std=[0.229, 0.224, 0.225]) 55 | 56 | # Note, p_training has lr_decay automated 57 | optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0.9, weight_decay=0.0005) # Finetuning whole model 58 | 59 | # criterion = ConvolutedLoss() 60 | criterion = torch.nn.MultiLabelSoftMarginLoss( 61 | weight = torch.Tensor([1, 4, 2, 1, 62 | 1, 3, 3, 3, 63 | 4, 4, 1, 2, 64 | 1, 1, 3, 4, 1]) 65 | ).cuda() 66 | 67 | #classes = [ 68 | # 'clear', 'cloudy', 'haze','partly_cloudy', 69 | # 'agriculture','artisinal_mine','bare_ground','blooming', 70 | # 'blow_down','conventional_mine','cultivation','habitation', 71 | # 'primary','road','selective_logging','slash_burn','water' 72 | # ] 73 | ## Frequency 74 | # [28203, 2330, 2695, 7251, 75 | # 12338, 339, 859, 332, 76 | # 98, 100, 4477, 3662, 77 | # 37840, 8076, 340, 209, 7262] 78 | 79 | save_dir = './snapshots' 80 | 81 | ####### CONTROL CENTER ############# STAR COMMAND ######################### 82 | ############################################################################ 83 | 84 | if __name__ == "__main__": 85 | # Initiate timer 86 | global_timer = timer() 87 | 88 | # Setup logs 89 | logger = setup_logs(save_dir, run_name) 90 | 91 | # Setting random seeds for reproducibility. (Caveat, some CuDNN algorithms are non-deterministic) 92 | torch.manual_seed(1337) 93 | torch.cuda.manual_seed(1337) 94 | np.random.seed(1337) 95 | random.seed(1337) 96 | 97 | ############################################################## 98 | ## Loading the dataset 99 | 100 | ## Augmentation + Normalization for full training 101 | ds_transform_augmented = transforms.Compose([ 102 | transforms.RandomSizedCrop(224), 103 | PowerPIL(), 104 | transforms.ToTensor(), 105 | # ColorJitter(), # Use PowerPIL instead, with PillowSIMD it's much more efficient 106 | normalize, 107 | # Affine( 108 | # rotation_range = 15, 109 | # translation_range = (0.2,0.2), 110 | # shear_range = math.pi/6, 111 | # zoom_range=(0.7,1.4) 112 | #) 113 | ]) 114 | 115 | ## Normalization only for validation and test 116 | ds_transform_raw = transforms.Compose([ 117 | transforms.Scale(224), 118 | transforms.ToTensor(), 119 | normalize 120 | ]) 121 | 122 | #### ######### ######## ########### ##### 123 | 124 | X_train = KaggleAmazonDataset('./data/train_v2.csv','./data/train-jpg/','.jpg', 125 | ds_transform_augmented 126 | ) 127 | X_val = KaggleAmazonDataset('./data/train_v2.csv','./data/train-jpg/','.jpg', 128 | ds_transform_raw 129 | ) 130 | 131 | # Resample the dataset 132 | # weights = balance_weights(X_train.getDF(), 'tags', X_train.getLabelEncoder()) 133 | # weights = np.clip(weights,0.02,0.2) # We need to let the net view the most common classes or learning is too slow 134 | 135 | # Creating a validation split 136 | train_idx, valid_idx = train_valid_split(X_train, 0.2) 137 | 138 | # weights[valid_idx] = 0 139 | 140 | # train_sampler = WeightedRandomSampler(weights, len(train_idx)) 141 | train_sampler = SubsetRandomSampler(train_idx) 142 | valid_sampler = SubsetSampler(valid_idx) 143 | 
144 | ###### ########## ########## ######## ######### 145 | 146 | # Both dataloader loads from the same dataset but with different indices 147 | train_loader = DataLoader(X_train, 148 | batch_size=batch_size, 149 | sampler=train_sampler, 150 | num_workers=4, 151 | pin_memory=True) 152 | 153 | valid_loader = DataLoader(X_val, 154 | batch_size=batch_size, 155 | sampler=valid_sampler, 156 | num_workers=4, 157 | pin_memory=True) 158 | 159 | ########################################################### 160 | ## Start training 161 | best_score = 0. 162 | for epoch in range(epochs): 163 | epoch_timer = timer() 164 | 165 | # Train and validate 166 | train(epoch, train_loader, model, criterion, optimizer) 167 | score, loss, threshold = validate(epoch, valid_loader, model, criterion, X_train.getLabelEncoder()) 168 | # Save 169 | is_best = score > best_score 170 | best_score = max(score, best_score) 171 | snapshot(save_dir, run_name, is_best,{ 172 | 'epoch': epoch + 1, 173 | 'state_dict': model.state_dict(), 174 | 'best_score': best_score, 175 | 'optimizer': optimizer.state_dict(), 176 | 'threshold': threshold, 177 | 'val_loss': loss 178 | }) 179 | 180 | end_epoch_timer = timer() 181 | logger.info("#### End epoch {}, elapsed time: {}".format(epoch, end_epoch_timer - epoch_timer)) 182 | 183 | ########################################################### 184 | ## Prediction 185 | X_test = KaggleAmazonDataset('./data/sample_submission_v2.csv','./data/test-jpg/','.jpg', 186 | ds_transform_raw 187 | ) 188 | test_loader = DataLoader(X_test, 189 | batch_size=batch_size, 190 | num_workers=4, 191 | pin_memory=True) 192 | 193 | # Load model from best iteration 194 | logger.info('===> loading best model for prediction') 195 | checkpoint = torch.load(os.path.join(save_dir, 196 | run_name + '-model_best.pth' 197 | ) 198 | ) 199 | model.load_state_dict(checkpoint['state_dict']) 200 | 201 | # Predict 202 | predictions = predict(test_loader, model) # TODO load model from the best on disk 203 | 204 | output(predictions, 205 | checkpoint['threshold'], 206 | X_test, 207 | X_train.getLabelEncoder(), 208 | './out', 209 | run_name, 210 | checkpoint['best_score']) # TODO early_stopping and use best_score 211 | 212 | ########################################################## 213 | 214 | end_global_timer = timer() 215 | logger.info("################## Success #########################") 216 | logger.info("Total elapsed time: %s" % (end_global_timer - global_timer)) -------------------------------------------------------------------------------- /baseline/unfinished_attempts/000-Mxnet-Resnet-extraction-XGBoost-MultiLabel-TODO.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 5, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2017-04-21T06:06:23.865835Z", 9 | "start_time": "2017-04-21T06:06:23.863222Z" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "import pandas as pd\n", 15 | "import numpy as np" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 6, 21 | "metadata": { 22 | "ExecuteTime": { 23 | "end_time": "2017-04-21T06:06:24.336423Z", 24 | "start_time": "2017-04-21T06:06:24.315747Z" 25 | } 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "df_train = pd.read_csv('./data/train.csv')" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 7, 35 | "metadata": { 36 | "ExecuteTime": { 37 | "end_time": "2017-04-21T06:06:24.608470Z", 38 | "start_time": 
"2017-04-21T06:06:24.603983Z" 39 | } 40 | }, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | "Reading labels ...\n", 47 | " image_name tags\n", 48 | "0 train_0 haze primary\n", 49 | "1 train_1 agriculture clear primary water\n", 50 | "2 train_2 clear primary\n", 51 | "3 train_3 clear primary\n", 52 | "4 train_4 agriculture clear habitation primary road\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "print(\"Reading labels ...\")\n", 58 | "print(df_train.head())" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 9, 64 | "metadata": { 65 | "ExecuteTime": { 66 | "end_time": "2017-04-21T06:06:37.925413Z", 67 | "start_time": "2017-04-21T06:06:30.374043Z" 68 | } 69 | }, 70 | "outputs": [], 71 | "source": [ 72 | "X = np.array([np.load('./baseline/tmp/TMPDIR%s.jpg.npy' % str(name)) for name in df_train['image_name'].tolist()])" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 10, 78 | "metadata": { 79 | "ExecuteTime": { 80 | "end_time": "2017-04-21T06:06:37.970605Z", 81 | "start_time": "2017-04-21T06:06:37.926635Z" 82 | } 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "flatten = lambda l: [item for sublist in l for item in sublist]\n", 87 | "labels = list(set(flatten([l.split(' ') for l in df_train['tags'].values])))\n", 88 | "\n", 89 | "label_map = {l: i for i, l in enumerate(labels)}\n", 90 | "inv_label_map = {i: l for l, i in label_map.items()}\n" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 11, 96 | "metadata": { 97 | "ExecuteTime": { 98 | "end_time": "2017-04-21T06:06:37.981505Z", 99 | "start_time": "2017-04-21T06:06:37.971927Z" 100 | } 101 | }, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "{'agriculture': 16,\n", 107 | " 'artisinal_mine': 2,\n", 108 | " 'bare_ground': 10,\n", 109 | " 'blooming': 8,\n", 110 | " 'blow_down': 3,\n", 111 | " 'clear': 6,\n", 112 | " 'cloudy': 1,\n", 113 | " 'conventional_mine': 4,\n", 114 | " 'cultivation': 5,\n", 115 | " 'habitation': 11,\n", 116 | " 'haze': 15,\n", 117 | " 'partly_cloudy': 13,\n", 118 | " 'primary': 0,\n", 119 | " 'road': 9,\n", 120 | " 'selective_logging': 12,\n", 121 | " 'slash_burn': 7,\n", 122 | " 'water': 14}" 123 | ] 124 | }, 125 | "execution_count": 11, 126 | "metadata": {}, 127 | "output_type": "execute_result" 128 | } 129 | ], 130 | "source": [ 131 | "label_map" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 40, 137 | "metadata": { 138 | "ExecuteTime": { 139 | "end_time": "2017-04-21T06:31:30.982656Z", 140 | "start_time": "2017-04-21T06:31:30.980680Z" 141 | } 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "from sklearn.metrics import fbeta_score\n", 146 | "import xgboost as xgb" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 13, 152 | "metadata": { 153 | "ExecuteTime": { 154 | "end_time": "2017-04-21T06:06:38.132293Z", 155 | "start_time": "2017-04-21T06:06:38.128493Z" 156 | } 157 | }, 158 | "outputs": [], 159 | "source": [ 160 | "param = {}\n", 161 | "param['objective'] = 'binary:softprob'\n", 162 | "param['eta'] = 0.2\n", 163 | "param['max_depth'] = 4\n", 164 | "param['silent'] = 1\n", 165 | "param['num_class'] = 3\n", 166 | "param['eval_metric'] = \"logloss\"\n", 167 | "param['min_child_weight'] = 1\n", 168 | "param['subsample'] = 0.7\n", 169 | "param['colsample_bytree'] = 0.5\n", 170 | "param['seed'] = 1337" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 31, 176 | "metadata": { 177 | 
"ExecuteTime": { 178 | "end_time": "2017-04-21T06:18:58.799314Z", 179 | "start_time": "2017-04-21T06:18:58.794489Z" 180 | } 181 | }, 182 | "outputs": [], 183 | "source": [ 184 | "from sklearn.preprocessing import LabelBinarizer\n", 185 | "from sklearn.model_selection import train_test_split\n", 186 | "from sklearn.ensemble import RandomForestClassifier\n", 187 | "from sklearn.multioutput import MultiOutputClassifier" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 16, 193 | "metadata": { 194 | "ExecuteTime": { 195 | "end_time": "2017-04-21T06:10:22.617461Z", 196 | "start_time": "2017-04-21T06:10:22.614960Z" 197 | }, 198 | "collapsed": true 199 | }, 200 | "outputs": [], 201 | "source": [ 202 | "le = LabelBinarizer()" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 18, 208 | "metadata": { 209 | "ExecuteTime": { 210 | "end_time": "2017-04-21T06:10:45.783573Z", 211 | "start_time": "2017-04-21T06:10:45.649143Z" 212 | } 213 | }, 214 | "outputs": [], 215 | "source": [ 216 | "y = le.fit_transform(df_train['tags'])" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 24, 222 | "metadata": { 223 | "ExecuteTime": { 224 | "end_time": "2017-04-21T06:11:56.363157Z", 225 | "start_time": "2017-04-21T06:11:56.361001Z" 226 | } 227 | }, 228 | "outputs": [], 229 | "source": [ 230 | "plst = list(param.items())" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 27, 236 | "metadata": { 237 | "ExecuteTime": { 238 | "end_time": "2017-04-21T06:15:04.726534Z", 239 | "start_time": "2017-04-21T06:15:04.594424Z" 240 | } 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "# Create a validation set\n", 245 | "x_trn, x_val, y_trn, y_val = train_test_split(X, y, test_size=0.2, random_state=42)" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 42, 251 | "metadata": { 252 | "ExecuteTime": { 253 | "end_time": "2017-04-21T06:31:47.113599Z", 254 | "start_time": "2017-04-21T06:31:47.111312Z" 255 | } 256 | }, 257 | "outputs": [], 258 | "source": [ 259 | "xgb_c = xgb.XGBClassifier(n_estimators=100)" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 43, 265 | "metadata": { 266 | "ExecuteTime": { 267 | "end_time": "2017-04-21T06:31:53.203540Z", 268 | "start_time": "2017-04-21T06:31:53.201474Z" 269 | }, 270 | "collapsed": true 271 | }, 272 | "outputs": [], 273 | "source": [ 274 | "multi_target_forest = MultiOutputClassifier(xgb_c, n_jobs=-1)" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 44, 280 | "metadata": { 281 | "ExecuteTime": { 282 | "end_time": "2017-04-21T06:31:54.526726Z", 283 | "start_time": "2017-04-21T06:31:54.524871Z" 284 | }, 285 | "collapsed": true 286 | }, 287 | "outputs": [], 288 | "source": [ 289 | "X = X.reshape(40479,2048)" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": { 296 | "ExecuteTime": { 297 | "start_time": "2017-04-21T06:31:59.421Z" 298 | } 299 | }, 300 | "outputs": [], 301 | "source": [ 302 | "multi_target_forest.fit(X, y)" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": { 309 | "ExecuteTime": { 310 | "start_time": "2017-04-21T06:34:09.608Z" 311 | }, 312 | "collapsed": true 313 | }, 314 | "outputs": [], 315 | "source": [ 316 | "from sklearn.externals import joblib\n", 317 | "joblib.dump(multi_target_forest, 'multi_target_forest.pkl') " 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | 
"execution_count": null, 323 | "metadata": { 324 | "collapsed": true 325 | }, 326 | "outputs": [], 327 | "source": [] 328 | } 329 | ], 330 | "metadata": { 331 | "kernelspec": { 332 | "display_name": "Python 3", 333 | "language": "python", 334 | "name": "python3" 335 | }, 336 | "language_info": { 337 | "codemirror_mode": { 338 | "name": "ipython", 339 | "version": 3 340 | }, 341 | "file_extension": ".py", 342 | "mimetype": "text/x-python", 343 | "name": "python", 344 | "nbconvert_exporter": "python", 345 | "pygments_lexer": "ipython3", 346 | "version": "3.6.0" 347 | } 348 | }, 349 | "nbformat": 4, 350 | "nbformat_minor": 2 351 | } 352 | -------------------------------------------------------------------------------- /src/_deprecated.py: -------------------------------------------------------------------------------- 1 | ### From Validation.py 2 | 3 | ## DEPRECATED: Unfortunately COBYLA from Scipy can does not respect "lexical bounds". 4 | ## Beware: the following will probably overfit the threshold to the validation set 5 | ################################################################################## 6 | ## Metrics 7 | ## Given the labels imbalance we can't use the same threshold for each label. 8 | ## We could implement our own maximizer on all 17 classes but scipy.optimize already have 9 | ## 4 optimizations algorithms in C/Fortran that can work with constraints: L-BFGS-B, TNC, COBYLA and SLSQP. 10 | ## Of those only cobyla doesn't rely on 2nd order hessians which are error-prone with our function 11 | ## based on inequalities 12 | 13 | # Cobyla constraints are build by comparing return value with 0. 14 | # They must be >= 0 or be rejected 15 | 16 | def constr_sup0(x): 17 | return np.min(x) 18 | def constr_inf1(x): 19 | return 1 - np.max(x) 20 | 21 | def f2_score(true_target, predictions): 22 | 23 | def f_neg(threshold): 24 | ## Scipy tries to minimize the function so we must get its inverse 25 | return - fbeta_score(true_target, predictions > threshold, beta=2, average='samples') 26 | 27 | # Initialization of best threshold search 28 | thr_0 = np.array([0.2 for i in range(17)]) 29 | 30 | # Search 31 | thr_opt = fmin_cobyla(f_neg, thr_0, [constr_sup0,constr_inf1], disp=0) 32 | 33 | logger.info("===> Optimal threshold for each label:\n{}".format(thr_opt)) 34 | 35 | score = fbeta_score(true_target, predictions > thr_opt, beta=2, average='samples') 36 | return score, thr_opt 37 | 38 | ## The jit is slower than scikit by a few ms. Unless the optimizing loop can be JIT too it's not worth it 39 | 40 | ################################################################################## 41 | ## Metrics 42 | ## Given the labels imbalance we can't use the same threshold for each label. 
43 | ## We loop on each column label independently and maximize F2 score 44 | ## Limit: might overfit 45 | ## We don't model interdependance of coefs 46 | 47 | from numba import jit 48 | 49 | 50 | # True Positive 51 | @jit(nopython=True) 52 | def true_pos(pred_labels, true_labels): 53 | return np.sum(np.logical_and(pred_labels == 1, true_labels == 1)) 54 | 55 | # True Negative 56 | @jit(nopython=True) 57 | def true_neg(pred_labels, true_labels): 58 | return np.sum(np.logical_and(pred_labels == 0, true_labels == 0)) 59 | 60 | # False Positive - Type I Error 61 | @jit(nopython=True) 62 | def false_pos(pred_labels, true_labels): 63 | return np.sum(np.logical_and(pred_labels == 1, true_labels == 0)) 64 | 65 | # False Negative - Type II Error 66 | @jit(nopython=True) 67 | def false_neg(pred_labels, true_labels): 68 | return np.sum(np.logical_and(pred_labels == 0, true_labels == 1)) 69 | 70 | @jit(nopython=True) 71 | def precision(pred_labels, true_labels): 72 | TP = true_pos(pred_labels, true_labels) 73 | FP = false_pos(pred_labels, true_labels) 74 | 75 | # Edge cases True Positives = 0, False negative = 0 76 | # No predicted labels at all 77 | # Shouldn't happen all photos must have at least one label 78 | # We return 0 so that the threshold becomes better 79 | # Should we penalize more ? 80 | if TP==0 and FP==0: return 0 81 | 82 | return TP / (TP + FP) 83 | 84 | @jit(nopython=True) 85 | def recall(pred_labels, true_labels): 86 | TP = true_pos(pred_labels, true_labels) 87 | FN = false_neg(pred_labels, true_labels) 88 | 89 | # Edge cases True Positives = 0, False negative = 0 90 | # i.e no label in the true_labels input. 91 | # Shouldn't happen all photos have at least one label 92 | 93 | return TP / (TP + FN) 94 | 95 | @jit(nopython=True) 96 | def f2_score_macro(pred_labels, true_labels): 97 | p = precision(pred_labels, true_labels) 98 | r = recall(pred_labels, true_labels) 99 | if p == 0 and r == 0: return 0 100 | return 5 * p * r / (4 * p + r) 101 | 102 | @jit 103 | def f2_score_mean(pred_labels, true_labels): 104 | # F2_score_mean accelerated by numba 105 | # Cannot force nopython mode because for loop on arrays does not work 106 | i = 0 107 | acc = 0 108 | for (x,y) in zip(pred_labels,true_labels): 109 | acc += f2_score_macro(x,y) 110 | i+=1 111 | return acc / i 112 | 113 | 114 | ### Kaggle kernel search 115 | def search_best_threshold(p_valid, y_valid, try_all=False, verbose=False): 116 | p_valid, y_valid = np.array(p_valid), np.array(y_valid) 117 | 118 | best_threshold = 0 119 | best_score = -1 120 | totry = np.arange(0,1,0.005) if try_all is False else np.unique(p_valid) 121 | for t in totry: 122 | score = fbeta_score(y_valid, p_valid > t, beta=2, average='samples') 123 | if score > best_score: 124 | best_score = score 125 | best_threshold = t 126 | logger.info("===> Optimal threshold for each label:\n{}".format(best_threshold)) 127 | return best_score, best_threshold 128 | 129 | # Search with L-BFGS-B 130 | thr_0 = np.array([0.20 for i in range(17)]) 131 | constraints = [(0.,1.) 
for i in range(17)] 132 | thr_opt, score_neg, dico = fmin_l_bfgs_b(f_neg, thr_0, bounds=constraints, approx_grad=True, epsilon=0.05) 133 | 134 | ## From dataload.py 135 | ################################################## 136 | ## DEPRECATED: https://discuss.pytorch.org/t/feedback-on-pytorch-for-kaggle-competitions/2252/8?u=mratsim 137 | ## Augmentation on PyTorch are done randomly at each epoch 138 | 139 | class AugmentedAmazonDataset(Dataset): 140 | """Dataset wrapping images and target labels for Kaggle - Planet Amazon from Space competition. 141 | This dataset is augmented 142 | 143 | Arguments: 144 | A CSV file path 145 | Path to image folder 146 | Extension of images 147 | """ 148 | 149 | def __init__(self, csv_path, img_path, img_ext, transform=None): 150 | 151 | tmp_df = pd.read_csv(csv_path) 152 | assert tmp_df['image_name'].apply(lambda x: os.path.isfile(img_path + x + img_ext)).all(), \ 153 | "Some images referenced in the CSV file were not found" 154 | 155 | self.mlb = MultiLabelBinarizer() 156 | self.img_path = img_path 157 | self.img_ext = img_ext 158 | self.transform = transform 159 | 160 | self.X = tmp_df['image_name'] 161 | self.y = self.mlb.fit_transform(tmp_df['tags'].str.split()).astype(np.float32) 162 | self.augmentNumber = 14 # TODO, do something about this harcoded value 163 | 164 | def __getitem__(self, index): 165 | real_length = self.real_length() 166 | real_index = index % real_length 167 | 168 | img = Image.open(self.img_path + self.X[real_index] + self.img_ext) 169 | img = img.convert('RGB') 170 | 171 | ## Augmentation code 172 | if 0 <= index < real_length: 173 | pass 174 | 175 | ### Mirroring and Rotating 176 | elif real_length <= index < 2 * real_length: 177 | img = img.transpose(FLIP_LEFT_RIGHT) 178 | elif 2 * real_length <= index < 3 * real_length: 179 | img = img.transpose(FLIP_TOP_BOTTOM) 180 | elif 3 * real_length <= index < 4 * real_length: 181 | img = img.transpose(ROTATE_90) 182 | elif 4 * real_length <= index < 5 * real_length: 183 | img = img.transpose(ROTATE_180) 184 | elif 5 * real_length <= index < 6 * real_length: 185 | img = img.transpose(ROTATE_270) 186 | 187 | ### Color balance 188 | elif 6 * real_length <= index < 7 * real_length: 189 | img = Color(img).enhance(0.95) 190 | elif 7 * real_length <= index < 8 * real_length: 191 | img = Color(img).enhance(1.05) 192 | ## Contrast 193 | elif 8 * real_length <= index < 9 * real_length: 194 | img = Contrast(img).enhance(0.95) 195 | elif 9 * real_length <= index < 10 * real_length: 196 | img = Contrast(img).enhance(1.05) 197 | ## Brightness 198 | elif 10 * real_length <= index < 11 * real_length: 199 | img = Brightness(img).enhance(0.95) 200 | elif 11 * real_length <= index < 12 * real_length: 201 | img = Brightness(img).enhance(1.05) 202 | ## Sharpness 203 | elif 12 * real_length <= index < 13 * real_length: 204 | img = Sharpness(img).enhance(0.95) 205 | elif 13 * real_length <= index < 14 * real_length: 206 | img = Sharpness(img).enhance(1.05) 207 | else: 208 | raise IndexError("Index out of bounds") 209 | 210 | 211 | if self.transform is not None: 212 | img = self.transform(img) 213 | 214 | label = from_numpy(self.y[real_index]) 215 | return img, label 216 | 217 | def __len__(self): 218 | return len(self.X.index) * self.augmentNumber 219 | 220 | def real_length(self): 221 | return len(self.X.index) 222 | 223 | def getLabelEncoder(self): 224 | return self.mlb 225 | 226 | #### Usage 227 | 228 | ############################################################ 229 | # Augmented part 230 | # X_train = 
AugmentedAmazonDataset('./data/train.csv','./data/train-jpg/','.jpg', 231 | # ds_transform 232 | # ) 233 | 234 | # Creating a validation split 235 | # train_idx, valid_idx = augmented_train_valid_split(X_train, 0.2) 236 | 237 | # nb_augment = X_train.augmentNumber 238 | # augmented_train_idx = [i * nb_augment + idx for idx in train_idx for i in range(0,nb_augment)] 239 | 240 | # train_sampler = SubsetRandomSampler(augmented_train_idx) 241 | # valid_sampler = SubsetRandomSampler(valid_idx) 242 | ########################################################### 243 | 244 | 245 | ################################################## 246 | ## DEPRECATED: AugmentedAmazonDataset is deprecated 247 | ## https://discuss.pytorch.org/t/feedback-on-pytorch-for-kaggle-competitions/2252/8?u=mratsim 248 | ## Augmentation on PyTorch are done randomly at each epoch 249 | 250 | 251 | def augmented_train_valid_split(dataset, test_size = 0.25, shuffle = False, random_seed = 0): 252 | """ Return a list of splitted indices from a DataSet. 253 | Indices can be used with DataLoader to build a train and validation set. 254 | 255 | Arguments: 256 | A Dataset 257 | A test_size, as a float between 0 and 1 (percentage split) or as an int (fixed number split) 258 | Shuffling True or False 259 | Random seed 260 | """ 261 | length = dataset.real_length() 262 | indices = list(range(1,length)) 263 | 264 | if shuffle == True: 265 | random.seed(random_seed) 266 | random.shuffle(indices) 267 | 268 | if type(test_size) is float: 269 | split = floor(test_size * length) 270 | elif type(test_size) is int: 271 | split = test_size 272 | else: 273 | raise ValueError('%s should be an int or a float' % str) 274 | return indices[split:], indices[:split] 275 | -------------------------------------------------------------------------------- /src/k_dataloader.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | from sklearn.preprocessing import MultiLabelBinarizer 5 | from keras.preprocessing.image import ImageDataGenerator, Iterator, load_img, img_to_array 6 | import pandas as pd 7 | import os 8 | import threading 9 | import numpy as np 10 | import keras.backend as K 11 | 12 | ## For computing mean and std 13 | from tqdm import tqdm 14 | import cv2 15 | 16 | class AmazonGenerator(ImageDataGenerator): 17 | def __init__(self, *args, **kwargs): 18 | super(AmazonGenerator, self).__init__(*args, **kwargs) 19 | self.iterator = None 20 | 21 | def flow_from_csv(self, csv_path, img_path, img_ext, 22 | mode='fit', 23 | target_size=(256, 256), 24 | color_mode='rgb', 25 | batch_size=32, shuffle=True, seed=None): 26 | 27 | self.iterator = AmazonCSVIterator(self, csv_path, 28 | img_path, img_ext, 29 | mode=mode, 30 | target_size = target_size, 31 | color_mode = color_mode, 32 | batch_size = batch_size, 33 | shuffle = shuffle, 34 | seed = seed, 35 | data_format=None) 36 | self.mlb = self.iterator.getLabelEncoder() 37 | return(self.iterator) 38 | 39 | def flow_from_df(self, dataframe, img_path, img_ext, 40 | mode='fit', 41 | target_size=(256, 256), 42 | color_mode='rgb', 43 | batch_size=32, shuffle=True, seed=None): 44 | 45 | self.iterator = AmazonDFIterator(self, dataframe,img_path, img_ext, 46 | mode=mode, 47 | target_size = target_size, 48 | color_mode = color_mode, 49 | batch_size = batch_size, 50 | shuffle = shuffle, 51 | seed = seed, 52 | data_format=None) 53 | self.mlb = self.iterator.getLabelEncoder() 54 | return(self.iterator) 55 
| 56 | def getLabelEncoder(self): 57 | return self.iterator.getLabelEncoder() 58 | 59 | def fit_from_csv(self, csv_path, img_path, img_ext, rescale, target_size): 60 | '''Required for featurewise_center, featurewise_std_normalization 61 | when using images loaded from csv. 62 | 63 | # Arguments 64 | csv_path: Path to the csv with image list 65 | img_path: Directory with all images 66 | img_ext: Extension of images 67 | rescaling factor: usually we rescale images from 0-255 to 0-1 68 | resolution: A tuple of int. Images will be rescaled to that resolution before computing mean as we need to hold them all in memory. Set as big as your memory allows 69 | ''' 70 | 71 | # Computing mean and variance using Welford's algorithm for one pass only and numerical stability. 72 | df = pd.read_csv(csv_path) 73 | 74 | # Pre-allocation 75 | shape = cv2.imread(os.path.join( 76 | img_path, 77 | df['image_name'].iloc[0] + img_ext)).shape 78 | 79 | mean= np.zeros(shape, dtype=np.float32) 80 | M2= np.zeros(shape, dtype=np.float32) 81 | 82 | print('Computing mean and standard deviation on the dataset') 83 | for n, file in enumerate(tqdm(df['image_name'], miniters=256), 1): 84 | img = cv2.imread(os.path.join(img_path, file + img_ext)).astype(np.float32) 85 | img *= rescale 86 | delta = img - mean 87 | mean += delta/n 88 | delta2 = img - mean 89 | M2 += delta*delta2 90 | 91 | self.mean = mean 92 | self.std = M2 / (n-1) 93 | 94 | print("Mean has shape: " + str(self.mean.shape)) 95 | print("Std has shape: " + str(self.std.shape)) 96 | 97 | def dump_dataset_mean_std(self, path_mean, path_std): 98 | if self.mean is None or self.std is None: 99 | raise ValueError('Mean and Std must be computed before, fit the generator first') 100 | np.save(path_mean, self.mean) 101 | np.save(path_std, self.std) 102 | 103 | 104 | def load_mean_std(self, path_mean, path_std): 105 | self.mean = np.load(path_mean) 106 | self.std = np.load(path_std) 107 | print("Mean has shape: " + str(self.mean.shape)) 108 | print("Std has shape: " + str(self.std.shape)) 109 | 110 | class AmazonCSVIterator(Iterator): 111 | def __init__(self, image_data_generator, csv_path, 112 | img_path, img_ext, 113 | mode='fit', 114 | target_size=(256, 256), 115 | color_mode='rgb', 116 | batch_size=32, shuffle=True, seed=None, 117 | data_format=None): 118 | 119 | ## Common initialization routines 120 | self.target_size = tuple(target_size) 121 | if color_mode not in {'rgb', 'grayscale'}: 122 | raise ValueError('Invalid color mode:', color_mode, 123 | '; expected "rgb" or "grayscale".') 124 | self.color_mode = color_mode 125 | 126 | if data_format is None: 127 | self.data_format = K.image_data_format() 128 | 129 | if self.color_mode == 'rgb': 130 | if self.data_format == 'channels_last': 131 | self.image_shape = self.target_size + (3,) 132 | else: 133 | self.image_shape = (3,) + self.target_size 134 | else: 135 | if self.data_format == 'channels_last': 136 | self.image_shape = self.target_size + (1,) 137 | else: 138 | self.image_shape = (1,) + self.target_size 139 | 140 | self.image_data_generator = image_data_generator 141 | 142 | ## Specific to Amazon 143 | tmp_df = pd.read_csv(csv_path) 144 | assert tmp_df['image_name'].apply(lambda x: os.path.isfile(img_path + x + img_ext)).all(), \ 145 | "Some images referenced in the CSV file were not found" 146 | 147 | self.mlb = MultiLabelBinarizer() 148 | self.img_path = img_path 149 | self.img_ext = img_ext 150 | self.X = tmp_df['image_name'] 151 | self.mode = mode 152 | if mode == 'fit': 153 | self.y = 
self.mlb.fit_transform(tmp_df['tags'].str.split()) 154 | 155 | ## Init parent class 156 | super(AmazonCSVIterator, self).__init__(self.X.shape[0], 157 | batch_size, shuffle, seed) 158 | 159 | def next(self): 160 | """For python 2.x. 161 | # Returns The next batch. 162 | """ 163 | 164 | with self.lock: 165 | index_array, current_index, current_batch_size = next(self.index_generator) 166 | 167 | # The transformation of images is not under thread lock 168 | # so it can be done in parallel 169 | batch_x = np.zeros((current_batch_size,) + self.image_shape, dtype=K.floatx()) 170 | grayscale = self.color_mode == 'grayscale' 171 | 172 | # Build batch of images 173 | for i, j in enumerate(index_array): 174 | fpath = os.path.join(self.img_path,self.X[j] + self.img_ext) 175 | img = load_img(fpath, 176 | grayscale=grayscale, 177 | target_size=self.target_size) 178 | x = img_to_array(img, data_format=self.data_format) 179 | x = self.image_data_generator.random_transform(x) 180 | x = self.image_data_generator.standardize(x) 181 | batch_x[i] = x 182 | 183 | # Build batch of labels. 184 | if mode=='fit': 185 | batch_y = self.y[index_array] 186 | return batch_x, batch_y 187 | elif mode=='predict': 188 | return batch_x 189 | else: raise ValueError('The mode should be either \'fit\' or \'predict\'') 190 | 191 | def getLabelEncoder(self): 192 | return self.mlb 193 | 194 | class AmazonDFIterator(Iterator): 195 | def __init__(self, image_data_generator, df, img_path, img_ext, 196 | mode='fit', 197 | target_size=(256, 256), 198 | color_mode='rgb', 199 | batch_size=32, shuffle=True, seed=None, 200 | data_format=None): 201 | 202 | ## Common initialization routines 203 | self.target_size = tuple(target_size) 204 | if color_mode not in {'rgb', 'grayscale'}: 205 | raise ValueError('Invalid color mode:', color_mode, 206 | '; expected "rgb" or "grayscale".') 207 | self.color_mode = color_mode 208 | 209 | if data_format is None: 210 | self.data_format = K.image_data_format() 211 | 212 | if self.color_mode == 'rgb': 213 | if self.data_format == 'channels_last': 214 | self.image_shape = self.target_size + (3,) 215 | else: 216 | self.image_shape = (3,) + self.target_size 217 | else: 218 | if self.data_format == 'channels_last': 219 | self.image_shape = self.target_size + (1,) 220 | else: 221 | self.image_shape = (1,) + self.target_size 222 | 223 | self.image_data_generator = image_data_generator 224 | 225 | ## Specific to Amazon 226 | assert df['image_name'].apply(lambda x: os.path.isfile(img_path + x + img_ext)).all(), \ 227 | "Some images referenced in the CSV file were not found" 228 | 229 | self.mlb = MultiLabelBinarizer() 230 | self.img_path = img_path 231 | self.img_ext = img_ext 232 | self.X = df['image_name'] 233 | self.mode = mode 234 | if mode == 'fit': 235 | self.y = self.mlb.fit_transform(df['tags'].str.split()) 236 | 237 | ## Init parent class 238 | super(AmazonDFIterator, self).__init__(self.X.shape[0], 239 | batch_size, shuffle, seed) 240 | 241 | def next(self): 242 | """For python 2.x. 243 | # Returns The next batch. 
244 | """ 245 | 246 | with self.lock: 247 | index_array, current_index, current_batch_size = next(self.index_generator) 248 | 249 | # The transformation of images is not under thread lock 250 | # so it can be done in parallel 251 | batch_x = np.zeros((current_batch_size,) + self.image_shape, dtype=K.floatx()) 252 | grayscale = self.color_mode == 'grayscale' 253 | 254 | # Build batch of images 255 | for i, j in enumerate(index_array): 256 | fpath = os.path.join(self.img_path,self.X[j] + self.img_ext) 257 | img = load_img(fpath, 258 | grayscale=grayscale, 259 | target_size=self.target_size) 260 | x = img_to_array(img, data_format=self.data_format) 261 | x = self.image_data_generator.random_transform(x) 262 | x = self.image_data_generator.standardize(x) 263 | batch_x[i] = x 264 | 265 | # Build batch of labels. 266 | if self.mode=='fit': 267 | batch_y = self.y[index_array] 268 | return batch_x, batch_y 269 | elif self.mode=='predict': 270 | return batch_x 271 | else: raise ValueError('The mode should be either \'fit\' or \'predict\'') 272 | 273 | def getLabelEncoder(self): 274 | return self.mlb -------------------------------------------------------------------------------- /baseline/pytorch_scatter_gather_onehotencoding.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2017-05-06T05:31:03.921480Z", 9 | "start_time": "2017-05-06T05:31:03.919143Z" 10 | }, 11 | "collapsed": true 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "import torch\n", 16 | "from torch import nn\n", 17 | "from torch.autograd import Variable" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 4, 23 | "metadata": { 24 | "ExecuteTime": { 25 | "end_time": "2017-05-06T05:31:05.240531Z", 26 | "start_time": "2017-05-06T05:31:04.686364Z" 27 | }, 28 | "collapsed": true 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "m = nn.LogSoftmax()\n", 33 | "loss = nn.NLLLoss()\n", 34 | "# input is of size nBatch x nClasses = 3 x 5\n", 35 | "input = Variable(torch.randn(3, 5), requires_grad=True)\n", 36 | "# each element in target has to have 0 <= value < nclasses\n", 37 | "target = Variable(torch.LongTensor([1, 0, 4]))\n", 38 | "output = loss(m(input), target)\n", 39 | "output.backward()" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 35, 45 | "metadata": { 46 | "ExecuteTime": { 47 | "end_time": "2017-05-06T05:49:58.231485Z", 48 | "start_time": "2017-05-06T05:49:58.229175Z" 49 | }, 50 | "collapsed": true 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "target2 = Variable(torch.LongTensor([[0, 1, 0, 0, 0],\n", 55 | " [1, 0, 0, 0, 0],\n", 56 | " [0, 0, 0, 0, 1]]))" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 36, 62 | "metadata": { 63 | "ExecuteTime": { 64 | "end_time": "2017-05-06T05:49:58.819374Z", 65 | "start_time": "2017-05-06T05:49:58.816471Z" 66 | } 67 | }, 68 | "outputs": [ 69 | { 70 | "data": { 71 | "text/plain": [ 72 | "Variable containing:\n", 73 | " 0 1 0 0 0\n", 74 | " 1 0 0 0 0\n", 75 | " 0 0 0 0 1\n", 76 | "[torch.LongTensor of size 3x5]" 77 | ] 78 | }, 79 | "execution_count": 36, 80 | "metadata": {}, 81 | "output_type": "execute_result" 82 | } 83 | ], 84 | "source": [ 85 | "target2" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 23, 91 | "metadata": { 92 | "ExecuteTime": { 93 | "end_time": "2017-05-06T05:40:40.728998Z", 94 | "start_time": 
"2017-05-06T05:40:40.726516Z" 95 | }, 96 | "collapsed": true 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "target_onehot = Variable(torch.zeros(3, 5))" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 24, 106 | "metadata": { 107 | "ExecuteTime": { 108 | "end_time": "2017-05-06T05:40:40.983895Z", 109 | "start_time": "2017-05-06T05:40:40.980668Z" 110 | } 111 | }, 112 | "outputs": [ 113 | { 114 | "data": { 115 | "text/plain": [ 116 | "Variable containing:\n", 117 | " 0 0 0 0 0\n", 118 | " 0 0 0 0 0\n", 119 | " 0 0 0 0 0\n", 120 | "[torch.FloatTensor of size 3x5]" 121 | ] 122 | }, 123 | "execution_count": 24, 124 | "metadata": {}, 125 | "output_type": "execute_result" 126 | } 127 | ], 128 | "source": [ 129 | "target_onehot" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 29, 135 | "metadata": { 136 | "ExecuteTime": { 137 | "end_time": "2017-05-06T05:42:54.036901Z", 138 | "start_time": "2017-05-06T05:42:54.033351Z" 139 | } 140 | }, 141 | "outputs": [ 142 | { 143 | "data": { 144 | "text/plain": [ 145 | "Variable containing:\n", 146 | " 1\n", 147 | " 0\n", 148 | " 4\n", 149 | "[torch.LongTensor of size 3]" 150 | ] 151 | }, 152 | "execution_count": 29, 153 | "metadata": {}, 154 | "output_type": "execute_result" 155 | } 156 | ], 157 | "source": [ 158 | "target" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 30, 164 | "metadata": { 165 | "ExecuteTime": { 166 | "end_time": "2017-05-06T05:43:22.951826Z", 167 | "start_time": "2017-05-06T05:43:22.949248Z" 168 | }, 169 | "collapsed": true 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "targetv = target.view(-1,1)" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 31, 179 | "metadata": { 180 | "ExecuteTime": { 181 | "end_time": "2017-05-06T05:43:32.052043Z", 182 | "start_time": "2017-05-06T05:43:32.049359Z" 183 | } 184 | }, 185 | "outputs": [ 186 | { 187 | "data": { 188 | "text/plain": [ 189 | "Variable containing:\n", 190 | " 1\n", 191 | " 0\n", 192 | " 4\n", 193 | "[torch.LongTensor of size 3x1]" 194 | ] 195 | }, 196 | "execution_count": 31, 197 | "metadata": {}, 198 | "output_type": "execute_result" 199 | } 200 | ], 201 | "source": [ 202 | "targetv" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 71, 208 | "metadata": { 209 | "ExecuteTime": { 210 | "end_time": "2017-05-06T06:04:15.942080Z", 211 | "start_time": "2017-05-06T06:04:15.938887Z" 212 | } 213 | }, 214 | "outputs": [ 215 | { 216 | "data": { 217 | "text/plain": [ 218 | "Variable containing:\n", 219 | " 0 1 0 0 0\n", 220 | " 1 0 0 0 0\n", 221 | " 0 0 0 0 1\n", 222 | "[torch.FloatTensor of size 3x5]" 223 | ] 224 | }, 225 | "execution_count": 71, 226 | "metadata": {}, 227 | "output_type": "execute_result" 228 | } 229 | ], 230 | "source": [ 231 | "target_onehot.scatter(1, targetv, 1)" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 96, 237 | "metadata": { 238 | "ExecuteTime": { 239 | "end_time": "2017-05-06T06:34:33.131599Z", 240 | "start_time": "2017-05-06T06:34:33.129170Z" 241 | }, 242 | "collapsed": true 243 | }, 244 | "outputs": [], 245 | "source": [ 246 | "s = torch.arange(0,5).expand(3,5) #expand is torch broadcasting" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 97, 252 | "metadata": { 253 | "ExecuteTime": { 254 | "end_time": "2017-05-06T06:34:33.451566Z", 255 | "start_time": "2017-05-06T06:34:33.448207Z" 256 | } 257 | }, 258 | "outputs": [ 259 | { 260 | "data": { 261 | 
"text/plain": [ 262 | "\n", 263 | " 0 1 2 3 4\n", 264 | " 0 1 2 3 4\n", 265 | " 0 1 2 3 4\n", 266 | "[torch.FloatTensor of size 3x5]" 267 | ] 268 | }, 269 | "execution_count": 97, 270 | "metadata": {}, 271 | "output_type": "execute_result" 272 | } 273 | ], 274 | "source": [ 275 | "s" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 88, 281 | "metadata": { 282 | "ExecuteTime": { 283 | "end_time": "2017-05-06T06:14:36.968468Z", 284 | "start_time": "2017-05-06T06:14:36.964932Z" 285 | } 286 | }, 287 | "outputs": [ 288 | { 289 | "data": { 290 | "text/plain": [ 291 | "\n", 292 | " 0 1 0 0 0\n", 293 | " 1 0 0 0 0\n", 294 | " 0 0 0 0 1\n", 295 | "[torch.FloatTensor of size 3x5]" 296 | ] 297 | }, 298 | "execution_count": 88, 299 | "metadata": {}, 300 | "output_type": "execute_result" 301 | } 302 | ], 303 | "source": [ 304 | "s.gather(1, target_onehot.data.long())" 305 | ] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": {}, 310 | "source": [] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 1, 315 | "metadata": { 316 | "ExecuteTime": { 317 | "end_time": "2017-05-06T08:19:14.558481Z", 318 | "start_time": "2017-05-06T08:19:14.437859Z" 319 | }, 320 | "collapsed": true 321 | }, 322 | "outputs": [], 323 | "source": [ 324 | "import torch\n", 325 | "from torch import nn\n", 326 | "from torch.autograd import Variable" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": 4, 332 | "metadata": { 333 | "ExecuteTime": { 334 | "end_time": "2017-05-06T08:19:38.356073Z", 335 | "start_time": "2017-05-06T08:19:38.350191Z" 336 | } 337 | }, 338 | "outputs": [ 339 | { 340 | "name": "stdout", 341 | "output_type": "stream", 342 | "text": [ 343 | "Variable containing:\n", 344 | " 1\n", 345 | " 0\n", 346 | " 4\n", 347 | "[torch.LongTensor of size 3]\n", 348 | "\n", 349 | "Variable containing:\n", 350 | " 0 1 0 0 0\n", 351 | " 1 0 0 0 0\n", 352 | " 0 0 0 0 1\n", 353 | "[torch.FloatTensor of size 3x5]\n", 354 | "\n", 355 | "\n", 356 | " 0\n", 357 | " 1\n", 358 | " 2\n", 359 | " 3\n", 360 | " 4\n", 361 | "[torch.FloatTensor of size 5]\n", 362 | "\n", 363 | "\n", 364 | " 0 1 2 3 4\n", 365 | " 0 1 2 3 4\n", 366 | " 0 1 2 3 4\n", 367 | "[torch.FloatTensor of size 3x5]\n", 368 | "\n", 369 | "\n", 370 | " 1\n", 371 | " 0\n", 372 | " 4\n", 373 | "[torch.FloatTensor of size 3]\n", 374 | "\n" 375 | ] 376 | } 377 | ], 378 | "source": [ 379 | "target = Variable(torch.LongTensor([1, 0, 4]))\n", 380 | "print(target)\n", 381 | "target_onehot = Variable(torch.zeros(3, 5))\n", 382 | "target_onehot.scatter_(1, target.view(-1,1), 1) #_ for inplace\n", 383 | "print(target_onehot)\n", 384 | "\n", 385 | "val = torch.arange(0,5)\n", 386 | "print(val)\n", 387 | "val = val.expand(3,5) #expand is torch broadcasting\n", 388 | "print(val)\n", 389 | "\n", 390 | "new_target=val.masked_select(target_onehot.data.byte())\n", 391 | "print(new_target)" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": 95, 397 | "metadata": { 398 | "ExecuteTime": { 399 | "end_time": "2017-05-06T06:34:27.269336Z", 400 | "start_time": "2017-05-06T06:34:27.264843Z" 401 | } 402 | }, 403 | "outputs": [ 404 | { 405 | "ename": "RuntimeError", 406 | "evalue": "inconsistent tensor size at /pkg/makepkg/python-pytorch-git/src/pytorch/torch/lib/TH/generic/THTensorMath.c:193", 407 | "output_type": "error", 408 | "traceback": [ 409 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 410 | "\u001b[0;31mRuntimeError\u001b[0m 
Traceback (most recent call last)", 411 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmasked_select\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtarget_onehot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbyte\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 412 | "\u001b[0;31mRuntimeError\u001b[0m: inconsistent tensor size at /pkg/makepkg/python-pytorch-git/src/pytorch/torch/lib/TH/generic/THTensorMath.c:193" 413 | ] 414 | } 415 | ], 416 | "source": [ 417 | "s.masked_select(target_onehot.data.byte())" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": null, 423 | "metadata": { 424 | "collapsed": true 425 | }, 426 | "outputs": [], 427 | "source": [] 428 | } 429 | ], 430 | "metadata": { 431 | "kernelspec": { 432 | "display_name": "Python 3", 433 | "language": "python", 434 | "name": "python3" 435 | }, 436 | "language_info": { 437 | "codemirror_mode": { 438 | "name": "ipython", 439 | "version": 3 440 | }, 441 | "file_extension": ".py", 442 | "mimetype": "text/x-python", 443 | "name": "python", 444 | "nbconvert_exporter": "python", 445 | "pygments_lexer": "ipython3", 446 | "version": "3.6.0" 447 | } 448 | }, 449 | "nbformat": 4, 450 | "nbformat_minor": 2 451 | } 452 | -------------------------------------------------------------------------------- /baseline/003-pytorch-kernel-baseline.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "_cell_guid": "82cb34f6-d7d9-5938-c2f9-2b231d073c04" 7 | }, 8 | "source": [ 9 | "# Starting kit for PyTorch Deep Learning\n", 10 | "\n", 11 | "Welcome to this tutorial to get started on PyTorch for this competition.\n", 12 | "PyTorch is a promising port of Facebook's Torch to Python.\n", 13 | "\n", 14 | "It's only 3 months old but has an already promising feature set.\n", 15 | "Unfortunately it's very very raw, and I had a lot of troubles to get started with very basic things:\n", 16 | "- data loading\n", 17 | "- building a basic CNN\n", 18 | "- training\n", 19 | "\n", 20 | "Hopefully this will help you getting started using PyTorch on this dataset." 
21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "_cell_guid": "3763a794-0a61-f0ab-9215-56de74bf29df" 27 | }, 28 | "source": [ 29 | "## Importing libraries\n", 30 | "Please note that we do not import numpy but PyTorch wrapper for Numpy" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 1, 36 | "metadata": { 37 | "ExecuteTime": { 38 | "end_time": "2017-04-25T21:15:49.566831Z", 39 | "start_time": "2017-04-25T21:15:49.179045Z" 40 | }, 41 | "_cell_guid": "f3ee9f39-55e1-ee69-2bb6-25c095155e1d", 42 | "collapsed": true 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "import pandas as pd\n", 47 | "from torch import np # Torch wrapper for Numpy\n", 48 | "\n", 49 | "import os\n", 50 | "from PIL import Image\n", 51 | "\n", 52 | "import torch\n", 53 | "from torch.utils.data.dataset import Dataset\n", 54 | "from torch.utils.data import DataLoader\n", 55 | "from torchvision import transforms\n", 56 | "from torch import nn\n", 57 | "import torch.nn.functional as F\n", 58 | "import torch.optim as optim\n", 59 | "from torch.autograd import Variable\n", 60 | "\n", 61 | "from sklearn.preprocessing import MultiLabelBinarizer" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": { 67 | "_cell_guid": "e6fde4f7-e8f3-3782-673a-62ce72b652fa" 68 | }, 69 | "source": [ 70 | "## Setting up global variables" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 2, 76 | "metadata": { 77 | "ExecuteTime": { 78 | "end_time": "2017-04-25T21:15:49.570687Z", 79 | "start_time": "2017-04-25T21:15:49.568053Z" 80 | }, 81 | "_cell_guid": "45d63034-a44c-47e8-7376-2deb00af03a9", 82 | "collapsed": true 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "IMG_PATH = '../data/train-jpg/'\n", 87 | "IMG_EXT = '.jpg'\n", 88 | "TRAIN_DATA = '../data/train.csv'" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": { 94 | "_cell_guid": "1aa8e64e-f2eb-f570-bfd3-6098638c5f40" 95 | }, 96 | "source": [ 97 | "## Loading the data - first part - DataSet\n", 98 | "\n", 99 | "This is probably the most obscure part of PyTorch. Most examples use well known datasets (MNIST ...) and have a custom loader or forces you to have a specific folder structure similar to this:\n", 100 | "\n", 101 | "* data\n", 102 | " * train\n", 103 | " * dogs\n", 104 | " * cats\n", 105 | " * validation\n", 106 | " * dogs\n", 107 | " * cats\n", 108 | " * test\n", 109 | " * test\n", 110 | "\n", 111 | "Data loading in PyTorch is in 2 parts\n", 112 | "\n", 113 | "First the data must be wrapped in a __Dataset__ class with a getitem method that from an index return X_train[index] and y_train[index] and a length method. A Dataset is basically a data storage.\n", 114 | "\n", 115 | "The following solution loads the image name from a CSV and file path + extension and can be adapted easily for most Kaggle challenges. You won't have to write your own ;).\n", 116 | "\n", 117 | "The code will:\n", 118 | "\n", 119 | "- Check that all images in CSV exist in the folder\n", 120 | "- Use ScikitLearn MultiLabelBinarizer to OneHotEncode the labels, mlb.inverse_transform(predictions) can be used to get back the textual labels from the predictions\n", 121 | "- Apply PIL transformations to the images. 
See [here](http://pytorch.org/docs/torchvision/transforms.html) for the supported list.\n", 122 | "- Use ToTensor() to convert from an image with color scale 0-255 to a Tensor with color scale 0-1.\n", 123 | "\n", 124 | "Note: We use PIL instead of OpenCV because it's Torch default image loader and is compatible with `ToTensor()` method. An fast loader called accimage is currently in development and was published 3 days ago [here](https://github.com/pytorch/accimage).\n", 125 | "\n", 126 | "Note 2: This only provides a mapping to the data, **the data is not loaded in memory at this point**. The next part will show you how to load only what is needed for the batch in memory. This is a huge advantage compared to kernels that must load all images at once." 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 3, 132 | "metadata": { 133 | "ExecuteTime": { 134 | "end_time": "2017-04-25T21:15:49.584068Z", 135 | "start_time": "2017-04-25T21:15:49.572082Z" 136 | }, 137 | "_cell_guid": "08a005ca-d963-5434-d60d-72d399cb7fe3", 138 | "collapsed": true 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "class KaggleAmazonDataset(Dataset):\n", 143 | " \"\"\"Dataset wrapping images and target labels for Kaggle - Planet Amazon from Space competition.\n", 144 | "\n", 145 | " Arguments:\n", 146 | " A CSV file path\n", 147 | " Path to image folder\n", 148 | " Extension of images\n", 149 | " PIL transforms\n", 150 | " \"\"\"\n", 151 | "\n", 152 | " def __init__(self, csv_path, img_path, img_ext, transform=None):\n", 153 | " \n", 154 | " tmp_df = pd.read_csv(csv_path)\n", 155 | " assert tmp_df['image_name'].apply(lambda x: os.path.isfile(img_path + x + img_ext)).all(), \\\n", 156 | "\"Some images referenced in the CSV file were not found\"\n", 157 | " \n", 158 | " self.mlb = MultiLabelBinarizer()\n", 159 | " self.img_path = img_path\n", 160 | " self.img_ext = img_ext\n", 161 | " self.transform = transform\n", 162 | "\n", 163 | " self.X_train = tmp_df['image_name']\n", 164 | " self.y_train = self.mlb.fit_transform(tmp_df['tags'].str.split()).astype(np.float32)\n", 165 | "\n", 166 | " def __getitem__(self, index):\n", 167 | " img = Image.open(self.img_path + self.X_train[index] + self.img_ext)\n", 168 | " img = img.convert('RGB')\n", 169 | " if self.transform is not None:\n", 170 | " img = self.transform(img)\n", 171 | " \n", 172 | " label = torch.from_numpy(self.y_train[index])\n", 173 | " return img, label\n", 174 | "\n", 175 | " def __len__(self):\n", 176 | " return len(self.X_train.index)" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 4, 182 | "metadata": { 183 | "ExecuteTime": { 184 | "end_time": "2017-04-25T21:15:49.784282Z", 185 | "start_time": "2017-04-25T21:15:49.585273Z" 186 | }, 187 | "_cell_guid": "98a20a0b-d39e-21a6-232b-990e916f6756", 188 | "collapsed": true 189 | }, 190 | "outputs": [], 191 | "source": [ 192 | "transformations = transforms.Compose([transforms.Scale(32),transforms.ToTensor()])\n", 193 | "\n", 194 | "dset_train = KaggleAmazonDataset(TRAIN_DATA,IMG_PATH,IMG_EXT,transformations)" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": { 200 | "_cell_guid": "2db00aac-0fb9-a1ab-1687-f373875de6bb" 201 | }, 202 | "source": [ 203 | "## Loading the data - second part - DataLoader\n", 204 | "\n", 205 | "As was said, loading the data is in 2 parts, we provided PyTorch with a data storage, and we have to tell it how to load it. 
This is done with __DataLoader__\n", 206 | "\n", 207 | "The DataLoader defines how you retrieve the images + labels from the dataset. You can tell it to:\n", 208 | "\n", 209 | "* Set the batch size.\n", 210 | "* Shuffle and sample the data randomly, hence implementing __train_test_split__ (check SubsetRandomSampler [here](http://pytorch.org/docs/data.html?highlight=sampler))\n", 211 | "* Improve performance by loading data via separate thread `num_worker` and using `pin_memory` for CUDA. Documentation [here](http://pytorch.org/docs/notes/cuda.html?highlight=dataloader)." 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 5, 217 | "metadata": { 218 | "ExecuteTime": { 219 | "end_time": "2017-04-25T21:15:49.787999Z", 220 | "start_time": "2017-04-25T21:15:49.785741Z" 221 | }, 222 | "_cell_guid": "a2d57750-80fc-c8fe-9640-f276681f5549", 223 | "collapsed": true 224 | }, 225 | "outputs": [], 226 | "source": [ 227 | "train_loader = DataLoader(dset_train,\n", 228 | " batch_size=256,\n", 229 | " shuffle=True,\n", 230 | " num_workers=4 # 1 for CUDA\n", 231 | " # pin_memory=True # CUDA only\n", 232 | " )" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": { 238 | "_cell_guid": "1a27f04f-d260-46ec-698b-21aba8631f71" 239 | }, 240 | "source": [ 241 | "## Creating your Neural Network\n", 242 | "\n", 243 | "This is tricky, you need to compute yourself the in_channels and out_channels of your filters hence the 2304 input for the Dense layer. The first input 3 corresponds to the number of channels of your image, the 17 output corresponds to the number of target labels." 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 6, 249 | "metadata": { 250 | "ExecuteTime": { 251 | "end_time": "2017-04-25T21:15:51.046117Z", 252 | "start_time": "2017-04-25T21:15:49.789400Z" 253 | }, 254 | "_cell_guid": "c9a86c3d-b977-856a-7b71-5bf0cd509691", 255 | "collapsed": true 256 | }, 257 | "outputs": [], 258 | "source": [ 259 | "class Net(nn.Module):\n", 260 | " def __init__(self):\n", 261 | " super(Net, self).__init__()\n", 262 | " self.conv1 = nn.Conv2d(3, 32, kernel_size=3)\n", 263 | " self.conv2 = nn.Conv2d(32, 64, kernel_size=3)\n", 264 | " self.conv2_drop = nn.Dropout2d()\n", 265 | " self.fc1 = nn.Linear(2304, 256)\n", 266 | " self.fc2 = nn.Linear(256, 17)\n", 267 | "\n", 268 | " def forward(self, x):\n", 269 | " x = F.relu(F.max_pool2d(self.conv1(x), 2))\n", 270 | " x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))\n", 271 | " x = x.view(x.size(0), -1) # Flatten layer\n", 272 | " x = F.relu(self.fc1(x))\n", 273 | " x = F.dropout(x, training=self.training)\n", 274 | " x = self.fc2(x)\n", 275 | " return F.sigmoid(x)\n", 276 | "\n", 277 | "# model = Net() # On CPU\n", 278 | "model = Net().cuda() # On GPU" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": { 284 | "_cell_guid": "51e51a88-e8fc-467c-98cd-cab80f5e8679" 285 | }, 286 | "source": [ 287 | "## Defining your training function" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 7, 293 | "metadata": { 294 | "ExecuteTime": { 295 | "end_time": "2017-04-25T21:15:51.050255Z", 296 | "start_time": "2017-04-25T21:15:51.047755Z" 297 | }, 298 | "_cell_guid": "7c18ddb7-cd5a-86d3-b3b9-4c6bc467e7ea", 299 | "collapsed": true 300 | }, 301 | "outputs": [], 302 | "source": [ 303 | "optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 8, 309 | "metadata": { 310 | 
"ExecuteTime": { 311 | "end_time": "2017-04-25T21:15:51.057388Z", 312 | "start_time": "2017-04-25T21:15:51.051613Z" 313 | }, 314 | "_cell_guid": "745377b3-d942-a03a-76a9-e27cce51e01d", 315 | "collapsed": true 316 | }, 317 | "outputs": [], 318 | "source": [ 319 | "def train(epoch):\n", 320 | " model.train()\n", 321 | " for batch_idx, (data, target) in enumerate(train_loader):\n", 322 | " data, target = data.cuda(async=True), target.cuda(async=True) # On GPU\n", 323 | " data, target = Variable(data), Variable(target)\n", 324 | " optimizer.zero_grad()\n", 325 | " output = model(data)\n", 326 | " loss = F.binary_cross_entropy(output, target)\n", 327 | " loss.backward()\n", 328 | " optimizer.step()\n", 329 | " if batch_idx % 10 == 0:\n", 330 | " print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n", 331 | " epoch, batch_idx * len(data), len(train_loader.dataset),\n", 332 | " 100. * batch_idx / len(train_loader), loss.data[0]))" 333 | ] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "metadata": { 338 | "_cell_guid": "65a8fce1-f2b6-28ea-a807-216db7011267" 339 | }, 340 | "source": [ 341 | "## Training your model" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 9, 347 | "metadata": { 348 | "ExecuteTime": { 349 | "end_time": "2017-04-25T21:16:01.421896Z", 350 | "start_time": "2017-04-25T21:15:51.058525Z" 351 | }, 352 | "_cell_guid": "5e7ff060-19da-1b01-28ce-bd2e72430fee" 353 | }, 354 | "outputs": [ 355 | { 356 | "name": "stdout", 357 | "output_type": "stream", 358 | "text": [ 359 | "Train Epoch: 1 [0/40479 (0%)]\tLoss: 0.692961\n", 360 | "Train Epoch: 1 [2560/40479 (6%)]\tLoss: 0.688596\n", 361 | "Train Epoch: 1 [5120/40479 (13%)]\tLoss: 0.682759\n", 362 | "Train Epoch: 1 [7680/40479 (19%)]\tLoss: 0.676263\n", 363 | "Train Epoch: 1 [10240/40479 (25%)]\tLoss: 0.668731\n", 364 | "Train Epoch: 1 [12800/40479 (31%)]\tLoss: 0.659607\n", 365 | "Train Epoch: 1 [15360/40479 (38%)]\tLoss: 0.650362\n", 366 | "Train Epoch: 1 [17920/40479 (44%)]\tLoss: 0.635071\n", 367 | "Train Epoch: 1 [20480/40479 (50%)]\tLoss: 0.613103\n", 368 | "Train Epoch: 1 [23040/40479 (57%)]\tLoss: 0.584744\n", 369 | "Train Epoch: 1 [25600/40479 (63%)]\tLoss: 0.545705\n", 370 | "Train Epoch: 1 [28160/40479 (69%)]\tLoss: 0.500222\n", 371 | "Train Epoch: 1 [30720/40479 (75%)]\tLoss: 0.443742\n", 372 | "Train Epoch: 1 [33280/40479 (82%)]\tLoss: 0.404282\n", 373 | "Train Epoch: 1 [35840/40479 (88%)]\tLoss: 0.361815\n", 374 | "Train Epoch: 1 [38400/40479 (94%)]\tLoss: 0.338667\n" 375 | ] 376 | } 377 | ], 378 | "source": [ 379 | "for epoch in range(1, 2):\n", 380 | " train(epoch)" 381 | ] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "metadata": { 386 | "_cell_guid": "2e306e2f-87f0-f753-ab41-841a3b097afa" 387 | }, 388 | "source": [ 389 | "# Thank you for your attention\n", 390 | "\n", 391 | "Hopefully that will help you get started. I still have a lot to figure out in PyTorch like:\n", 392 | "\n", 393 | "* Implementing the train / validation split\n", 394 | "* Figure out data augmentation (and not just random transformations or images)\n", 395 | "* Implementing early stopping\n", 396 | "* Automating computation of intermediate layers\n", 397 | "* Improving the display of each epochs\n", 398 | "\n", 399 | "If you liked the kernel don't forget to vote and don't hesitate to comment." 
400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": null, 405 | "metadata": { 406 | "_cell_guid": "16f21935-088c-8590-9233-2700afeb3922", 407 | "collapsed": true 408 | }, 409 | "outputs": [], 410 | "source": [] 411 | } 412 | ], 413 | "metadata": { 414 | "_change_revision": 0, 415 | "_is_fork": false, 416 | "kernelspec": { 417 | "display_name": "Python 3", 418 | "language": "python", 419 | "name": "python3" 420 | }, 421 | "language_info": { 422 | "codemirror_mode": { 423 | "name": "ipython", 424 | "version": 3 425 | }, 426 | "file_extension": ".py", 427 | "mimetype": "text/x-python", 428 | "name": "python", 429 | "nbconvert_exporter": "python", 430 | "pygments_lexer": "ipython3", 431 | "version": "3.6.0" 432 | } 433 | }, 434 | "nbformat": 4, 435 | "nbformat_minor": 1 436 | } 437 | -------------------------------------------------------------------------------- /Dual_Feed_Image_Label.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2017-05-08T10:52:32.499144Z", 9 | "start_time": "2017-05-08T10:52:32.352476Z" 10 | }, 11 | "collapsed": true 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "from torch import nn, ones\n", 16 | "from torch.autograd import Variable\n", 17 | "from torchvision import models\n", 18 | "from torch.nn.init import kaiming_normal\n", 19 | "from torch import np\n", 20 | "import torch\n", 21 | "import torch.nn.functional as F\n", 22 | "import random\n", 23 | "import numpy as np\n", 24 | "from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence\n", 25 | "from src.p_data_augmentation import PowerPIL" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": { 32 | "ExecuteTime": { 33 | "end_time": "2017-05-08T10:52:32.680830Z", 34 | "start_time": "2017-05-08T10:52:32.500741Z" 35 | }, 36 | "collapsed": true 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "from torch.utils.data.dataset import Dataset\n", 41 | "from torchvision import transforms\n", 42 | "import pandas as pd\n", 43 | "import os\n", 44 | "from PIL import Image\n", 45 | "\n", 46 | "from torch import np, from_numpy # Numpy like wrapper\n", 47 | "\n", 48 | "class ImgTagsDualFeedDataset(Dataset):\n", 49 | " \"\"\"Dataset wrapping images, labels and target labels for Kaggle - Planet Amazon from Space competition.\n", 50 | "\n", 51 | " Arguments:\n", 52 | " A CSV file path\n", 53 | " Path to image folder\n", 54 | " Extension of images\n", 55 | " \"\"\"\n", 56 | "\n", 57 | " def __init__(self, csv_path, img_path, img_ext, vocab_mapping, transform=None):\n", 58 | " \n", 59 | " self.df = pd.read_csv(csv_path)\n", 60 | " assert self.df['image_name'].apply(lambda x: os.path.isfile(img_path + x + img_ext)).all(), \\\n", 61 | "\"Some images referenced in the CSV file were not found\"\n", 62 | "\n", 63 | " self.img_path = img_path\n", 64 | " self.img_ext = img_ext\n", 65 | " self.transform = transform\n", 66 | " \n", 67 | " self.X = self.df['image_name']\n", 68 | " \n", 69 | " self.vocab_mapping = vocab_mapping\n", 70 | "\n", 71 | " self.tags = self.df['tags'].str.split()\n", 72 | " \n", 73 | " \n", 74 | " def X(self):\n", 75 | " return self.X\n", 76 | " \n", 77 | " def __getitem__(self, index):\n", 78 | " \n", 79 | " img = Image.open(self.img_path + self.X[index] + self.img_ext)\n", 80 | " img = img.convert('RGB')\n", 81 | " if self.transform is not None:\n", 82 | " img = 
self.transform(img)\n", 83 | " \n", 84 | " vocab = self.vocab_mapping\n", 85 | " tags = []\n", 86 | " tags.append(vocab[''])\n", 87 | " tags.extend([vocab[tag] for tag in self.tags[index]])\n", 88 | " tags.append(vocab[''])\n", 89 | " \n", 90 | " tags = torch.Tensor(tags)\n", 91 | " return img, tags\n", 92 | "\n", 93 | " def __len__(self):\n", 94 | " return len(self.df.index)\n", 95 | "\n", 96 | "\n", 97 | " def collate_fn(self, data):\n", 98 | " \"\"\"Creates mini-batch tensors for tags with variable size\n", 99 | "\n", 100 | " Args:\n", 101 | " data: list of tuple (input, target). \n", 102 | " - image: torch tensor of shape (3, ?, ?).\n", 103 | " - target: torch tensor of same shape (?); variable length.\n", 104 | " Returns:\n", 105 | " images: torch tensor of shape (batch_size, 3, ?, ?).\n", 106 | " targets: torch tensor of shape (batch_size, padded_length).\n", 107 | " lengths: list; valid length for each padded tags.\n", 108 | " \"\"\"\n", 109 | " # Sort a data list by target length (descending order).\n", 110 | " data.sort(key=lambda x: len(x[1]), reverse=True)\n", 111 | " imgs, tags = zip(*data)\n", 112 | " \n", 113 | " # Merge images (from tuple of 3D tensor to 4D tensor).\n", 114 | " imgs = torch.stack(imgs, 0)\n", 115 | "\n", 116 | " # Merge tags (from tuple of 1D tensor to 2D tensor).\n", 117 | " lengths = [len(tag) for tag in tags]\n", 118 | " targets = torch.zeros(len(tags), max(lengths)).long()\n", 119 | " for i, tag in enumerate(tags):\n", 120 | " end = lengths[i]\n", 121 | " targets[i, :end] = tag[:end] \n", 122 | " return imgs, targets, lengths\n" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 3, 128 | "metadata": { 129 | "ExecuteTime": { 130 | "end_time": "2017-05-08T10:52:32.685184Z", 131 | "start_time": "2017-05-08T10:52:32.682334Z" 132 | }, 133 | "collapsed": true 134 | }, 135 | "outputs": [], 136 | "source": [ 137 | "normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],\n", 138 | " std=[0.229, 0.224, 0.225])\n", 139 | "ds_transform_augmented = transforms.Compose([\n", 140 | " transforms.RandomSizedCrop(224),\n", 141 | " PowerPIL(),\n", 142 | " transforms.ToTensor(),\n", 143 | " normalize,\n", 144 | " ])" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 4, 150 | "metadata": { 151 | "ExecuteTime": { 152 | "end_time": "2017-05-08T10:52:32.692774Z", 153 | "start_time": "2017-05-08T10:52:32.686438Z" 154 | } 155 | }, 156 | "outputs": [ 157 | { 158 | "name": "stdout", 159 | "output_type": "stream", 160 | "text": [ 161 | "{'': 0, '': 1, 'clear': 2, 'cloudy': 3, 'haze': 4, 'partly_cloudy': 5, 'agriculture': 6, 'artisinal_mine': 7, 'bare_ground': 8, 'blooming': 9, 'blow_down': 10, 'conventional_mine': 11, 'cultivation': 12, 'habitation': 13, 'primary': 14, 'road': 15, 'selective_logging': 16, 'slash_burn': 17, 'water': 18}\n" 162 | ] 163 | } 164 | ], 165 | "source": [ 166 | "vocab = ['','','clear', 'cloudy', 'haze','partly_cloudy',\n", 167 | " 'agriculture','artisinal_mine','bare_ground','blooming',\n", 168 | " 'blow_down','conventional_mine','cultivation','habitation',\n", 169 | " 'primary','road','selective_logging','slash_burn','water'\n", 170 | " ]\n", 171 | "\n", 172 | "word_to_ix = { word: i for i, word in enumerate(vocab) }\n", 173 | "print(word_to_ix)\n", 174 | "one_hot_mapping = {k:np.eye(19)[v] for k,v in word_to_ix.items()}\n", 175 | "# print(one_hot_mapping)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 5, 181 | "metadata": { 182 | "ExecuteTime": { 183 | "end_time": 
"2017-05-08T10:52:32.844270Z", 184 | "start_time": "2017-05-08T10:52:32.693843Z" 185 | }, 186 | "collapsed": true 187 | }, 188 | "outputs": [], 189 | "source": [ 190 | "X_train = ImgTagsDualFeedDataset('./data/train.csv','./data/train-jpg/','.jpg',\n", 191 | " word_to_ix,\n", 192 | " ds_transform_augmented\n", 193 | " )" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 6, 199 | "metadata": { 200 | "ExecuteTime": { 201 | "end_time": "2017-05-08T10:52:32.848407Z", 202 | "start_time": "2017-05-08T10:52:32.845707Z" 203 | }, 204 | "collapsed": true 205 | }, 206 | "outputs": [], 207 | "source": [ 208 | "train_loader = torch.utils.data.DataLoader(dataset=X_train, \n", 209 | " batch_size=64,\n", 210 | " shuffle=True,\n", 211 | " num_workers=4,\n", 212 | " pin_memory = True,\n", 213 | " collate_fn=X_train.collate_fn)" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 7, 219 | "metadata": { 220 | "ExecuteTime": { 221 | "end_time": "2017-05-08T10:52:32.919202Z", 222 | "start_time": "2017-05-08T10:52:32.849855Z" 223 | } 224 | }, 225 | "outputs": [ 226 | { 227 | "data": { 228 | "text/plain": [ 229 | "(\n", 230 | " ( 0 ,.,.) = \n", 231 | " -0.8849 -0.8678 -0.7822 ... -1.5699 -1.5699 -1.5528\n", 232 | " -0.8507 -0.8335 -0.7479 ... -1.5699 -1.5699 -1.5699\n", 233 | " -0.7650 -0.7650 -0.6794 ... -1.5699 -1.5870 -1.5870\n", 234 | " ... ⋱ ... \n", 235 | " -1.4500 -1.4500 -1.4329 ... -1.5014 -1.5357 -1.5528\n", 236 | " -1.4500 -1.4500 -1.4329 ... -1.5185 -1.5357 -1.5528\n", 237 | " -1.4500 -1.4500 -1.4329 ... -1.5185 -1.5528 -1.5528\n", 238 | " \n", 239 | " ( 1 ,.,.) = \n", 240 | " -0.7577 -0.7577 -0.7052 ... -1.3704 -1.3529 -1.3529\n", 241 | " -0.7227 -0.7227 -0.6527 ... -1.3704 -1.3529 -1.3529\n", 242 | " -0.6527 -0.6352 -0.5826 ... -1.3704 -1.3529 -1.3529\n", 243 | " ... ⋱ ... \n", 244 | " -1.2304 -1.2304 -1.2129 ... -1.2654 -1.3004 -1.3004\n", 245 | " -1.2654 -1.2654 -1.2479 ... -1.2654 -1.2829 -1.3004\n", 246 | " -1.3004 -1.2829 -1.2654 ... -1.2654 -1.2829 -1.3004\n", 247 | " \n", 248 | " ( 2 ,.,.) = \n", 249 | " -0.7413 -0.7413 -0.7064 ... -1.1944 -1.1770 -1.1596\n", 250 | " -0.7413 -0.7413 -0.6715 ... -1.1944 -1.1770 -1.1770\n", 251 | " -0.7064 -0.6890 -0.6018 ... -1.1596 -1.1770 -1.1770\n", 252 | " ... ⋱ ... \n", 253 | " -1.1073 -1.1073 -1.0724 ... -1.1770 -1.1944 -1.1944\n", 254 | " -1.1247 -1.1247 -1.1073 ... -1.1596 -1.1770 -1.1770\n", 255 | " -1.1421 -1.1247 -1.1247 ... 
-1.1596 -1.1421 -1.1596\n", 256 | " [torch.FloatTensor of size 3x224x224], \n", 257 | " 0\n", 258 | " 6\n", 259 | " 2\n", 260 | " 14\n", 261 | " 18\n", 262 | " 1\n", 263 | " [torch.FloatTensor of size 6])" 264 | ] 265 | }, 266 | "execution_count": 7, 267 | "metadata": {}, 268 | "output_type": "execute_result" 269 | } 270 | ], 271 | "source": [ 272 | "X_train[1]" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 8, 278 | "metadata": { 279 | "ExecuteTime": { 280 | "end_time": "2017-05-08T10:52:32.960983Z", 281 | "start_time": "2017-05-08T10:52:32.920544Z" 282 | }, 283 | "collapsed": true 284 | }, 285 | "outputs": [], 286 | "source": [ 287 | "class CNN_RNN_Fused(nn.Module):\n", 288 | " def __init__(self, vocab_size, embed_dim, num_rnn_layers):\n", 289 | " super(CNN_RNN_Fused, self).__init__()\n", 290 | " \n", 291 | " ## CNN part\n", 292 | " # Loading ResNet arch from PyTorch and weights from Pycaffe\n", 293 | " original_model = models.resnet50(pretrained=False)\n", 294 | " original_model.load_state_dict(torch.load('./zoo/resnet50.pth'))\n", 295 | " \n", 296 | " # Everything except the last linear layer\n", 297 | " self.convnet = nn.Sequential(*list(original_model.children())[:-1])\n", 298 | " \n", 299 | " # Get number of features of last layer\n", 300 | " num_feats_cnn = original_model.fc.in_features\n", 301 | " \n", 302 | " ## RNN part\n", 303 | " hidden_size = embed_dim # for simplification\n", 304 | " self.vocab_size = vocab_size\n", 305 | " self.embeds = nn.Embedding(vocab_size, embed_dim) # , padding_idx=0 Ignore the (0 in vocab) for gradient\n", 306 | " self.rnn = nn.LSTM(embed_dim, hidden_size, num_rnn_layers, batch_first = True)\n", 307 | " self.num_rnn_layers = num_rnn_layers\n", 308 | " \n", 309 | " ## Projection\n", 310 | " self.prj_cnn = nn.Linear(num_feats_cnn, embed_dim)\n", 311 | " self.prj_rnn = nn.Linear(hidden_size, embed_dim)\n", 312 | " \n", 313 | "\n", 314 | " ## Prediction\n", 315 | " # link embedding and decoding weight\n", 316 | " self.fc = nn.Linear(embed_dim, vocab_size)\n", 317 | " self.fc.weight = self.embeds.weight\n", 318 | " \n", 319 | " \n", 320 | " def forward(self, img, tags, lengths, hidden=None):\n", 321 | " ## CNN\n", 322 | " cnn_feats = self.convnet(img)\n", 323 | " cnn_feats = cnn_feats.view(cnn_feats.size(0), -1)\n", 324 | " cnn_feats = self.prj_cnn(cnn_feats)\n", 325 | " \n", 326 | " tag_ids = []\n", 327 | " embed = self.embeds(tags)\n", 328 | " for _ in tags:\n", 329 | " ## RNN\n", 330 | " rnn_out, hidden = self.rnn(embed, hidden)\n", 331 | "\n", 332 | " ## Projection\n", 333 | " rnn_out = self.prj_rnn(rnn_out[:,0,:]) # Extract the first prediction from sequence\n", 334 | " fuse = cnn_feats + rnn_out\n", 335 | " fuse = self.fc(fuse)\n", 336 | " predicted = fuse.max(1)[1]\n", 337 | " tag_ids.append(predicted)\n", 338 | " packed = self.embeds(predicted)\n", 339 | " tag_ids = torch.cat(tag_ids, 1)\n", 340 | " print(tag_ids)\n", 341 | " return tag_ids.squeeze()\n", 342 | "\n", 343 | " def genTags(self, inputs, states=None):\n", 344 | " tag_ids = []\n", 345 | " inputs = self.embeds(inputs)\n", 346 | " for i in range(self.vocab_size): # maximum sampling length\n", 347 | " hiddens, states = self.rnn(inputs, states) # (batch_size, 1, hidden_size)\n", 348 | " outputs = self.fc(hiddens.squeeze(1)) # (batch_size, vocab_size)\n", 349 | " # outputs = F.softmax(outputs)\n", 350 | " predicted = outputs.max(1)[1]\n", 351 | " tag_ids.append(predicted)\n", 352 | " inputs = self.embeds(predicted)\n", 353 | " tag_ids = torch.cat(tag_ids, 1) # 
(batch_size, 19)\n", 354 | " return tag_ids.squeeze()" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": 9, 360 | "metadata": { 361 | "ExecuteTime": { 362 | "end_time": "2017-05-08T10:52:35.557189Z", 363 | "start_time": "2017-05-08T10:52:32.962267Z" 364 | }, 365 | "collapsed": true 366 | }, 367 | "outputs": [], 368 | "source": [ 369 | "model = CNN_RNN_Fused(19, 5, 2).cuda()" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": 10, 375 | "metadata": { 376 | "ExecuteTime": { 377 | "end_time": "2017-05-08T10:52:35.561880Z", 378 | "start_time": "2017-05-08T10:52:35.559070Z" 379 | }, 380 | "collapsed": true 381 | }, 382 | "outputs": [], 383 | "source": [ 384 | "criterion = nn.CrossEntropyLoss()\n", 385 | "optimizer = torch.optim.Adam(model.parameters(), lr=0.01)" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": 11, 391 | "metadata": { 392 | "ExecuteTime": { 393 | "end_time": "2017-05-08T10:52:37.039622Z", 394 | "start_time": "2017-05-08T10:52:35.563145Z" 395 | } 396 | }, 397 | "outputs": [ 398 | { 399 | "name": "stdout", 400 | "output_type": "stream", 401 | "text": [ 402 | "Variable containing:\n", 403 | " 10 10 10 ... 10 10 10\n", 404 | " 10 10 10 ... 10 10 10\n", 405 | " 0 0 0 ... 0 0 0\n", 406 | " ... ⋱ ... \n", 407 | " 0 16 16 ... 16 16 16\n", 408 | " 10 10 10 ... 10 10 10\n", 409 | " 10 10 10 ... 10 10 10\n", 410 | "[torch.cuda.LongTensor of size 64x64 (GPU 0)]\n", 411 | "\n", 412 | "Variable containing:\n", 413 | " 0\n", 414 | " 0\n", 415 | " 0\n", 416 | " 0\n", 417 | " 0\n", 418 | " 0\n", 419 | " 0\n", 420 | " 0\n", 421 | " 0\n", 422 | " 0\n", 423 | " 0\n", 424 | " 0\n", 425 | " 0\n", 426 | " 0\n", 427 | " 0\n", 428 | " 0\n", 429 | " 0\n", 430 | " 0\n", 431 | " 0\n", 432 | " 0\n", 433 | " 0\n", 434 | " 0\n", 435 | " 0\n", 436 | " 0\n", 437 | " 0\n", 438 | " 0\n", 439 | " 0\n", 440 | " 0\n", 441 | " 0\n", 442 | " 0\n", 443 | " 0\n", 444 | " 0\n", 445 | " 0\n", 446 | " 0\n", 447 | " 0\n", 448 | " 0\n", 449 | " 0\n", 450 | " 0\n", 451 | " 0\n", 452 | " 0\n", 453 | " 0\n", 454 | " 0\n", 455 | " 0\n", 456 | " 0\n", 457 | " 0\n", 458 | " 0\n", 459 | " 0\n", 460 | " 0\n", 461 | " 0\n", 462 | " 0\n", 463 | " 0\n", 464 | " 0\n", 465 | " 0\n", 466 | " 0\n", 467 | " 0\n", 468 | " 0\n", 469 | " 0\n", 470 | " 0\n", 471 | " 0\n", 472 | " 0\n", 473 | " 0\n", 474 | " 0\n", 475 | " 0\n", 476 | " 0\n", 477 | " 6\n", 478 | " 6\n", 479 | " 6\n", 480 | " 6\n", 481 | " 6\n", 482 | " 6\n", 483 | " 6\n", 484 | " 6\n", 485 | " 6\n", 486 | " 6\n", 487 | " 6\n", 488 | " 7\n", 489 | " 6\n", 490 | " 6\n", 491 | " 6\n", 492 | " 6\n", 493 | " 6\n", 494 | " 2\n", 495 | " 2\n", 496 | " 2\n", 497 | " 2\n", 498 | " 2\n", 499 | " 5\n", 500 | " 2\n", 501 | " 5\n", 502 | " 2\n", 503 | " 5\n", 504 | " 5\n", 505 | " 2\n", 506 | " 4\n", 507 | " 2\n", 508 | " 5\n", 509 | " 2\n", 510 | " 2\n", 511 | " 2\n", 512 | " 2\n", 513 | " 2\n", 514 | " 5\n", 515 | " 2\n", 516 | " 2\n", 517 | " 2\n", 518 | " 2\n", 519 | " 2\n", 520 | " 5\n", 521 | " 5\n", 522 | " 4\n", 523 | " 5\n", 524 | " 2\n", 525 | " 4\n", 526 | " 5\n", 527 | " 5\n", 528 | " 2\n", 529 | " 4\n", 530 | " 4\n", 531 | " 4\n", 532 | " 2\n", 533 | " 4\n", 534 | " 2\n", 535 | " 2\n", 536 | " 3\n", 537 | " 3\n", 538 | " 3\n", 539 | " 3\n", 540 | " 3\n", 541 | " 2\n", 542 | " 8\n", 543 | " 2\n", 544 | " 2\n", 545 | " 2\n", 546 | " 2\n", 547 | " 2\n", 548 | " 5\n", 549 | " 12\n", 550 | " 2\n", 551 | " 2\n", 552 | " 2\n", 553 | " 2\n", 554 | " 2\n", 555 | " 5\n", 556 | " 2\n", 557 | " 4\n", 558 | " 
14\n", 559 | " 14\n", 560 | " 14\n", 561 | " 14\n", 562 | " 14\n", 563 | " 14\n", 564 | " 14\n", 565 | " 14\n", 566 | " 14\n", 567 | " 14\n", 568 | " 14\n", 569 | " 14\n", 570 | " 14\n", 571 | " 14\n", 572 | " 14\n", 573 | " 14\n", 574 | " 14\n", 575 | " 14\n", 576 | " 14\n", 577 | " 14\n", 578 | " 14\n", 579 | " 14\n", 580 | " 14\n", 581 | " 14\n", 582 | " 14\n", 583 | " 14\n", 584 | " 14\n", 585 | " 14\n", 586 | " 14\n", 587 | " 14\n", 588 | " 14\n", 589 | " 14\n", 590 | " 14\n", 591 | " 14\n", 592 | " 14\n", 593 | " 14\n", 594 | " 14\n", 595 | " 14\n", 596 | " 14\n", 597 | " 14\n", 598 | " 14\n", 599 | " 14\n", 600 | " 1\n", 601 | " 1\n", 602 | " 1\n", 603 | " 1\n", 604 | " 1\n", 605 | " 12\n", 606 | " 2\n", 607 | " 13\n", 608 | " 14\n", 609 | " 13\n", 610 | " 13\n", 611 | " 14\n", 612 | " 14\n", 613 | " 5\n", 614 | " 12\n", 615 | " 14\n", 616 | " 14\n", 617 | " 12\n", 618 | " 12\n", 619 | " 14\n", 620 | " 14\n", 621 | " 14\n", 622 | " 18\n", 623 | " 18\n", 624 | " 18\n", 625 | " 18\n", 626 | " 18\n", 627 | " 18\n", 628 | " 16\n", 629 | " 15\n", 630 | " 18\n", 631 | " 1\n", 632 | " 1\n", 633 | " 1\n", 634 | " 1\n", 635 | " 1\n", 636 | " 1\n", 637 | " 1\n", 638 | " 1\n", 639 | " 1\n", 640 | " 1\n", 641 | " 1\n", 642 | " 1\n", 643 | " 1\n", 644 | " 1\n", 645 | " 1\n", 646 | " 1\n", 647 | " 1\n", 648 | " 1\n", 649 | " 1\n", 650 | " 1\n", 651 | " 1\n", 652 | " 1\n", 653 | " 1\n", 654 | " 1\n", 655 | " 1\n", 656 | " 1\n", 657 | " 1\n", 658 | " 1\n", 659 | " 1\n", 660 | " 1\n", 661 | " 1\n", 662 | " 1\n", 663 | " 1\n", 664 | " 13\n", 665 | " 13\n", 666 | " 14\n", 667 | " 15\n", 668 | " 14\n", 669 | " 14\n", 670 | " 15\n", 671 | " 15\n", 672 | " 14\n", 673 | " 14\n", 674 | " 18\n", 675 | " 18\n", 676 | " 14\n", 677 | " 14\n", 678 | " 15\n", 679 | " 1\n", 680 | " 1\n", 681 | " 1\n", 682 | " 1\n", 683 | " 1\n", 684 | " 1\n", 685 | " 1\n", 686 | " 1\n", 687 | " 1\n", 688 | " 1\n", 689 | " 1\n", 690 | " 14\n", 691 | " 14\n", 692 | " 15\n", 693 | " 18\n", 694 | " 15\n", 695 | " 15\n", 696 | " 18\n", 697 | " 1\n", 698 | " 1\n", 699 | " 1\n", 700 | " 1\n", 701 | " 1\n", 702 | " 1\n", 703 | " 1\n", 704 | " 1\n", 705 | " 15\n", 706 | " 1\n", 707 | " 1\n", 708 | " 1\n", 709 | " 1\n", 710 | " 1\n", 711 | " 1\n", 712 | " 18\n", 713 | " 1\n", 714 | "[torch.cuda.LongTensor of size 301 (GPU 0)]\n", 715 | "\n" 716 | ] 717 | }, 718 | { 719 | "ename": "KeyError", 720 | "evalue": "", 721 | "output_type": "error", 722 | "traceback": [ 723 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 724 | "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", 725 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;31m# check one tag\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtargets\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcriterion\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtargets\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 16\u001b[0m \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m 
\u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 726 | "\u001b[0;32m/usr/lib/python3.6/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 204\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 205\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 206\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 207\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mhook\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 208\u001b[0m \u001b[0mhook_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 727 | "\u001b[0;32m/usr/lib/python3.6/site-packages/torch/nn/modules/loss.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input, target)\u001b[0m\n\u001b[1;32m 319\u001b[0m \u001b[0m_assert_no_grad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 320\u001b[0m return F.cross_entropy(input, target,\n\u001b[0;32m--> 321\u001b[0;31m self.weight, self.size_average)\n\u001b[0m\u001b[1;32m 322\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 323\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 728 | "\u001b[0;32m/usr/lib/python3.6/site-packages/torch/nn/functional.py\u001b[0m in \u001b[0;36mcross_entropy\u001b[0;34m(input, target, weight, size_average)\u001b[0m\n\u001b[1;32m 533\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0meach\u001b[0m \u001b[0mminibatch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 534\u001b[0m \"\"\"\n\u001b[0;32m--> 535\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mnll_loss\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlog_softmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msize_average\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 536\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 537\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 729 | "\u001b[0;32m/usr/lib/python3.6/site-packages/torch/nn/functional.py\u001b[0m in \u001b[0;36mlog_softmax\u001b[0;34m(input)\u001b[0m\n\u001b[1;32m 434\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 435\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mlog_softmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 
436\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_functions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mthnn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLogSoftmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 437\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 438\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 730 | "\u001b[0;32m/usr/lib/python3.6/site-packages/torch/nn/_functions/thnn/auto.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input, *params)\u001b[0m\n\u001b[1;32m 108\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 109\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 110\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtype2backend\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 111\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mparam\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 731 | "\u001b[0;32m/usr/lib/python3.6/site-packages/torch/_thnn/__init__.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__getitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackends\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 16\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 732 | "\u001b[0;31mKeyError\u001b[0m: " 733 | ] 734 | } 735 | ], 736 | "source": [ 737 | "epoch =0\n", 738 | "for batch_idx, (img, tags, lengths) in enumerate(train_loader):\n", 739 | " img = Variable(img).cuda()\n", 740 | " tags = Variable(tags).cuda()\n", 741 | " targets = pack_padded_sequence(tags, lengths, batch_first=True)[0]\n", 742 | "\n", 743 | " \n", 744 | " model.zero_grad()\n", 745 | " \n", 746 | " # Predict one tag at a time\n", 747 | " outputs = model(img, tags, lengths)\n", 748 | " \n", 749 | " # check one tag\n", 750 | " print(targets)\n", 751 | " loss = criterion(outputs, targets)\n", 752 | " loss.backward()\n", 753 | " optimizer.step()\n", 754 | " \n", 755 | " if batch_idx % 100 == 0:\n", 756 | " print('Train Epoch: {:03d} [{:05d}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n", 757 | " epoch, batch_idx * len(data), len(train_loader) * len(data),\n", 758 | " 100. 
* batch_idx / len(train_loader), loss.data[0]))" 759 | ] 760 | }, 761 | { 762 | "cell_type": "code", 763 | "execution_count": null, 764 | "metadata": { 765 | "collapsed": true 766 | }, 767 | "outputs": [], 768 | "source": [] 769 | } 770 | ], 771 | "metadata": { 772 | "kernelspec": { 773 | "display_name": "Python 3", 774 | "language": "python", 775 | "name": "python3" 776 | }, 777 | "language_info": { 778 | "codemirror_mode": { 779 | "name": "ipython", 780 | "version": 3 781 | }, 782 | "file_extension": ".py", 783 | "mimetype": "text/x-python", 784 | "name": "python", 785 | "nbconvert_exporter": "python", 786 | "pygments_lexer": "ipython3", 787 | "version": "3.6.0" 788 | } 789 | }, 790 | "nbformat": 4, 791 | "nbformat_minor": 2 792 | } 793 | -------------------------------------------------------------------------------- /baseline/RNN_experiment_1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2017-05-06T18:57:25.579069Z", 9 | "start_time": "2017-05-06T18:57:25.455726Z" 10 | }, 11 | "collapsed": true 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "from torchvision import models\n", 16 | "from torch.autograd import Variable\n", 17 | "\n", 18 | "## Utilities\n", 19 | "import random\n", 20 | "\n", 21 | "## Libraries\n", 22 | "import numpy as np\n", 23 | "\n", 24 | "\n", 25 | "## Torch\n", 26 | "import torch.optim as optim\n", 27 | "import torch.nn.functional as F\n", 28 | "from torchvision import transforms\n", 29 | "from torch.utils.data import DataLoader\n", 30 | "from torch.utils.data.sampler import SubsetRandomSampler\n", 31 | "import torch\n", 32 | "import torch.nn as nn" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": { 39 | "ExecuteTime": { 40 | "end_time": "2017-05-06T18:57:25.582917Z", 41 | "start_time": "2017-05-06T18:57:25.580329Z" 42 | }, 43 | "collapsed": true 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "## Normalization on ImageNet mean/std for finetuning\n", 48 | "normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],\n", 49 | " std=[0.229, 0.224, 0.225])\n", 50 | "\n", 51 | "save_dir = './snapshots'\n", 52 | "batch_size = 64" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 3, 58 | "metadata": { 59 | "ExecuteTime": { 60 | "end_time": "2017-05-06T18:57:26.107434Z", 61 | "start_time": "2017-05-06T18:57:25.584027Z" 62 | }, 63 | "collapsed": true 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "# Setting random seeds for reproducibility. 
(Caveat, some CuDNN algorithms are non-deterministic)\n", 68 | "torch.manual_seed(1337)\n", 69 | "torch.cuda.manual_seed(1337)\n", 70 | "np.random.seed(1337)\n", 71 | "random.seed(1337)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 4, 77 | "metadata": { 78 | "ExecuteTime": { 79 | "end_time": "2017-05-06T18:57:26.114413Z", 80 | "start_time": "2017-05-06T18:57:26.109005Z" 81 | }, 82 | "collapsed": true 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "## Normalization only for validation and test\n", 87 | "ds_transform_raw = transforms.Compose([\n", 88 | " transforms.CenterCrop(224),\n", 89 | " transforms.ToTensor(),\n", 90 | " normalize\n", 91 | " ])" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 57, 97 | "metadata": { 98 | "ExecuteTime": { 99 | "end_time": "2017-05-06T19:12:06.896968Z", 100 | "start_time": "2017-05-06T19:12:06.885442Z" 101 | }, 102 | "collapsed": true 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "# Load model from best iteration\n", 107 | "from src.p_neuro import ResNet50\n", 108 | "\n", 109 | "class CNN(nn.Module):\n", 110 | " ## We use ResNet weights from PyCaffe.\n", 111 | " def __init__(self, embed_size):\n", 112 | " super(CNN, self).__init__()\n", 113 | " \n", 114 | " # Loading pretrained ResNet as feature extractor\n", 115 | " original_model = ResNet50(17)\n", 116 | " model_path = './snapshots/2017-05-06_1235-cloud-habitation-PowerPIL-model_best.pth'\n", 117 | " checkpoint = torch.load(model_path)\n", 118 | " original_model.load_state_dict(checkpoint['state_dict'])\n", 119 | " \n", 120 | " # Everything except the last linear layer\n", 121 | " self.features = nn.Sequential(*list(original_model.children())[:-1])\n", 122 | " \n", 123 | " # Freeze those weights\n", 124 | " for p in self.features.parameters():\n", 125 | " p.requires_grad = False\n", 126 | "\n", 127 | " # Get number of features of last layer\n", 128 | " num_feats = original_model.classifier[0].in_features\n", 129 | " \n", 130 | " self.fc = nn.Linear(num_feats, embed_size)\n", 131 | " self.bn = nn.BatchNorm1d(embed_size, momentum=0.01)\n", 132 | "\n", 133 | " def forward(self, x):\n", 134 | " f = self.features(x)\n", 135 | " f = f.view(f.size(0), -1)\n", 136 | " out = self.fc(f)\n", 137 | " out = self.bn(out)\n", 138 | " return out" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 58, 144 | "metadata": { 145 | "ExecuteTime": { 146 | "end_time": "2017-05-06T19:12:08.919347Z", 147 | "start_time": "2017-05-06T19:12:07.431937Z" 148 | }, 149 | "collapsed": true 150 | }, 151 | "outputs": [], 152 | "source": [ 153 | "encoderCNN = CNN(2048).cuda()" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 59, 159 | "metadata": { 160 | "ExecuteTime": { 161 | "end_time": "2017-05-06T19:12:08.922481Z", 162 | "start_time": "2017-05-06T19:12:08.920546Z" 163 | }, 164 | "collapsed": true 165 | }, 166 | "outputs": [], 167 | "source": [ 168 | "from torch.nn.init import kaiming_normal" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 94, 174 | "metadata": { 175 | "ExecuteTime": { 176 | "end_time": "2017-05-06T19:37:08.514664Z", 177 | "start_time": "2017-05-06T19:37:08.507521Z" 178 | }, 179 | "collapsed": true 180 | }, 181 | "outputs": [], 182 | "source": [ 183 | "class DecoderRNN(nn.Module):\n", 184 | " def __init__(self, num_feats, num_classes, hidden_size, num_layers):\n", 185 | " super(DecoderRNN, self).__init__()\n", 186 | " self.rnn = nn.GRU(input_size=num_feats,\n", 187 | " 
hidden_size=hidden_size,\n", 188 | " num_layers=num_layers,\n", 189 | " batch_first = True)\n", 190 | " self.classifier = nn.Linear(hidden_size, num_classes)\n", 191 | " self.hidden_size = hidden_size\n", 192 | " \n", 193 | " # Init of last layer\n", 194 | " kaiming_normal(self.classifier.weight)\n", 195 | " \n", 196 | "\n", 197 | " def forward(self, feats, hidden=None):\n", 198 | " x, hidden = self.rnn(feats.unsqueeze(1), hidden)\n", 199 | " x = x.view(-1, self.hidden_size)\n", 200 | " x = self.classifier(x)\n", 201 | " return x" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 95, 207 | "metadata": { 208 | "ExecuteTime": { 209 | "end_time": "2017-05-06T19:37:09.236010Z", 210 | "start_time": "2017-05-06T19:37:09.227380Z" 211 | }, 212 | "collapsed": true 213 | }, 214 | "outputs": [], 215 | "source": [ 216 | "decoderRNN = DecoderRNN(2048, 17, 64, 10).cuda()" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 96, 222 | "metadata": { 223 | "ExecuteTime": { 224 | "end_time": "2017-05-06T19:37:09.888453Z", 225 | "start_time": "2017-05-06T19:37:09.883933Z" 226 | }, 227 | "collapsed": true 228 | }, 229 | "outputs": [], 230 | "source": [ 231 | "## Normalization on ImageNet mean/std for finetuning\n", 232 | "normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],\n", 233 | " std=[0.229, 0.224, 0.225])\n", 234 | "\n", 235 | "# Note, p_training has lr_decay automated\n", 236 | "optimizer = optim.SGD(decoderRNN.parameters(), lr=1e-1, momentum=0.9) # Finetuning whole model\n", 237 | "\n", 238 | "# criterion = ConvolutedLoss()\n", 239 | "criterion = torch.nn.MultiLabelSoftMarginLoss(\n", 240 | " weight = torch.from_numpy(\n", 241 | " 1/np.array([1, 3, 2, 1,\n", 242 | " 1, 3, 2, 3,\n", 243 | " 4, 4, 1, 2,\n", 244 | " 1, 1, 3, 4, 1])\n", 245 | " )).float().cuda()" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 97, 251 | "metadata": { 252 | "ExecuteTime": { 253 | "end_time": "2017-05-06T19:37:10.454299Z", 254 | "start_time": "2017-05-06T19:37:10.451752Z" 255 | }, 256 | "collapsed": true 257 | }, 258 | "outputs": [], 259 | "source": [ 260 | "from src.p_data_augmentation import PowerPIL\n", 261 | "from src.p2_dataload import KaggleAmazonDataset\n", 262 | "from src.p_model_selection import train_valid_split\n", 263 | "from src.p_sampler import SubsetSampler, balance_weights" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 98, 269 | "metadata": { 270 | "ExecuteTime": { 271 | "end_time": "2017-05-06T19:37:11.149969Z", 272 | "start_time": "2017-05-06T19:37:10.773470Z" 273 | }, 274 | "collapsed": true 275 | }, 276 | "outputs": [], 277 | "source": [ 278 | "# Setting random seeds for reproducibility. 
(Caveat, some CuDNN algorithms are non-deterministic)\n", 279 | "torch.manual_seed(1337)\n", 280 | "torch.cuda.manual_seed(1337)\n", 281 | "np.random.seed(1337)\n", 282 | "random.seed(1337)\n", 283 | "\n", 284 | "##############################################################\n", 285 | "## Loading the dataset\n", 286 | "\n", 287 | "## Augmentation + Normalization for full training\n", 288 | "ds_transform_augmented = transforms.Compose([\n", 289 | " transforms.RandomSizedCrop(224),\n", 290 | " PowerPIL(),\n", 291 | " transforms.ToTensor(),\n", 292 | " normalize\n", 293 | "])\n", 294 | "\n", 295 | "## Normalization only for validation and test\n", 296 | "ds_transform_raw = transforms.Compose([\n", 297 | " transforms.Scale(224),\n", 298 | " transforms.ToTensor(),\n", 299 | " normalize\n", 300 | " ])\n", 301 | "\n", 302 | "#### ######### ######## ########### #####\n", 303 | "\n", 304 | "X_train = KaggleAmazonDataset('./data/train.csv','./data/train-jpg/','.jpg',\n", 305 | " ds_transform_augmented\n", 306 | " )\n", 307 | "X_val = KaggleAmazonDataset('./data/train.csv','./data/train-jpg/','.jpg',\n", 308 | " ds_transform_raw\n", 309 | " )\n", 310 | "\n", 311 | "# Creating a validation split\n", 312 | "train_idx, valid_idx = train_valid_split(X_train, 0.2)\n", 313 | "\n", 314 | "train_sampler = SubsetRandomSampler(train_idx)\n", 315 | "valid_sampler = SubsetSampler(valid_idx)\n", 316 | "\n", 317 | "###### ########## ########## ######## #########\n", 318 | "\n", 319 | "# Both dataloader loads from the same dataset but with different indices\n", 320 | "train_loader = DataLoader(X_train,\n", 321 | " batch_size=batch_size,\n", 322 | " sampler=train_sampler,\n", 323 | " num_workers=4,\n", 324 | " pin_memory=True)\n", 325 | "\n", 326 | "valid_loader = DataLoader(X_val,\n", 327 | " batch_size=batch_size,\n", 328 | " sampler=valid_sampler,\n", 329 | " num_workers=4,\n", 330 | " pin_memory=True)" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 99, 336 | "metadata": { 337 | "ExecuteTime": { 338 | "end_time": "2017-05-06T19:37:11.358273Z", 339 | "start_time": "2017-05-06T19:37:11.352047Z" 340 | }, 341 | "collapsed": true 342 | }, 343 | "outputs": [], 344 | "source": [ 345 | "def train(epoch, train_loader, encoder, decoder, criterion, optimizer):\n", 346 | " encoder.eval()\n", 347 | " decoder.train()\n", 348 | " for batch_idx, (data, target) in enumerate(train_loader):\n", 349 | " data, target = data.cuda(async=True), target.cuda(async=True) # On GPU\n", 350 | " data, target = Variable(data), Variable(target, requires_grad=False)\n", 351 | " optimizer.zero_grad()\n", 352 | " encoded = encoder(data)\n", 353 | " output = decoder(encoded)\n", 354 | " loss = criterion(output, target)\n", 355 | " loss.backward()\n", 356 | " optimizer.step()\n", 357 | " if batch_idx % 10 == 0:\n", 358 | " print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n", 359 | " epoch, batch_idx * len(data), len(train_loader.dataset),\n", 360 | " 100. 
* batch_idx / len(train_loader), loss.data[0]))" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": 100, 366 | "metadata": { 367 | "ExecuteTime": { 368 | "end_time": "2017-05-06T19:51:05.592592Z", 369 | "start_time": "2017-05-06T19:37:12.612141Z" 370 | } 371 | }, 372 | "outputs": [ 373 | { 374 | "name": "stdout", 375 | "output_type": "stream", 376 | "text": [ 377 | "Train Epoch: 1 [0/40479 (0%)]\tLoss: 0.435556\n", 378 | "Train Epoch: 1 [640/40479 (2%)]\tLoss: 0.409556\n", 379 | "Train Epoch: 1 [1280/40479 (4%)]\tLoss: 0.369687\n", 380 | "Train Epoch: 1 [1920/40479 (6%)]\tLoss: 0.339583\n", 381 | "Train Epoch: 1 [2560/40479 (8%)]\tLoss: 0.303115\n", 382 | "Train Epoch: 1 [3200/40479 (10%)]\tLoss: 0.287817\n", 383 | "Train Epoch: 1 [3840/40479 (12%)]\tLoss: 0.251417\n", 384 | "Train Epoch: 1 [4480/40479 (14%)]\tLoss: 0.244486\n", 385 | "Train Epoch: 1 [5120/40479 (16%)]\tLoss: 0.251688\n", 386 | "Train Epoch: 1 [5760/40479 (18%)]\tLoss: 0.218834\n", 387 | "Train Epoch: 1 [6400/40479 (20%)]\tLoss: 0.228922\n", 388 | "Train Epoch: 1 [7040/40479 (22%)]\tLoss: 0.257038\n", 389 | "Train Epoch: 1 [7680/40479 (24%)]\tLoss: 0.240509\n", 390 | "Train Epoch: 1 [8320/40479 (26%)]\tLoss: 0.249612\n", 391 | "Train Epoch: 1 [8960/40479 (28%)]\tLoss: 0.212539\n", 392 | "Train Epoch: 1 [9600/40479 (30%)]\tLoss: 0.213244\n", 393 | "Train Epoch: 1 [10240/40479 (32%)]\tLoss: 0.232863\n", 394 | "Train Epoch: 1 [10880/40479 (34%)]\tLoss: 0.229000\n", 395 | "Train Epoch: 1 [11520/40479 (36%)]\tLoss: 0.217483\n", 396 | "Train Epoch: 1 [12160/40479 (38%)]\tLoss: 0.240352\n", 397 | "Train Epoch: 1 [12800/40479 (40%)]\tLoss: 0.228196\n", 398 | "Train Epoch: 1 [13440/40479 (42%)]\tLoss: 0.215130\n", 399 | "Train Epoch: 1 [14080/40479 (43%)]\tLoss: 0.215860\n", 400 | "Train Epoch: 1 [14720/40479 (45%)]\tLoss: 0.209214\n", 401 | "Train Epoch: 1 [15360/40479 (47%)]\tLoss: 0.213253\n", 402 | "Train Epoch: 1 [16000/40479 (49%)]\tLoss: 0.215986\n", 403 | "Train Epoch: 1 [16640/40479 (51%)]\tLoss: 0.204092\n", 404 | "Train Epoch: 1 [17280/40479 (53%)]\tLoss: 0.217447\n", 405 | "Train Epoch: 1 [17920/40479 (55%)]\tLoss: 0.224149\n", 406 | "Train Epoch: 1 [18560/40479 (57%)]\tLoss: 0.208092\n", 407 | "Train Epoch: 1 [19200/40479 (59%)]\tLoss: 0.208012\n", 408 | "Train Epoch: 1 [19840/40479 (61%)]\tLoss: 0.228090\n", 409 | "Train Epoch: 1 [20480/40479 (63%)]\tLoss: 0.227201\n", 410 | "Train Epoch: 1 [21120/40479 (65%)]\tLoss: 0.218654\n", 411 | "Train Epoch: 1 [21760/40479 (67%)]\tLoss: 0.245048\n", 412 | "Train Epoch: 1 [22400/40479 (69%)]\tLoss: 0.210235\n", 413 | "Train Epoch: 1 [23040/40479 (71%)]\tLoss: 0.247935\n", 414 | "Train Epoch: 1 [23680/40479 (73%)]\tLoss: 0.236987\n", 415 | "Train Epoch: 1 [24320/40479 (75%)]\tLoss: 0.216959\n", 416 | "Train Epoch: 1 [24960/40479 (77%)]\tLoss: 0.235669\n", 417 | "Train Epoch: 1 [25600/40479 (79%)]\tLoss: 0.226261\n", 418 | "Train Epoch: 1 [26240/40479 (81%)]\tLoss: 0.234269\n", 419 | "Train Epoch: 1 [26880/40479 (83%)]\tLoss: 0.218615\n", 420 | "Train Epoch: 1 [27520/40479 (85%)]\tLoss: 0.207581\n", 421 | "Train Epoch: 1 [28160/40479 (87%)]\tLoss: 0.211591\n", 422 | "Train Epoch: 1 [28800/40479 (89%)]\tLoss: 0.204346\n", 423 | "Train Epoch: 1 [29440/40479 (91%)]\tLoss: 0.221209\n", 424 | "Train Epoch: 1 [30080/40479 (93%)]\tLoss: 0.231252\n", 425 | "Train Epoch: 1 [30720/40479 (95%)]\tLoss: 0.233583\n", 426 | "Train Epoch: 1 [31360/40479 (97%)]\tLoss: 0.213050\n", 427 | "Train Epoch: 1 [32000/40479 (99%)]\tLoss: 0.208800\n", 428 | "Train Epoch: 2 
[0/40479 (0%)]\tLoss: 0.221830\n", 429 | "Train Epoch: 2 [640/40479 (2%)]\tLoss: 0.226345\n", 430 | "Train Epoch: 2 [1280/40479 (4%)]\tLoss: 0.236332\n", 431 | "Train Epoch: 2 [1920/40479 (6%)]\tLoss: 0.220857\n", 432 | "Train Epoch: 2 [2560/40479 (8%)]\tLoss: 0.199616\n", 433 | "Train Epoch: 2 [3200/40479 (10%)]\tLoss: 0.217765\n", 434 | "Train Epoch: 2 [3840/40479 (12%)]\tLoss: 0.224895\n", 435 | "Train Epoch: 2 [4480/40479 (14%)]\tLoss: 0.192970\n", 436 | "Train Epoch: 2 [5120/40479 (16%)]\tLoss: 0.223005\n", 437 | "Train Epoch: 2 [5760/40479 (18%)]\tLoss: 0.221301\n", 438 | "Train Epoch: 2 [6400/40479 (20%)]\tLoss: 0.232181\n", 439 | "Train Epoch: 2 [7040/40479 (22%)]\tLoss: 0.223385\n", 440 | "Train Epoch: 2 [7680/40479 (24%)]\tLoss: 0.221813\n", 441 | "Train Epoch: 2 [8320/40479 (26%)]\tLoss: 0.224682\n", 442 | "Train Epoch: 2 [8960/40479 (28%)]\tLoss: 0.222579\n", 443 | "Train Epoch: 2 [9600/40479 (30%)]\tLoss: 0.200874\n", 444 | "Train Epoch: 2 [10240/40479 (32%)]\tLoss: 0.214944\n", 445 | "Train Epoch: 2 [10880/40479 (34%)]\tLoss: 0.207476\n", 446 | "Train Epoch: 2 [11520/40479 (36%)]\tLoss: 0.220802\n", 447 | "Train Epoch: 2 [12160/40479 (38%)]\tLoss: 0.221033\n", 448 | "Train Epoch: 2 [12800/40479 (40%)]\tLoss: 0.197412\n", 449 | "Train Epoch: 2 [13440/40479 (42%)]\tLoss: 0.231282\n", 450 | "Train Epoch: 2 [14080/40479 (43%)]\tLoss: 0.219897\n", 451 | "Train Epoch: 2 [14720/40479 (45%)]\tLoss: 0.215508\n", 452 | "Train Epoch: 2 [15360/40479 (47%)]\tLoss: 0.213763\n", 453 | "Train Epoch: 2 [16000/40479 (49%)]\tLoss: 0.226117\n", 454 | "Train Epoch: 2 [16640/40479 (51%)]\tLoss: 0.209263\n", 455 | "Train Epoch: 2 [17280/40479 (53%)]\tLoss: 0.221371\n", 456 | "Train Epoch: 2 [17920/40479 (55%)]\tLoss: 0.203588\n", 457 | "Train Epoch: 2 [18560/40479 (57%)]\tLoss: 0.208028\n", 458 | "Train Epoch: 2 [19200/40479 (59%)]\tLoss: 0.240881\n", 459 | "Train Epoch: 2 [19840/40479 (61%)]\tLoss: 0.219481\n", 460 | "Train Epoch: 2 [20480/40479 (63%)]\tLoss: 0.224081\n", 461 | "Train Epoch: 2 [21120/40479 (65%)]\tLoss: 0.226633\n", 462 | "Train Epoch: 2 [21760/40479 (67%)]\tLoss: 0.209731\n", 463 | "Train Epoch: 2 [22400/40479 (69%)]\tLoss: 0.196602\n", 464 | "Train Epoch: 2 [23040/40479 (71%)]\tLoss: 0.224780\n", 465 | "Train Epoch: 2 [23680/40479 (73%)]\tLoss: 0.221504\n", 466 | "Train Epoch: 2 [24320/40479 (75%)]\tLoss: 0.196250\n", 467 | "Train Epoch: 2 [24960/40479 (77%)]\tLoss: 0.209817\n", 468 | "Train Epoch: 2 [25600/40479 (79%)]\tLoss: 0.215482\n", 469 | "Train Epoch: 2 [26240/40479 (81%)]\tLoss: 0.245271\n", 470 | "Train Epoch: 2 [26880/40479 (83%)]\tLoss: 0.240716\n", 471 | "Train Epoch: 2 [27520/40479 (85%)]\tLoss: 0.234947\n", 472 | "Train Epoch: 2 [28160/40479 (87%)]\tLoss: 0.207777\n", 473 | "Train Epoch: 2 [28800/40479 (89%)]\tLoss: 0.217220\n", 474 | "Train Epoch: 2 [29440/40479 (91%)]\tLoss: 0.225086\n", 475 | "Train Epoch: 2 [30080/40479 (93%)]\tLoss: 0.200565\n", 476 | "Train Epoch: 2 [30720/40479 (95%)]\tLoss: 0.245808\n", 477 | "Train Epoch: 2 [31360/40479 (97%)]\tLoss: 0.217046\n", 478 | "Train Epoch: 2 [32000/40479 (99%)]\tLoss: 0.228471\n", 479 | "Train Epoch: 3 [0/40479 (0%)]\tLoss: 0.251557\n", 480 | "Train Epoch: 3 [640/40479 (2%)]\tLoss: 0.232586\n", 481 | "Train Epoch: 3 [1280/40479 (4%)]\tLoss: 0.202972\n", 482 | "Train Epoch: 3 [1920/40479 (6%)]\tLoss: 0.223908\n", 483 | "Train Epoch: 3 [2560/40479 (8%)]\tLoss: 0.201460\n", 484 | "Train Epoch: 3 [3200/40479 (10%)]\tLoss: 0.214049\n", 485 | "Train Epoch: 3 [3840/40479 (12%)]\tLoss: 0.208666\n", 486 | "Train Epoch: 3 
[4480/40479 (14%)]\tLoss: 0.195132\n", 487 | "Train Epoch: 3 [5120/40479 (16%)]\tLoss: 0.253805\n", 488 | "Train Epoch: 3 [5760/40479 (18%)]\tLoss: 0.200275\n", 489 | "Train Epoch: 3 [6400/40479 (20%)]\tLoss: 0.211788\n", 490 | "Train Epoch: 3 [7040/40479 (22%)]\tLoss: 0.223072\n", 491 | "Train Epoch: 3 [7680/40479 (24%)]\tLoss: 0.238602\n", 492 | "Train Epoch: 3 [8320/40479 (26%)]\tLoss: 0.219171\n", 493 | "Train Epoch: 3 [8960/40479 (28%)]\tLoss: 0.233595\n", 494 | "Train Epoch: 3 [9600/40479 (30%)]\tLoss: 0.215720\n", 495 | "Train Epoch: 3 [10240/40479 (32%)]\tLoss: 0.237069\n", 496 | "Train Epoch: 3 [10880/40479 (34%)]\tLoss: 0.219968\n", 497 | "Train Epoch: 3 [11520/40479 (36%)]\tLoss: 0.210888\n", 498 | "Train Epoch: 3 [12160/40479 (38%)]\tLoss: 0.251297\n", 499 | "Train Epoch: 3 [12800/40479 (40%)]\tLoss: 0.202935\n", 500 | "Train Epoch: 3 [13440/40479 (42%)]\tLoss: 0.212312\n", 501 | "Train Epoch: 3 [14080/40479 (43%)]\tLoss: 0.232535\n", 502 | "Train Epoch: 3 [14720/40479 (45%)]\tLoss: 0.229978\n", 503 | "Train Epoch: 3 [15360/40479 (47%)]\tLoss: 0.226644\n", 504 | "Train Epoch: 3 [16000/40479 (49%)]\tLoss: 0.222986\n", 505 | "Train Epoch: 3 [16640/40479 (51%)]\tLoss: 0.216303\n", 506 | "Train Epoch: 3 [17280/40479 (53%)]\tLoss: 0.214122\n", 507 | "Train Epoch: 3 [17920/40479 (55%)]\tLoss: 0.226599\n", 508 | "Train Epoch: 3 [18560/40479 (57%)]\tLoss: 0.245155\n", 509 | "Train Epoch: 3 [19200/40479 (59%)]\tLoss: 0.217142\n", 510 | "Train Epoch: 3 [19840/40479 (61%)]\tLoss: 0.237862\n", 511 | "Train Epoch: 3 [20480/40479 (63%)]\tLoss: 0.210611\n", 512 | "Train Epoch: 3 [21120/40479 (65%)]\tLoss: 0.210613\n", 513 | "Train Epoch: 3 [21760/40479 (67%)]\tLoss: 0.220819\n", 514 | "Train Epoch: 3 [22400/40479 (69%)]\tLoss: 0.206627\n", 515 | "Train Epoch: 3 [23040/40479 (71%)]\tLoss: 0.218339\n", 516 | "Train Epoch: 3 [23680/40479 (73%)]\tLoss: 0.211441\n", 517 | "Train Epoch: 3 [24320/40479 (75%)]\tLoss: 0.205327\n", 518 | "Train Epoch: 3 [24960/40479 (77%)]\tLoss: 0.204353\n", 519 | "Train Epoch: 3 [25600/40479 (79%)]\tLoss: 0.206513\n", 520 | "Train Epoch: 3 [26240/40479 (81%)]\tLoss: 0.217377\n", 521 | "Train Epoch: 3 [26880/40479 (83%)]\tLoss: 0.227448\n", 522 | "Train Epoch: 3 [27520/40479 (85%)]\tLoss: 0.204768\n", 523 | "Train Epoch: 3 [28160/40479 (87%)]\tLoss: 0.212927\n", 524 | "Train Epoch: 3 [28800/40479 (89%)]\tLoss: 0.238617\n", 525 | "Train Epoch: 3 [29440/40479 (91%)]\tLoss: 0.206868\n", 526 | "Train Epoch: 3 [30080/40479 (93%)]\tLoss: 0.224896\n", 527 | "Train Epoch: 3 [30720/40479 (95%)]\tLoss: 0.215602\n", 528 | "Train Epoch: 3 [31360/40479 (97%)]\tLoss: 0.230982\n", 529 | "Train Epoch: 3 [32000/40479 (99%)]\tLoss: 0.221313\n", 530 | "Train Epoch: 4 [0/40479 (0%)]\tLoss: 0.209418\n", 531 | "Train Epoch: 4 [640/40479 (2%)]\tLoss: 0.218788\n", 532 | "Train Epoch: 4 [1280/40479 (4%)]\tLoss: 0.244478\n", 533 | "Train Epoch: 4 [1920/40479 (6%)]\tLoss: 0.213261\n", 534 | "Train Epoch: 4 [2560/40479 (8%)]\tLoss: 0.205852\n", 535 | "Train Epoch: 4 [3200/40479 (10%)]\tLoss: 0.238808\n", 536 | "Train Epoch: 4 [3840/40479 (12%)]\tLoss: 0.210391\n", 537 | "Train Epoch: 4 [4480/40479 (14%)]\tLoss: 0.228555\n", 538 | "Train Epoch: 4 [5120/40479 (16%)]\tLoss: 0.202205\n", 539 | "Train Epoch: 4 [5760/40479 (18%)]\tLoss: 0.244985\n", 540 | "Train Epoch: 4 [6400/40479 (20%)]\tLoss: 0.225250\n", 541 | "Train Epoch: 4 [7040/40479 (22%)]\tLoss: 0.228128\n", 542 | "Train Epoch: 4 [7680/40479 (24%)]\tLoss: 0.201752\n" 543 | ] 544 | }, 545 | { 546 | "name": "stdout", 547 | "output_type": 
"stream", 548 | "text": [ 549 | "Train Epoch: 4 [8320/40479 (26%)]\tLoss: 0.192985\n", 550 | "Train Epoch: 4 [8960/40479 (28%)]\tLoss: 0.232153\n", 551 | "Train Epoch: 4 [9600/40479 (30%)]\tLoss: 0.209456\n", 552 | "Train Epoch: 4 [10240/40479 (32%)]\tLoss: 0.212727\n", 553 | "Train Epoch: 4 [10880/40479 (34%)]\tLoss: 0.218378\n", 554 | "Train Epoch: 4 [11520/40479 (36%)]\tLoss: 0.216620\n", 555 | "Train Epoch: 4 [12160/40479 (38%)]\tLoss: 0.208027\n", 556 | "Train Epoch: 4 [12800/40479 (40%)]\tLoss: 0.234140\n", 557 | "Train Epoch: 4 [13440/40479 (42%)]\tLoss: 0.216869\n", 558 | "Train Epoch: 4 [14080/40479 (43%)]\tLoss: 0.192126\n", 559 | "Train Epoch: 4 [14720/40479 (45%)]\tLoss: 0.213921\n", 560 | "Train Epoch: 4 [15360/40479 (47%)]\tLoss: 0.208655\n", 561 | "Train Epoch: 4 [16000/40479 (49%)]\tLoss: 0.221401\n", 562 | "Train Epoch: 4 [16640/40479 (51%)]\tLoss: 0.226007\n", 563 | "Train Epoch: 4 [17280/40479 (53%)]\tLoss: 0.245658\n", 564 | "Train Epoch: 4 [17920/40479 (55%)]\tLoss: 0.216689\n", 565 | "Train Epoch: 4 [18560/40479 (57%)]\tLoss: 0.204786\n", 566 | "Train Epoch: 4 [19200/40479 (59%)]\tLoss: 0.231551\n", 567 | "Train Epoch: 4 [19840/40479 (61%)]\tLoss: 0.233882\n", 568 | "Train Epoch: 4 [20480/40479 (63%)]\tLoss: 0.237672\n", 569 | "Train Epoch: 4 [21120/40479 (65%)]\tLoss: 0.229086\n", 570 | "Train Epoch: 4 [21760/40479 (67%)]\tLoss: 0.223865\n", 571 | "Train Epoch: 4 [22400/40479 (69%)]\tLoss: 0.223414\n", 572 | "Train Epoch: 4 [23040/40479 (71%)]\tLoss: 0.232349\n", 573 | "Train Epoch: 4 [23680/40479 (73%)]\tLoss: 0.221572\n", 574 | "Train Epoch: 4 [24320/40479 (75%)]\tLoss: 0.216628\n", 575 | "Train Epoch: 4 [24960/40479 (77%)]\tLoss: 0.214147\n", 576 | "Train Epoch: 4 [25600/40479 (79%)]\tLoss: 0.225763\n", 577 | "Train Epoch: 4 [26240/40479 (81%)]\tLoss: 0.213933\n", 578 | "Train Epoch: 4 [26880/40479 (83%)]\tLoss: 0.208559\n", 579 | "Train Epoch: 4 [27520/40479 (85%)]\tLoss: 0.212612\n", 580 | "Train Epoch: 4 [28160/40479 (87%)]\tLoss: 0.209097\n", 581 | "Train Epoch: 4 [28800/40479 (89%)]\tLoss: 0.228192\n", 582 | "Train Epoch: 4 [29440/40479 (91%)]\tLoss: 0.221341\n", 583 | "Train Epoch: 4 [30080/40479 (93%)]\tLoss: 0.219709\n", 584 | "Train Epoch: 4 [30720/40479 (95%)]\tLoss: 0.216520\n", 585 | "Train Epoch: 4 [31360/40479 (97%)]\tLoss: 0.206181\n", 586 | "Train Epoch: 4 [32000/40479 (99%)]\tLoss: 0.200449\n", 587 | "Train Epoch: 5 [0/40479 (0%)]\tLoss: 0.202878\n", 588 | "Train Epoch: 5 [640/40479 (2%)]\tLoss: 0.223886\n", 589 | "Train Epoch: 5 [1280/40479 (4%)]\tLoss: 0.232988\n", 590 | "Train Epoch: 5 [1920/40479 (6%)]\tLoss: 0.212176\n", 591 | "Train Epoch: 5 [2560/40479 (8%)]\tLoss: 0.208125\n", 592 | "Train Epoch: 5 [3200/40479 (10%)]\tLoss: 0.219078\n", 593 | "Train Epoch: 5 [3840/40479 (12%)]\tLoss: 0.197650\n", 594 | "Train Epoch: 5 [4480/40479 (14%)]\tLoss: 0.218611\n", 595 | "Train Epoch: 5 [5120/40479 (16%)]\tLoss: 0.194874\n", 596 | "Train Epoch: 5 [5760/40479 (18%)]\tLoss: 0.238438\n", 597 | "Train Epoch: 5 [6400/40479 (20%)]\tLoss: 0.204757\n", 598 | "Train Epoch: 5 [7040/40479 (22%)]\tLoss: 0.207183\n", 599 | "Train Epoch: 5 [7680/40479 (24%)]\tLoss: 0.205667\n", 600 | "Train Epoch: 5 [8320/40479 (26%)]\tLoss: 0.218935\n", 601 | "Train Epoch: 5 [8960/40479 (28%)]\tLoss: 0.214910\n", 602 | "Train Epoch: 5 [9600/40479 (30%)]\tLoss: 0.245063\n", 603 | "Train Epoch: 5 [10240/40479 (32%)]\tLoss: 0.212979\n", 604 | "Train Epoch: 5 [10880/40479 (34%)]\tLoss: 0.214062\n", 605 | "Train Epoch: 5 [11520/40479 (36%)]\tLoss: 0.214615\n", 606 | "Train Epoch: 5 
[12160/40479 (38%)]\tLoss: 0.208764\n", 607 | "Train Epoch: 5 [12800/40479 (40%)]\tLoss: 0.216662\n", 608 | "Train Epoch: 5 [13440/40479 (42%)]\tLoss: 0.225370\n", 609 | "Train Epoch: 5 [14080/40479 (43%)]\tLoss: 0.225904\n", 610 | "Train Epoch: 5 [14720/40479 (45%)]\tLoss: 0.237777\n", 611 | "Train Epoch: 5 [15360/40479 (47%)]\tLoss: 0.220133\n", 612 | "Train Epoch: 5 [16000/40479 (49%)]\tLoss: 0.230607\n", 613 | "Train Epoch: 5 [16640/40479 (51%)]\tLoss: 0.222831\n", 614 | "Train Epoch: 5 [17280/40479 (53%)]\tLoss: 0.246762\n", 615 | "Train Epoch: 5 [17920/40479 (55%)]\tLoss: 0.230246\n", 616 | "Train Epoch: 5 [18560/40479 (57%)]\tLoss: 0.216454\n", 617 | "Train Epoch: 5 [19200/40479 (59%)]\tLoss: 0.229916\n", 618 | "Train Epoch: 5 [19840/40479 (61%)]\tLoss: 0.201469\n", 619 | "Train Epoch: 5 [20480/40479 (63%)]\tLoss: 0.197106\n", 620 | "Train Epoch: 5 [21120/40479 (65%)]\tLoss: 0.205562\n", 621 | "Train Epoch: 5 [21760/40479 (67%)]\tLoss: 0.213962\n", 622 | "Train Epoch: 5 [22400/40479 (69%)]\tLoss: 0.211499\n", 623 | "Train Epoch: 5 [23040/40479 (71%)]\tLoss: 0.205921\n", 624 | "Train Epoch: 5 [23680/40479 (73%)]\tLoss: 0.224541\n", 625 | "Train Epoch: 5 [24320/40479 (75%)]\tLoss: 0.203990\n", 626 | "Train Epoch: 5 [24960/40479 (77%)]\tLoss: 0.200465\n", 627 | "Train Epoch: 5 [25600/40479 (79%)]\tLoss: 0.233774\n", 628 | "Train Epoch: 5 [26240/40479 (81%)]\tLoss: 0.238049\n", 629 | "Train Epoch: 5 [26880/40479 (83%)]\tLoss: 0.215357\n", 630 | "Train Epoch: 5 [27520/40479 (85%)]\tLoss: 0.239355\n", 631 | "Train Epoch: 5 [28160/40479 (87%)]\tLoss: 0.206513\n", 632 | "Train Epoch: 5 [28800/40479 (89%)]\tLoss: 0.223596\n", 633 | "Train Epoch: 5 [29440/40479 (91%)]\tLoss: 0.219268\n", 634 | "Train Epoch: 5 [30080/40479 (93%)]\tLoss: 0.217147\n", 635 | "Train Epoch: 5 [30720/40479 (95%)]\tLoss: 0.222204\n", 636 | "Train Epoch: 5 [31360/40479 (97%)]\tLoss: 0.244053\n", 637 | "Train Epoch: 5 [32000/40479 (99%)]\tLoss: 0.200348\n", 638 | "Train Epoch: 6 [0/40479 (0%)]\tLoss: 0.221157\n", 639 | "Train Epoch: 6 [640/40479 (2%)]\tLoss: 0.252395\n", 640 | "Train Epoch: 6 [1280/40479 (4%)]\tLoss: 0.241545\n", 641 | "Train Epoch: 6 [1920/40479 (6%)]\tLoss: 0.228716\n", 642 | "Train Epoch: 6 [2560/40479 (8%)]\tLoss: 0.191572\n", 643 | "Train Epoch: 6 [3200/40479 (10%)]\tLoss: 0.194431\n", 644 | "Train Epoch: 6 [3840/40479 (12%)]\tLoss: 0.239263\n", 645 | "Train Epoch: 6 [4480/40479 (14%)]\tLoss: 0.207937\n", 646 | "Train Epoch: 6 [5120/40479 (16%)]\tLoss: 0.201746\n", 647 | "Train Epoch: 6 [5760/40479 (18%)]\tLoss: 0.223089\n", 648 | "Train Epoch: 6 [6400/40479 (20%)]\tLoss: 0.215151\n", 649 | "Train Epoch: 6 [7040/40479 (22%)]\tLoss: 0.210253\n", 650 | "Train Epoch: 6 [7680/40479 (24%)]\tLoss: 0.230108\n", 651 | "Train Epoch: 6 [8320/40479 (26%)]\tLoss: 0.209366\n", 652 | "Train Epoch: 6 [8960/40479 (28%)]\tLoss: 0.195442\n", 653 | "Train Epoch: 6 [9600/40479 (30%)]\tLoss: 0.225066\n", 654 | "Train Epoch: 6 [10240/40479 (32%)]\tLoss: 0.206362\n", 655 | "Train Epoch: 6 [10880/40479 (34%)]\tLoss: 0.231513\n", 656 | "Train Epoch: 6 [11520/40479 (36%)]\tLoss: 0.224703\n", 657 | "Train Epoch: 6 [12160/40479 (38%)]\tLoss: 0.223797\n", 658 | "Train Epoch: 6 [12800/40479 (40%)]\tLoss: 0.206543\n", 659 | "Train Epoch: 6 [13440/40479 (42%)]\tLoss: 0.249504\n", 660 | "Train Epoch: 6 [14080/40479 (43%)]\tLoss: 0.215576\n", 661 | "Train Epoch: 6 [14720/40479 (45%)]\tLoss: 0.198119\n", 662 | "Train Epoch: 6 [15360/40479 (47%)]\tLoss: 0.208393\n", 663 | "Train Epoch: 6 [16000/40479 (49%)]\tLoss: 0.214696\n", 664 | 
"Train Epoch: 6 [16640/40479 (51%)]\tLoss: 0.203025\n", 665 | "Train Epoch: 6 [17280/40479 (53%)]\tLoss: 0.192496\n", 666 | "Train Epoch: 6 [17920/40479 (55%)]\tLoss: 0.239695\n", 667 | "Train Epoch: 6 [18560/40479 (57%)]\tLoss: 0.230435\n", 668 | "Train Epoch: 6 [19200/40479 (59%)]\tLoss: 0.204453\n", 669 | "Train Epoch: 6 [19840/40479 (61%)]\tLoss: 0.234823\n", 670 | "Train Epoch: 6 [20480/40479 (63%)]\tLoss: 0.226867\n", 671 | "Train Epoch: 6 [21120/40479 (65%)]\tLoss: 0.200829\n", 672 | "Train Epoch: 6 [21760/40479 (67%)]\tLoss: 0.227352\n", 673 | "Train Epoch: 6 [22400/40479 (69%)]\tLoss: 0.254058\n", 674 | "Train Epoch: 6 [23040/40479 (71%)]\tLoss: 0.200713\n", 675 | "Train Epoch: 6 [23680/40479 (73%)]\tLoss: 0.219419\n", 676 | "Train Epoch: 6 [24320/40479 (75%)]\tLoss: 0.211055\n", 677 | "Train Epoch: 6 [24960/40479 (77%)]\tLoss: 0.224809\n", 678 | "Train Epoch: 6 [25600/40479 (79%)]\tLoss: 0.229782\n", 679 | "Train Epoch: 6 [26240/40479 (81%)]\tLoss: 0.224255\n", 680 | "Train Epoch: 6 [26880/40479 (83%)]\tLoss: 0.214961\n", 681 | "Train Epoch: 6 [27520/40479 (85%)]\tLoss: 0.235325\n", 682 | "Train Epoch: 6 [28160/40479 (87%)]\tLoss: 0.232684\n", 683 | "Train Epoch: 6 [28800/40479 (89%)]\tLoss: 0.219771\n", 684 | "Train Epoch: 6 [29440/40479 (91%)]\tLoss: 0.206950\n", 685 | "Train Epoch: 6 [30080/40479 (93%)]\tLoss: 0.206973\n", 686 | "Train Epoch: 6 [30720/40479 (95%)]\tLoss: 0.205198\n", 687 | "Train Epoch: 6 [31360/40479 (97%)]\tLoss: 0.202008\n", 688 | "Train Epoch: 6 [32000/40479 (99%)]\tLoss: 0.237647\n", 689 | "Train Epoch: 7 [0/40479 (0%)]\tLoss: 0.238941\n", 690 | "Train Epoch: 7 [640/40479 (2%)]\tLoss: 0.206254\n", 691 | "Train Epoch: 7 [1280/40479 (4%)]\tLoss: 0.202189\n", 692 | "Train Epoch: 7 [1920/40479 (6%)]\tLoss: 0.222286\n", 693 | "Train Epoch: 7 [2560/40479 (8%)]\tLoss: 0.212710\n", 694 | "Train Epoch: 7 [3200/40479 (10%)]\tLoss: 0.261827\n", 695 | "Train Epoch: 7 [3840/40479 (12%)]\tLoss: 0.233636\n", 696 | "Train Epoch: 7 [4480/40479 (14%)]\tLoss: 0.217446\n", 697 | "Train Epoch: 7 [5120/40479 (16%)]\tLoss: 0.197451\n", 698 | "Train Epoch: 7 [5760/40479 (18%)]\tLoss: 0.218938\n", 699 | "Train Epoch: 7 [6400/40479 (20%)]\tLoss: 0.206823\n", 700 | "Train Epoch: 7 [7040/40479 (22%)]\tLoss: 0.215967\n", 701 | "Train Epoch: 7 [7680/40479 (24%)]\tLoss: 0.234034\n", 702 | "Train Epoch: 7 [8320/40479 (26%)]\tLoss: 0.222782\n", 703 | "Train Epoch: 7 [8960/40479 (28%)]\tLoss: 0.221467\n", 704 | "Train Epoch: 7 [9600/40479 (30%)]\tLoss: 0.215337\n", 705 | "Train Epoch: 7 [10240/40479 (32%)]\tLoss: 0.225604\n", 706 | "Train Epoch: 7 [10880/40479 (34%)]\tLoss: 0.243185\n", 707 | "Train Epoch: 7 [11520/40479 (36%)]\tLoss: 0.216148\n", 708 | "Train Epoch: 7 [12160/40479 (38%)]\tLoss: 0.229720\n", 709 | "Train Epoch: 7 [12800/40479 (40%)]\tLoss: 0.205371\n", 710 | "Train Epoch: 7 [13440/40479 (42%)]\tLoss: 0.222294\n", 711 | "Train Epoch: 7 [14080/40479 (43%)]\tLoss: 0.223919\n", 712 | "Train Epoch: 7 [14720/40479 (45%)]\tLoss: 0.215905\n", 713 | "Train Epoch: 7 [15360/40479 (47%)]\tLoss: 0.219890\n", 714 | "Train Epoch: 7 [16000/40479 (49%)]\tLoss: 0.232056\n" 715 | ] 716 | }, 717 | { 718 | "name": "stdout", 719 | "output_type": "stream", 720 | "text": [ 721 | "Train Epoch: 7 [16640/40479 (51%)]\tLoss: 0.211867\n", 722 | "Train Epoch: 7 [17280/40479 (53%)]\tLoss: 0.213061\n", 723 | "Train Epoch: 7 [17920/40479 (55%)]\tLoss: 0.182352\n", 724 | "Train Epoch: 7 [18560/40479 (57%)]\tLoss: 0.204680\n", 725 | "Train Epoch: 7 [19200/40479 (59%)]\tLoss: 0.204601\n", 726 | "Train 
Epoch: 7 [19840/40479 (61%)]\tLoss: 0.204036\n", 727 | "Train Epoch: 7 [20480/40479 (63%)]\tLoss: 0.214126\n", 728 | "Train Epoch: 7 [21120/40479 (65%)]\tLoss: 0.235875\n", 729 | "Train Epoch: 7 [21760/40479 (67%)]\tLoss: 0.211310\n", 730 | "Train Epoch: 7 [22400/40479 (69%)]\tLoss: 0.219646\n", 731 | "Train Epoch: 7 [23040/40479 (71%)]\tLoss: 0.223003\n", 732 | "Train Epoch: 7 [23680/40479 (73%)]\tLoss: 0.220125\n", 733 | "Train Epoch: 7 [24320/40479 (75%)]\tLoss: 0.221888\n", 734 | "Train Epoch: 7 [24960/40479 (77%)]\tLoss: 0.193348\n", 735 | "Train Epoch: 7 [25600/40479 (79%)]\tLoss: 0.231392\n", 736 | "Train Epoch: 7 [26240/40479 (81%)]\tLoss: 0.215880\n", 737 | "Train Epoch: 7 [26880/40479 (83%)]\tLoss: 0.220085\n", 738 | "Train Epoch: 7 [27520/40479 (85%)]\tLoss: 0.259755\n", 739 | "Train Epoch: 7 [28160/40479 (87%)]\tLoss: 0.229210\n", 740 | "Train Epoch: 7 [28800/40479 (89%)]\tLoss: 0.228965\n", 741 | "Train Epoch: 7 [29440/40479 (91%)]\tLoss: 0.238347\n", 742 | "Train Epoch: 7 [30080/40479 (93%)]\tLoss: 0.237505\n", 743 | "Train Epoch: 7 [30720/40479 (95%)]\tLoss: 0.242703\n", 744 | "Train Epoch: 7 [31360/40479 (97%)]\tLoss: 0.201824\n", 745 | "Train Epoch: 7 [32000/40479 (99%)]\tLoss: 0.222159\n", 746 | "Train Epoch: 8 [0/40479 (0%)]\tLoss: 0.210300\n", 747 | "Train Epoch: 8 [640/40479 (2%)]\tLoss: 0.213195\n", 748 | "Train Epoch: 8 [1280/40479 (4%)]\tLoss: 0.236923\n", 749 | "Train Epoch: 8 [1920/40479 (6%)]\tLoss: 0.214789\n", 750 | "Train Epoch: 8 [2560/40479 (8%)]\tLoss: 0.189022\n", 751 | "Train Epoch: 8 [3200/40479 (10%)]\tLoss: 0.202073\n", 752 | "Train Epoch: 8 [3840/40479 (12%)]\tLoss: 0.223525\n", 753 | "Train Epoch: 8 [4480/40479 (14%)]\tLoss: 0.212316\n", 754 | "Train Epoch: 8 [5120/40479 (16%)]\tLoss: 0.197626\n", 755 | "Train Epoch: 8 [5760/40479 (18%)]\tLoss: 0.229944\n", 756 | "Train Epoch: 8 [6400/40479 (20%)]\tLoss: 0.208881\n", 757 | "Train Epoch: 8 [7040/40479 (22%)]\tLoss: 0.222905\n", 758 | "Train Epoch: 8 [7680/40479 (24%)]\tLoss: 0.215646\n", 759 | "Train Epoch: 8 [8320/40479 (26%)]\tLoss: 0.208961\n", 760 | "Train Epoch: 8 [8960/40479 (28%)]\tLoss: 0.201373\n", 761 | "Train Epoch: 8 [9600/40479 (30%)]\tLoss: 0.187059\n", 762 | "Train Epoch: 8 [10240/40479 (32%)]\tLoss: 0.205768\n", 763 | "Train Epoch: 8 [10880/40479 (34%)]\tLoss: 0.195439\n", 764 | "Train Epoch: 8 [11520/40479 (36%)]\tLoss: 0.229307\n", 765 | "Train Epoch: 8 [12160/40479 (38%)]\tLoss: 0.233949\n", 766 | "Train Epoch: 8 [12800/40479 (40%)]\tLoss: 0.208750\n", 767 | "Train Epoch: 8 [13440/40479 (42%)]\tLoss: 0.210449\n", 768 | "Train Epoch: 8 [14080/40479 (43%)]\tLoss: 0.223516\n", 769 | "Train Epoch: 8 [14720/40479 (45%)]\tLoss: 0.224399\n", 770 | "Train Epoch: 8 [15360/40479 (47%)]\tLoss: 0.222601\n", 771 | "Train Epoch: 8 [16000/40479 (49%)]\tLoss: 0.215521\n", 772 | "Train Epoch: 8 [16640/40479 (51%)]\tLoss: 0.217097\n", 773 | "Train Epoch: 8 [17280/40479 (53%)]\tLoss: 0.210878\n", 774 | "Train Epoch: 8 [17920/40479 (55%)]\tLoss: 0.240644\n", 775 | "Train Epoch: 8 [18560/40479 (57%)]\tLoss: 0.234446\n", 776 | "Train Epoch: 8 [19200/40479 (59%)]\tLoss: 0.220509\n", 777 | "Train Epoch: 8 [19840/40479 (61%)]\tLoss: 0.214906\n", 778 | "Train Epoch: 8 [20480/40479 (63%)]\tLoss: 0.231343\n", 779 | "Train Epoch: 8 [21120/40479 (65%)]\tLoss: 0.221846\n", 780 | "Train Epoch: 8 [21760/40479 (67%)]\tLoss: 0.231584\n", 781 | "Train Epoch: 8 [22400/40479 (69%)]\tLoss: 0.201782\n", 782 | "Train Epoch: 8 [23040/40479 (71%)]\tLoss: 0.234168\n", 783 | "Train Epoch: 8 [23680/40479 (73%)]\tLoss: 
0.225944\n", 784 | "Train Epoch: 8 [24320/40479 (75%)]\tLoss: 0.219733\n", 785 | "Train Epoch: 8 [24960/40479 (77%)]\tLoss: 0.200213\n", 786 | "Train Epoch: 8 [25600/40479 (79%)]\tLoss: 0.222768\n", 787 | "Train Epoch: 8 [26240/40479 (81%)]\tLoss: 0.224571\n", 788 | "Train Epoch: 8 [26880/40479 (83%)]\tLoss: 0.227366\n", 789 | "Train Epoch: 8 [27520/40479 (85%)]\tLoss: 0.192402\n", 790 | "Train Epoch: 8 [28160/40479 (87%)]\tLoss: 0.206933\n", 791 | "Train Epoch: 8 [28800/40479 (89%)]\tLoss: 0.222230\n", 792 | "Train Epoch: 8 [29440/40479 (91%)]\tLoss: 0.241823\n", 793 | "Train Epoch: 8 [30080/40479 (93%)]\tLoss: 0.219849\n", 794 | "Train Epoch: 8 [30720/40479 (95%)]\tLoss: 0.223774\n", 795 | "Train Epoch: 8 [31360/40479 (97%)]\tLoss: 0.221740\n", 796 | "Train Epoch: 8 [32000/40479 (99%)]\tLoss: 0.226302\n", 797 | "Train Epoch: 9 [0/40479 (0%)]\tLoss: 0.246171\n", 798 | "Train Epoch: 9 [640/40479 (2%)]\tLoss: 0.194035\n", 799 | "Train Epoch: 9 [1280/40479 (4%)]\tLoss: 0.226316\n", 800 | "Train Epoch: 9 [1920/40479 (6%)]\tLoss: 0.219594\n", 801 | "Train Epoch: 9 [2560/40479 (8%)]\tLoss: 0.192174\n", 802 | "Train Epoch: 9 [3200/40479 (10%)]\tLoss: 0.208604\n", 803 | "Train Epoch: 9 [3840/40479 (12%)]\tLoss: 0.238914\n", 804 | "Train Epoch: 9 [4480/40479 (14%)]\tLoss: 0.227050\n", 805 | "Train Epoch: 9 [5120/40479 (16%)]\tLoss: 0.218689\n", 806 | "Train Epoch: 9 [5760/40479 (18%)]\tLoss: 0.241844\n", 807 | "Train Epoch: 9 [6400/40479 (20%)]\tLoss: 0.230105\n", 808 | "Train Epoch: 9 [7040/40479 (22%)]\tLoss: 0.221337\n", 809 | "Train Epoch: 9 [7680/40479 (24%)]\tLoss: 0.226021\n", 810 | "Train Epoch: 9 [8320/40479 (26%)]\tLoss: 0.206170\n", 811 | "Train Epoch: 9 [8960/40479 (28%)]\tLoss: 0.189948\n", 812 | "Train Epoch: 9 [9600/40479 (30%)]\tLoss: 0.218153\n", 813 | "Train Epoch: 9 [10240/40479 (32%)]\tLoss: 0.221108\n", 814 | "Train Epoch: 9 [10880/40479 (34%)]\tLoss: 0.200809\n", 815 | "Train Epoch: 9 [11520/40479 (36%)]\tLoss: 0.224982\n", 816 | "Train Epoch: 9 [12160/40479 (38%)]\tLoss: 0.232980\n", 817 | "Train Epoch: 9 [12800/40479 (40%)]\tLoss: 0.199956\n", 818 | "Train Epoch: 9 [13440/40479 (42%)]\tLoss: 0.225886\n", 819 | "Train Epoch: 9 [14080/40479 (43%)]\tLoss: 0.234917\n", 820 | "Train Epoch: 9 [14720/40479 (45%)]\tLoss: 0.215907\n", 821 | "Train Epoch: 9 [15360/40479 (47%)]\tLoss: 0.201263\n", 822 | "Train Epoch: 9 [16000/40479 (49%)]\tLoss: 0.200151\n", 823 | "Train Epoch: 9 [16640/40479 (51%)]\tLoss: 0.200983\n", 824 | "Train Epoch: 9 [17280/40479 (53%)]\tLoss: 0.199013\n", 825 | "Train Epoch: 9 [17920/40479 (55%)]\tLoss: 0.238366\n", 826 | "Train Epoch: 9 [18560/40479 (57%)]\tLoss: 0.212182\n", 827 | "Train Epoch: 9 [19200/40479 (59%)]\tLoss: 0.216866\n", 828 | "Train Epoch: 9 [19840/40479 (61%)]\tLoss: 0.223518\n", 829 | "Train Epoch: 9 [20480/40479 (63%)]\tLoss: 0.206608\n", 830 | "Train Epoch: 9 [21120/40479 (65%)]\tLoss: 0.221796\n", 831 | "Train Epoch: 9 [21760/40479 (67%)]\tLoss: 0.243650\n", 832 | "Train Epoch: 9 [22400/40479 (69%)]\tLoss: 0.215382\n", 833 | "Train Epoch: 9 [23040/40479 (71%)]\tLoss: 0.212058\n", 834 | "Train Epoch: 9 [23680/40479 (73%)]\tLoss: 0.225021\n", 835 | "Train Epoch: 9 [24320/40479 (75%)]\tLoss: 0.227688\n", 836 | "Train Epoch: 9 [24960/40479 (77%)]\tLoss: 0.235619\n", 837 | "Train Epoch: 9 [25600/40479 (79%)]\tLoss: 0.238336\n", 838 | "Train Epoch: 9 [26240/40479 (81%)]\tLoss: 0.207296\n", 839 | "Train Epoch: 9 [26880/40479 (83%)]\tLoss: 0.208340\n", 840 | "Train Epoch: 9 [27520/40479 (85%)]\tLoss: 0.220505\n", 841 | "Train Epoch: 9 
[28160/40479 (87%)]\tLoss: 0.222068\n", 842 | "Train Epoch: 9 [28800/40479 (89%)]\tLoss: 0.221910\n", 843 | "Train Epoch: 9 [29440/40479 (91%)]\tLoss: 0.185835\n", 844 | "Train Epoch: 9 [30080/40479 (93%)]\tLoss: 0.206984\n", 845 | "Train Epoch: 9 [30720/40479 (95%)]\tLoss: 0.236037\n", 846 | "Train Epoch: 9 [31360/40479 (97%)]\tLoss: 0.230958\n", 847 | "Train Epoch: 9 [32000/40479 (99%)]\tLoss: 0.197022\n" 848 | ] 849 | } 850 | ], 851 | "source": [ 852 | "for epoch in range(1, 10):\n", 853 | " train(epoch, train_loader, encoderCNN, decoderRNN, criterion, optimizer)" 854 | ] 855 | }, 856 | { 857 | "cell_type": "code", 858 | "execution_count": null, 859 | "metadata": { 860 | "collapsed": true 861 | }, 862 | "outputs": [], 863 | "source": [] 864 | }, 865 | { 866 | "cell_type": "code", 867 | "execution_count": null, 868 | "metadata": { 869 | "collapsed": true 870 | }, 871 | "outputs": [], 872 | "source": [] 873 | }, 874 | { 875 | "cell_type": "code", 876 | "execution_count": null, 877 | "metadata": { 878 | "collapsed": true 879 | }, 880 | "outputs": [], 881 | "source": [] 882 | }, 883 | { 884 | "cell_type": "code", 885 | "execution_count": null, 886 | "metadata": { 887 | "collapsed": true 888 | }, 889 | "outputs": [], 890 | "source": [] 891 | }, 892 | { 893 | "cell_type": "code", 894 | "execution_count": null, 895 | "metadata": { 896 | "collapsed": true 897 | }, 898 | "outputs": [], 899 | "source": [] 900 | } 901 | ], 902 | "metadata": { 903 | "kernelspec": { 904 | "display_name": "Python 3", 905 | "language": "python", 906 | "name": "python3" 907 | }, 908 | "language_info": { 909 | "codemirror_mode": { 910 | "name": "ipython", 911 | "version": 3 912 | }, 913 | "file_extension": ".py", 914 | "mimetype": "text/x-python", 915 | "name": "python", 916 | "nbconvert_exporter": "python", 917 | "pygments_lexer": "ipython3", 918 | "version": "3.6.0" 919 | } 920 | }, 921 | "nbformat": 4, 922 | "nbformat_minor": 2 923 | } 924 | --------------------------------------------------------------------------------
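For reference, the CNN-encoder / GRU-decoder wiring that RNN_experiment_1.ipynb builds on top of the repository's fine-tuned snapshot can be reduced to the standalone sketch below. It is a minimal sketch, not the repository's code: it swaps torchvision's ImageNet-pretrained ResNet-50 in for the fine-tuned src.p_neuro.ResNet50 checkpoint, keeps the 17 Planet labels and the frozen-backbone / single-step-GRU design from the notebook, and uses current PyTorch idioms; class names such as FrozenEncoder and GRUDecoder are illustrative only.

# Minimal sketch of the notebook's encoder/decoder pipeline.
# Assumptions: torchvision ImageNet ResNet-50 instead of the repo's
# fine-tuned checkpoint; 17 output labels; illustrative class names.
import torch
import torch.nn as nn
from torchvision import models

class FrozenEncoder(nn.Module):
    """Pretrained ResNet-50 with the final fully-connected layer removed,
    used as a fixed 2048-d feature extractor."""
    def __init__(self):
        super(FrozenEncoder, self).__init__()
        # pre-0.13 torchvision flag; newer versions use
        # models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        backbone = models.resnet50(pretrained=True)
        # everything except the last linear layer (keeps the avgpool)
        self.features = nn.Sequential(*list(backbone.children())[:-1])
        for p in self.features.parameters():
            p.requires_grad = False  # freeze the backbone

    def forward(self, x):
        f = self.features(x)          # (N, 2048, 1, 1)
        return f.view(f.size(0), -1)  # (N, 2048)

class GRUDecoder(nn.Module):
    """Single-step GRU over the image embedding, followed by a linear
    classifier emitting one logit per label."""
    def __init__(self, num_feats=2048, num_classes=17,
                 hidden_size=64, num_layers=10):
        super(GRUDecoder, self).__init__()
        self.rnn = nn.GRU(num_feats, hidden_size, num_layers,
                          batch_first=True)
        self.classifier = nn.Linear(hidden_size, num_classes)
        self.hidden_size = hidden_size

    def forward(self, feats, hidden=None):
        # treat the embedding as a length-1 sequence: (N, 1, num_feats)
        x, hidden = self.rnn(feats.unsqueeze(1), hidden)
        return self.classifier(x.view(-1, self.hidden_size))

if __name__ == '__main__':
    encoder, decoder = FrozenEncoder().eval(), GRUDecoder()
    criterion = nn.MultiLabelSoftMarginLoss()
    images = torch.randn(4, 3, 224, 224)             # dummy batch
    targets = torch.randint(0, 2, (4, 17)).float()   # dummy multi-hot labels
    with torch.no_grad():
        embeddings = encoder(images)                 # frozen features
    logits = decoder(embeddings)                     # (4, 17) logits
    print(logits.shape, criterion(logits, targets).item())

As in the notebook, only the decoder's parameters would be passed to the optimizer, since the encoder's weights are frozen and the encoder is kept in eval() mode during training.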