├── media ├── agg1.jpg ├── chips.jpg ├── haze1.jpg ├── pc1.jpg ├── river.jpg ├── road.jpg ├── chipdesc.jpg ├── cloudy_1.jpg └── habitation1.jpg ├── .gitignore ├── src ├── p_logger.py ├── p_model_selection.py ├── k_model_selection.py ├── p_validation.py ├── p_training.py ├── p2_loss.py ├── p_prediction.py ├── p_dataload.py ├── p_sampler.py ├── p2_validation.py ├── p2_prediction.py ├── p2_dataload.py ├── p_metrics.py ├── p2_metrics.py ├── p3_neuroRNN.py ├── p_data_augmentation.py ├── p_neuro.py ├── _deprecated.py └── k_dataloader.py ├── compute-mean-std.py ├── adjust_prediction.py ├── baseline ├── unfinished_attempts │ ├── 000-Mxnet-ResNet-baseline-TODO.py │ ├── 002-Keras-Inception-Transfer.py │ └── 000-Mxnet-Resnet-extraction-XGBoost-MultiLabel-TODO.ipynb ├── 001-keras-baseline-0.80752.py ├── pytorch_scatter_gather_onehotencoding.ipynb ├── 003-pytorch-kernel-baseline.ipynb └── RNN_experiment_1.ipynb ├── Ideas.txt ├── README.md ├── pytorch_predict_only.py ├── main_keras.py ├── main_pytorch-baseline.py ├── main_pytorch.py └── Dual_Feed_Image_Label.ipynb /media/agg1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mratsim/Amazon-Forest-Computer-Vision/HEAD/media/agg1.jpg -------------------------------------------------------------------------------- /media/chips.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mratsim/Amazon-Forest-Computer-Vision/HEAD/media/chips.jpg -------------------------------------------------------------------------------- /media/haze1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mratsim/Amazon-Forest-Computer-Vision/HEAD/media/haze1.jpg -------------------------------------------------------------------------------- /media/pc1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mratsim/Amazon-Forest-Computer-Vision/HEAD/media/pc1.jpg -------------------------------------------------------------------------------- /media/river.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mratsim/Amazon-Forest-Computer-Vision/HEAD/media/river.jpg -------------------------------------------------------------------------------- /media/road.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mratsim/Amazon-Forest-Computer-Vision/HEAD/media/road.jpg -------------------------------------------------------------------------------- /media/chipdesc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mratsim/Amazon-Forest-Computer-Vision/HEAD/media/chipdesc.jpg -------------------------------------------------------------------------------- /media/cloudy_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mratsim/Amazon-Forest-Computer-Vision/HEAD/media/cloudy_1.jpg -------------------------------------------------------------------------------- /media/habitation1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mratsim/Amazon-Forest-Computer-Vision/HEAD/media/habitation1.jpg -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | data/ 2 | out/ 3 | core 4 | EDA/ 5 | kernels/ 6 | pretrained-models/ 7 | tmp/ 8 | .ipynb_checkpoints/ 9 | __pycache__/ 10 | snapshots/ 11 | zoo/ 12 | -------------------------------------------------------------------------------- /src/p_logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | def setup_logs(save_dir, run_name): 5 | # initialize logger 6 | logger = logging.getLogger("Planet-Amazon") 7 | logger.setLevel(logging.INFO) 8 | 9 | # create the logging file handler 10 | log_file = os.path.join(save_dir, run_name + ".log") 11 | fh = logging.FileHandler(log_file) 12 | 13 | # create the logging console handler 14 | ch = logging.StreamHandler() 15 | 16 | # format 17 | formatter = logging.Formatter("%(asctime)s - %(message)s") 18 | fh.setFormatter(formatter) 19 | 20 | # add handlers to logger object 21 | logger.addHandler(fh) 22 | logger.addHandler(ch) 23 | 24 | return logger -------------------------------------------------------------------------------- /src/p_model_selection.py: -------------------------------------------------------------------------------- 1 | import random 2 | from math import floor 3 | 4 | def train_valid_split(dataset, test_size = 0.25, shuffle = False, random_seed = 0): 5 | """ Return a list of split indices from a Dataset. 6 | Indices can be used with DataLoader to build a train and validation set. 7 | 8 | Arguments: 9 | A Dataset 10 | A test_size, as a float between 0 and 1 (percentage split) or as an int (fixed number split) 11 | Shuffling True or False 12 | Random seed 13 | """ 14 | length = dataset.__len__() 15 | indices = list(range(length)) 16 | 17 | if shuffle == True: 18 | random.seed(random_seed) 19 | random.shuffle(indices) 20 | 21 | if type(test_size) is float: 22 | split = floor(test_size * length) 23 | elif type(test_size) is int: 24 | split = test_size 25 | else: 26 | raise ValueError('%s should be an int or a float' % str(test_size)) 27 | return indices[split:], indices[:split] -------------------------------------------------------------------------------- /src/k_model_selection.py: -------------------------------------------------------------------------------- 1 | import random 2 | from math import floor 3 | 4 | def train_valid_split(dataframe, test_size = 0.25, shuffle = False, random_seed = 0): 5 | """ Return a list of split indices from a DataFrame. 6 | Indices can be used with DataLoader to build a train and validation set.
7 | 8 | Arguments: 9 | A Dataframe 10 | A test_size, as a float between 0 and 1 (percentage split) or as an int (fixed number split) 11 | Shuffling True or False 12 | Random seed 13 | """ 14 | length = len(dataframe.index) 15 | indices = list(range(length)) 16 | 17 | if shuffle == True: 18 | random.seed(random_seed) 19 | random.shuffle(indices) 20 | 21 | if type(test_size) is float: 22 | split = floor(test_size * length) 23 | elif type(test_size) is int: 24 | split = test_size 25 | else: 26 | raise ValueError('%s should be an int or a float' % str(test_size)) 27 | return indices[split:], indices[:split] -------------------------------------------------------------------------------- /compute-mean-std.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import pandas as pd 4 | from tqdm import tqdm 5 | 6 | RESOLUTION = 96 # Ideally we shouldn't be resizing but I'm lacking memory 7 | 8 | if __name__ == "__main__": 9 | data = [] 10 | df_train = pd.read_csv('./data/train.csv') 11 | 12 | for file in tqdm(df_train['image_name'], miniters=256): 13 | img = cv2.imread('./data/train-jpg/{}.jpg'.format(file)) 14 | data.append(cv2.resize(img,(RESOLUTION,RESOLUTION))) 15 | 16 | data = np.array(data, np.float32) / 255 # Must use float32 at least otherwise we get over float16 limits 17 | print("Shape: ", data.shape) 18 | 19 | means = [] 20 | stdevs = [] 21 | for i in range(3): 22 | pixels = data[:,:,:,i].ravel() 23 | means.append(np.mean(pixels)) 24 | stdevs.append(np.std(pixels)) 25 | 26 | print("means: {}".format(means)) 27 | print("stdevs: {}".format(stdevs)) 28 | print('transforms.Normalize(mean = {}, std = {})'.format(means, stdevs)) -------------------------------------------------------------------------------- /adjust_prediction.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import pandas as pd 4 | import torch 5 | from src.p_dataload import KaggleAmazonDataset 6 | 7 | 8 | ## Load MultiLabelBinarizer config 9 | X_train = KaggleAmazonDataset('./data/train.csv','./data/train-jpg/','.jpg') 10 | mlb = X_train.getLabelEncoder() 11 | 12 | ## Load sample submission: 13 | df_test = pd.read_csv('./data/sample_submission_v2.csv') 14 | 15 | ## Load raw prediction (proba): 16 | subm_proba = np.loadtxt('./out/2017-05-12_1223-resnet50-L2reg-new-data-raw-pred-0.922374050536.csv', 17 | delimiter=';') 18 | 19 | ## Load threshold: 20 | model_path = './snapshots/2017-05-12_1223-resnet50-L2reg-new-data-model_best.pth' 21 | checkpoint = torch.load(model_path) 22 | threshold = checkpoint['threshold'] 23 | 24 | ## Force single weather: TODO check if cloudy is alone 25 | weather = subm_proba[:, 0:4] 26 | indices = np.argmax(weather, axis=1) 27 | new_weather = np.eye(4)[indices] 28 | subm_proba[:,0:4] = new_weather 29 | 30 | predictions = subm_proba > threshold 31 | 32 | result = pd.DataFrame({ 33 | 'image_name': df_test['image_name'], 34 | 'tags': mlb.inverse_transform(predictions) 35 | }) 36 | result['tags'] = result['tags'].apply(lambda tags: " ".join(tags)) 37 | 38 | result_path = './out/2017-05-12_1223-resnet50-L2reg-new-data-adjusted-pred-0.922374050536.csv' 39 | result.to_csv(result_path, index=False) -------------------------------------------------------------------------------- /src/p_validation.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Variable 2 | import numpy as np 3 | import logging 4 | import
torch.nn.functional as F 5 | from tqdm import tqdm 6 | 7 | from src.p_metrics import best_f2_score 8 | 9 | ## Get the same logger from main" 10 | logger = logging.getLogger("Planet-Amazon") 11 | 12 | ################################################## 13 | #### Validate function 14 | def validate(epoch,valid_loader,model,loss_func,mlb): 15 | ## Volatile variables do not save intermediate results and build graphs for backprop, achieving massive memory savings. 16 | 17 | model.eval() 18 | total_loss = 0 19 | predictions = [] 20 | true_labels = [] 21 | 22 | logger.info("Starting Validation") 23 | for batch_idx, (data, target) in enumerate(tqdm(valid_loader)): 24 | true_labels.append(target.cpu().numpy()) 25 | 26 | data, target = data.cuda(async=True), target.cuda(async=True) 27 | data, target = Variable(data, volatile=True), Variable(target, volatile=True) 28 | 29 | pred = model(data) 30 | predictions.append(F.sigmoid(pred).data.cpu().numpy()) 31 | 32 | total_loss += loss_func(pred,target).data[0] 33 | 34 | avg_loss = total_loss / len(valid_loader) 35 | 36 | predictions = np.vstack(predictions) 37 | true_labels = np.vstack(true_labels) 38 | 39 | score, threshold = best_f2_score(true_labels, predictions) 40 | logger.info("Corresponding tags\n{}".format(mlb.classes_)) 41 | 42 | logger.info("===> Validation - Avg. loss: {:.4f}\tF2 Score: {:.4f}".format(avg_loss,score)) 43 | return score, avg_loss, threshold -------------------------------------------------------------------------------- /src/p_training.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Variable 2 | import torch 3 | import os 4 | import logging 5 | 6 | ## Get the same logger from main" 7 | logger = logging.getLogger("Planet-Amazon") 8 | 9 | 10 | def lr_scheduler(optimizer, epoch, init_lr=0.01, lr_decay_epoch=7): 11 | """Decay learning rate by a factor of 0.1 every lr_decay_epoch epochs.""" 12 | lr = init_lr * (0.1**(epoch // lr_decay_epoch)) 13 | 14 | if epoch % lr_decay_epoch == 0: 15 | logger.info('LR is set to {}'.format(lr)) 16 | 17 | for param_group in optimizer.param_groups: 18 | param_group['lr'] = lr 19 | 20 | return optimizer 21 | 22 | def train(epoch,train_loader,model,loss_func, optimizer): 23 | model.train() 24 | optimizer = lr_scheduler(optimizer, epoch) 25 | 26 | 27 | for batch_idx, (data, target) in enumerate(train_loader): 28 | data, target = data.cuda(async=True), target.cuda(async=True) 29 | data, target = Variable(data), Variable(target, requires_grad=False) 30 | optimizer.zero_grad() 31 | output = model(data) 32 | loss = loss_func(output,target) 33 | loss.backward() 34 | optimizer.step() 35 | if batch_idx % 100 == 0: 36 | logger.info('Train Epoch: {:03d} [{:05d}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 37 | epoch, batch_idx * len(data), len(train_loader) * len(data), 38 | 100. 
* batch_idx / len(train_loader), loss.data[0])) 39 | 40 | def snapshot(dir_path, run_name, is_best, state): 41 | snapshot_file = os.path.join(dir_path, 42 | run_name + '-model_best.pth') 43 | if is_best: 44 | torch.save(state, snapshot_file) 45 | logger.info("Snapshot saved to {}".format(snapshot_file)) -------------------------------------------------------------------------------- /baseline/unfinished_attempts/000-Mxnet-ResNet-baseline-TODO.py: -------------------------------------------------------------------------------- 1 | import xgboost as xgb 2 | import cv2 3 | import mxnet as mx 4 | import os 5 | import numpy as np 6 | from timeit import default_timer as timer 7 | from sklearn.model_selection import train_test_split 8 | 9 | SRC_IMAGES = '../data/train-jpg/' 10 | SRCDIR = os.listdir(SRC_IMAGES) 11 | TMPDIR = './tmp/' 12 | 13 | def get_extractor(): 14 | model = mx.model.FeedForward.load('../pretrained-models/resnet-50', 0, ctx=mx.gpu(), numpy_batch_size=1) 15 | fea_symbol = model.symbol.get_internals()["flatten0_output"] 16 | feature_extractor = mx.model.FeedForward(ctx=mx.gpu(), symbol=fea_symbol, numpy_batch_size=64, 17 | arg_params=model.arg_params, aux_params=model.aux_params, 18 | allow_extra_params=True) 19 | 20 | return feature_extractor 21 | 22 | 23 | def prepare_image_batch(image): 24 | img = SRC_IMAGES + image 25 | img = cv2.imread(img) 26 | img = 255.0 / np.amax(img) * img 27 | # img = cv2.equalizeHist(img.astype(np.uint8)) 28 | img = cv2.resize(img.astype(np.int16), (224, 224)) 29 | img = img.reshape(1,3,224,224) 30 | 31 | return img 32 | 33 | def calc_features(): 34 | net = get_extractor() 35 | n=1 36 | for image in SRCDIR: 37 | print("Doing image %s/%s: %s" % (n, len(SRCDIR), image)) 38 | img = prepare_image_batch(image) 39 | print(img.shape) 40 | feats = net.predict(img) 41 | print("Prediction features have shape:") 42 | print(feats.shape) 43 | np.save(TMPDIR+image, feats) 44 | 45 | n+=1 46 | 47 | 48 | if __name__ == '__main__': 49 | start_time = timer() 50 | calc_features() 51 | # make_submit() 52 | end_time = timer() 53 | print("Elapsed time: %s" % (end_time - start_time)) -------------------------------------------------------------------------------- /src/p2_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | from torch.nn.modules.loss import _WeightedLoss 3 | import torch 4 | from torch.autograd import Variable 5 | 6 | # If needed to code the categorical cross entropy from scratch: https://github.com/twitter/torch-autograd/blob/master/src/loss/init.lua 7 | 8 | class ConvolutedLoss(_WeightedLoss): 9 | """ Treat the weather as MultiClassification (only one label possible) 10 | Treat the rest as Multilabel 11 | ==> Multi-Task learning 12 | """ 13 | def __init__(self, weight=None, size_average=True): 14 | super(ConvolutedLoss, self).__init__(size_average) 15 | if weight is None: 16 | self.register_buffer('weight_weather', None) 17 | self.register_buffer('weight_other', None) 18 | else: 19 | self.register_buffer('weight_weather', weight[:4]) # Weather conditions are the first 4 20 | self.register_buffer('weight_other', weight[4:]) 21 | 22 | def forward(self, input, target): 23 | # Cross-Entropy wants categorical not one-hot 24 | # Reverse one hot 25 | weather_targets = Variable(torch.arange(0,4).expand(target.size(0),4).masked_select(target[:,:4].data.byte().cpu()).long().cuda(), requires_grad = False) 26 | 27 | loss_weather = F.cross_entropy(input[:,:4], 28 | weather_targets, 29 | 
self.weight_weather, 30 | self.size_average) 31 | loss_other = F.binary_cross_entropy(F.sigmoid(input[:,4:]), 32 | target[:,4:], 33 | self.weight_other, 34 | self.size_average) 35 | 36 | return (loss_weather * 4/17) + (loss_other * 13/17) 37 | -------------------------------------------------------------------------------- /src/p_prediction.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Variable 2 | import numpy as np 3 | import logging 4 | import os 5 | import pandas as pd 6 | import torch.nn.functional as F 7 | from tqdm import tqdm 8 | 9 | ## Get the same logger from main" 10 | logger = logging.getLogger("Planet-Amazon") 11 | 12 | ################################################## 13 | #### Prediction function 14 | def predict(test_loader, model): 15 | model.eval() 16 | predictions = [] 17 | 18 | logger.info("Starting Prediction") 19 | for batch_idx, (data, _) in enumerate(tqdm(test_loader)): 20 | data = data.cuda(async=True) 21 | data = Variable(data, volatile=True) 22 | 23 | pred = F.sigmoid(model(data)) 24 | predictions.append(pred.data.cpu().numpy()) 25 | 26 | predictions = np.vstack(predictions) 27 | 28 | logger.info("===> Raw predictions done. Here is a snippet") 29 | logger.info(predictions) 30 | return predictions 31 | 32 | def output(predictions, threshold, X_test, mlb, dir_path, run_name, accuracy): 33 | 34 | raw_pred_path = os.path.join(dir_path, run_name + '-raw-pred-'+str(accuracy)+'.csv') 35 | np.savetxt(raw_pred_path,predictions,delimiter=";") 36 | logger.info("Raw predictions saved to {}".format(raw_pred_path)) 37 | 38 | predictions = predictions > threshold 39 | 40 | result = pd.DataFrame({ 41 | 'image_name': X_test.X, 42 | 'tags': mlb.inverse_transform(predictions) 43 | }) 44 | result['tags'] = result['tags'].apply(lambda tags: " ".join(tags)) 45 | 46 | logger.info("===> Final predictions done. Here is a snippet") 47 | logger.info(result) 48 | 49 | result_path = os.path.join(dir_path, run_name + '-final-pred-'+str(accuracy)+'.csv') 50 | result.to_csv(result_path, index=False) 51 | logger.info("Final predictions saved to {}".format(result_path)) -------------------------------------------------------------------------------- /src/p_dataload.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data.dataset import Dataset 2 | from torchvision import transforms 3 | import pandas as pd 4 | import os 5 | from PIL import Image # Replace by accimage when ready 6 | from PIL.Image import FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM, ROTATE_90, ROTATE_180, ROTATE_270 7 | from PIL.ImageEnhance import Color, Contrast, Brightness, Sharpness 8 | from sklearn.preprocessing import MultiLabelBinarizer 9 | from torch import np, from_numpy # Numpy like wrapper 10 | 11 | class KaggleAmazonDataset(Dataset): 12 | """Dataset wrapping images and target labels for Kaggle - Planet Amazon from Space competition. 
13 | 14 | Arguments: 15 | A CSV file path 16 | Path to image folder 17 | Extension of images 18 | """ 19 | 20 | def __init__(self, csv_path, img_path, img_ext, transform=None): 21 | 22 | self.df = pd.read_csv(csv_path) 23 | assert self.df['image_name'].apply(lambda x: os.path.isfile(img_path + x + img_ext)).all(), \ 24 | "Some images referenced in the CSV file were not found" 25 | 26 | self.mlb = MultiLabelBinarizer() 27 | self.img_path = img_path 28 | self.img_ext = img_ext 29 | self.transform = transform 30 | 31 | self.X = self.df['image_name'] 32 | self.y = self.mlb.fit_transform(self.df['tags'].str.split()).astype(np.float32) 33 | 34 | def X(self): 35 | return self.X 36 | 37 | def __getitem__(self, index): 38 | img = Image.open(self.img_path + self.X[index] + self.img_ext) 39 | img = img.convert('RGB') 40 | if self.transform is not None: 41 | img = self.transform(img) 42 | 43 | label = from_numpy(self.y[index]) 44 | return img, label 45 | 46 | def __len__(self): 47 | return len(self.df.index) 48 | 49 | def getLabelEncoder(self): 50 | return self.mlb 51 | 52 | def getDF(self): 53 | return self.df -------------------------------------------------------------------------------- /src/p_sampler.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data.sampler import Sampler 2 | import numpy as np 3 | import pandas as pd 4 | 5 | class SubsetSampler(Sampler): 6 | """Samples elements from a given list of indices. 7 | 8 | Arguments: 9 | indices (list): a list of indices 10 | """ 11 | 12 | def __init__(self, indices): 13 | self.num_samples = len(indices) 14 | self.indices = indices 15 | 16 | def __iter__(self): 17 | return iter(self.indices) 18 | 19 | def __len__(self): 20 | return self.num_samples 21 | 22 | def balance_weights(df_source, col_target, mlb): 23 | """ Compute balanced weights from a Multilabel dataframe 24 | 25 | Arguments: 26 | Dataframe 27 | The name of the column with the target labels 28 | A MultiLabelBinarizer to one-hot-encode/decode the label column 29 | 30 | Returns: 31 | A Pandas Series with balanced weights 32 | """ 33 | 34 | # Create a working copy of the dataframe 35 | df = df_source.copy(deep=True) 36 | 37 | df_labels = mlb.transform(df[col_target].str.split(" ")) 38 | 39 | ## Next 4 lines won't be needed when axis argument is added to np.unique in NumPy 1.13 40 | ncols = df_labels.shape[1] 41 | dtype = df_labels.dtype.descr * ncols 42 | struct = df_labels.view(dtype) 43 | uniq_labels, uniq_counts = np.unique(struct, return_counts=True) 44 | 45 | uniq_labels = uniq_labels.view(df_labels.dtype).reshape(-1, ncols) 46 | 47 | ## We convert the One-Hot-Encoded labels as string to store them in a dataframe and join on them 48 | df_stats = pd.DataFrame({ 49 | 'target':np.apply_along_axis(np.array_str, 1, uniq_labels), 50 | 'freq':uniq_counts 51 | }) 52 | 53 | df['target'] = np.apply_along_axis(np.array_str, 1, df_labels) 54 | 55 | ## Join the dataframe to add frequency 56 | df = df.merge(df_stats,how='left',on='target') 57 | 58 | ## Compute balanced weights 59 | weights = 1 / df['freq'].astype(np.float) 60 | 61 | return weights -------------------------------------------------------------------------------- /src/p2_validation.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Variable 2 | import numpy as np 3 | import logging 4 | import torch.nn.functional as F 5 | from tqdm import tqdm 6 | import torch 7 | 8 | from src.p_metrics import best_f2_score 9 | 10 | 
## Get the same logger from main 11 | logger = logging.getLogger("Planet-Amazon") 12 | 13 | ################################################## 14 | #### Validate function 15 | def validate(epoch,valid_loader,model,loss_func,mlb): 16 | ## Volatile variables neither save intermediate results nor build graphs for backprop, achieving massive memory savings. 17 | 18 | model.eval() 19 | total_loss = 0 20 | predictions = [] 21 | true_labels = [] 22 | 23 | logger.info("Starting Validation") 24 | for batch_idx, (data, target) in enumerate(tqdm(valid_loader)): 25 | true_labels.append(target.cpu().numpy()) 26 | 27 | data, target = data.cuda(async=True), target.cuda(async=True) 28 | data, target = Variable(data, volatile=True), Variable(target, volatile=True) 29 | 30 | raw_pred = model(data) 31 | # Even though we use softmax for training, it doesn't give good results here 32 | # However the activated neurons for weather will give a much larger response, making thresholding much easier 33 | # pred = torch.cat( 34 | # ( 35 | # F.softmax(raw_pred[:4]), 36 | # F.sigmoid(raw_pred[4:]) 37 | # ), 0 38 | # ) 39 | pred = F.sigmoid(raw_pred) 40 | predictions.append(pred.data.cpu().numpy()) 41 | 42 | total_loss += loss_func(raw_pred,target).data[0] 43 | 44 | avg_loss = total_loss / len(valid_loader) 45 | 46 | predictions = np.vstack(predictions) 47 | true_labels = np.vstack(true_labels) 48 | 49 | score, threshold = best_f2_score(true_labels, predictions) 50 | logger.info("Corresponding tags\n{}".format(mlb.classes_)) 51 | 52 | logger.info("===> Validation - Avg. loss: {:.4f}\tF2 Score: {:.4f}".format(avg_loss,score)) 53 | return score, avg_loss, threshold -------------------------------------------------------------------------------- /src/p2_prediction.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Variable 2 | import numpy as np 3 | import logging 4 | import os 5 | import pandas as pd 6 | import torch.nn.functional as F 7 | from tqdm import tqdm 8 | import torch 9 | 10 | ## Get the same logger from main 11 | logger = logging.getLogger("Planet-Amazon") 12 | 13 | ################################################## 14 | #### Prediction function 15 | def predict(test_loader, model): 16 | model.eval() 17 | predictions = [] 18 | 19 | logger.info("Starting Prediction") 20 | for batch_idx, (data, _) in enumerate(tqdm(test_loader)): 21 | data = data.cuda(async=True) 22 | data = Variable(data, volatile=True) 23 | 24 | raw_pred = model(data) 25 | # Even though we use softmax for training, it doesn't give good results here 26 | # However the activated neurons for weather will give a much larger response, making thresholding much easier 27 | # pred = torch.cat( 28 | # ( 29 | # F.softmax(raw_pred[:4]), 30 | # F.sigmoid(raw_pred[4:]) 31 | # ), 0 32 | # ) 33 | pred = F.sigmoid(raw_pred) 34 | predictions.append(pred.data.cpu().numpy()) 35 | 36 | predictions = np.vstack(predictions) 37 | 38 | logger.info("===> Raw predictions done.
Here is a snippet") 39 | print(predictions) 40 | return predictions 41 | 42 | def output(predictions, threshold, X_test, mlb, dir_path, run_name, accuracy): 43 | 44 | raw_pred_path = os.path.join(dir_path, run_name + '-raw-pred-'+str(accuracy)+'.csv') 45 | np.savetxt(raw_pred_path,predictions,delimiter=";") 46 | logger.info("Raw predictions saved to {}".format(raw_pred_path)) 47 | 48 | predictions = predictions > threshold 49 | 50 | result = pd.DataFrame({ 51 | 'image_name': X_test.X, 52 | 'tags': mlb.inverse_transform(predictions) 53 | }) 54 | result['tags'] = result['tags'].apply(lambda tags: " ".join(tags)) 55 | 56 | logger.info("===> Final predictions done. Here is a snippet") 57 | print(result) 58 | 59 | result_path = os.path.join(dir_path, run_name + '-final-pred-'+str(accuracy)+'.csv') 60 | result.to_csv(result_path, index=False) 61 | logger.info("Final predictions saved to {}".format(result_path)) -------------------------------------------------------------------------------- /src/p2_dataload.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data.dataset import Dataset 2 | from torchvision import transforms 3 | import pandas as pd 4 | import os 5 | from PIL import Image # Replace by accimage when ready 6 | from PIL.Image import FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM, ROTATE_90, ROTATE_180, ROTATE_270 7 | from PIL.ImageEnhance import Color, Contrast, Brightness, Sharpness 8 | from sklearn.preprocessing import MultiLabelBinarizer 9 | from torch import np, from_numpy # Numpy like wrapper 10 | 11 | class KaggleAmazonDataset(Dataset): 12 | """Dataset wrapping images and target labels for Kaggle - Planet Amazon from Space competition. 13 | 14 | Arguments: 15 | A CSV file path 16 | Path to image folder 17 | Extension of images 18 | """ 19 | 20 | def __init__(self, csv_path, img_path, img_ext, transform=None): 21 | 22 | self.df = pd.read_csv(csv_path) 23 | assert self.df['image_name'].apply(lambda x: os.path.isfile(img_path + x + img_ext)).all(), \ 24 | "Some images referenced in the CSV file were not found" 25 | 26 | # Ordering weather first 27 | self.mlb = MultiLabelBinarizer( 28 | classes = [ 29 | 'clear', 'cloudy', 'haze','partly_cloudy', 30 | 'agriculture','artisinal_mine','bare_ground','blooming', 31 | 'blow_down','conventional_mine','cultivation','habitation', 32 | 'primary','road','selective_logging','slash_burn','water' 33 | ] 34 | ) 35 | self.img_path = img_path 36 | self.img_ext = img_ext 37 | self.transform = transform 38 | 39 | self.X = self.df['image_name'] 40 | self.y = self.mlb.fit_transform(self.df['tags'].str.split()).astype(np.float32) 41 | 42 | def X(self): 43 | return self.X 44 | 45 | def __getitem__(self, index): 46 | img = Image.open(self.img_path + self.X[index] + self.img_ext) 47 | img = img.convert('RGB') 48 | if self.transform is not None: 49 | img = self.transform(img) 50 | 51 | label = from_numpy(self.y[index]) 52 | return img, label 53 | 54 | def __len__(self): 55 | return len(self.df.index) 56 | 57 | def getLabelEncoder(self): 58 | return self.mlb 59 | 60 | def getDF(self): 61 | return self.df -------------------------------------------------------------------------------- /src/p_metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import logging 3 | from sklearn.metrics import fbeta_score 4 | from scipy.optimize import fmin_l_bfgs_b, basinhopping 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from 
torch.autograd import Variable 9 | from timeit import default_timer as timer 10 | 11 | 12 | ## Get the same logger from main" 13 | logger = logging.getLogger("Planet-Amazon") 14 | 15 | def best_f2_score(true_labels, predictions): 16 | 17 | def f_neg(threshold): 18 | ## Scipy tries to minimize the function so we must get its inverse 19 | return - fbeta_score(true_labels, predictions > threshold, beta=2, average='samples') 20 | 21 | # Initialization of best threshold search 22 | thr_0 = [0.20] * 17 23 | constraints = [(0.,1.)] * 17 24 | def bounds(**kwargs): 25 | x = kwargs["x_new"] 26 | tmax = bool(np.all(x <= 1)) 27 | tmin = bool(np.all(x >= 0)) 28 | return tmax and tmin 29 | 30 | # Search using L-BFGS-B, the epsilon step must be big otherwise there is no gradient 31 | minimizer_kwargs = {"method": "L-BFGS-B", 32 | "bounds":constraints, 33 | "options":{ 34 | "eps": 0.05 35 | } 36 | } 37 | 38 | # We combine L-BFGS-B with Basinhopping for stochastic search with random steps 39 | logger.info("===> Searching optimal threshold for each label") 40 | start_time = timer() 41 | 42 | opt_output = basinhopping(f_neg, thr_0, 43 | stepsize = 0.1, 44 | minimizer_kwargs=minimizer_kwargs, 45 | niter=10, 46 | accept_test=bounds) 47 | 48 | end_time = timer() 49 | logger.info("===> Optimal threshold for each label:\n{}".format(opt_output.x)) 50 | logger.info("Threshold found in: %s seconds" % (end_time - start_time)) 51 | 52 | score = - opt_output.fun 53 | return score, opt_output.x 54 | 55 | 56 | # We use real valued F2 score for training. Input can be anything between 0 and 1. 57 | # Threshold is not differentiable so we don't use it during training 58 | # We get a smooth F2 score valid for real values and not only 0/1 59 | def torch_f2_score(y_true, y_pred): 60 | return torch_fbeta_score(y_true, y_pred, 2) 61 | 62 | def torch_fbeta_score(y_true, y_pred, beta, eps=1e-9): 63 | beta2 = beta**2 64 | 65 | y_true = y_true.float() 66 | 67 | true_positive = (y_pred * y_true).sum(dim=1) 68 | precision = true_positive.div(y_pred.sum(dim=1).add(eps)) 69 | recall = true_positive.div(y_true.sum(dim=1).add(eps)) 70 | 71 | return torch.mean( 72 | (precision*recall). 73 | div(precision.mul(beta2) + recall + eps). 
74 | mul(1 + beta2)) 75 | 76 | 77 | class SmoothF2Loss(nn.Module): 78 | def __init__(self): 79 | super(SmoothF2Loss, self).__init__() 80 | 81 | def forward(self, input, target): 82 | return 1 - torch_f2_score(target, torch.sigmoid(input)) -------------------------------------------------------------------------------- /src/p2_metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import logging 3 | from sklearn.metrics import fbeta_score 4 | from scipy.optimize import fmin_l_bfgs_b, basinhopping 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from torch.autograd import Variable 9 | from timeit import default_timer as timer 10 | 11 | 12 | ## Get the same logger from main 13 | logger = logging.getLogger("Planet-Amazon") 14 | 15 | def best_f2_score(true_labels, predictions): 16 | 17 | def f_neg(threshold): 18 | ## Scipy tries to minimize the function so we must return its negative 19 | return - fbeta_score(true_labels, predictions > threshold, beta=2, average='samples') 20 | 21 | # Initialization of best threshold search 22 | thr_0 = [0.20] * 17 23 | constraints = [(0.,1.)] * 17 24 | def bounds(**kwargs): 25 | x = kwargs["x_new"] 26 | tmax = bool(np.all(x <= 1)) 27 | tmin = bool(np.all(x >= 0)) 28 | return tmax and tmin 29 | 30 | # Search using L-BFGS-B, the epsilon step must be big otherwise there is no gradient 31 | minimizer_kwargs = {"method": "L-BFGS-B", 32 | "bounds":constraints, 33 | "options":{ 34 | "eps": 0.05 35 | } 36 | } 37 | 38 | # We combine L-BFGS-B with Basinhopping for stochastic search with random steps 39 | logger.info("===> Searching optimal threshold for each label") 40 | start_time = timer() 41 | 42 | opt_output = basinhopping(f_neg, thr_0, 43 | stepsize = 0.1, 44 | minimizer_kwargs=minimizer_kwargs, 45 | niter=10, 46 | accept_test=bounds) 47 | 48 | end_time = timer() 49 | logger.info("===> Optimal threshold for each label:\n{}".format(opt_output.x)) 50 | logger.info("Threshold found in: %s seconds" % (end_time - start_time)) 51 | 52 | score = - opt_output.fun 53 | return score, opt_output.x 54 | 55 | 56 | # We use real valued F2 score for training. Input can be anything between 0 and 1. 57 | # Threshold is not differentiable so we don't use it during training 58 | # We get a smooth F2 score valid for real values and not only 0/1 59 | def torch_f2_score(y_true, y_pred): 60 | return torch_fbeta_score(y_true, y_pred, 2) 61 | 62 | def torch_fbeta_score(y_true, y_pred, beta, eps=1e-9): 63 | beta2 = beta**2 64 | 65 | y_true = y_true.float() 66 | 67 | true_positive = (y_pred * y_true).sum(dim=1) 68 | precision = true_positive.div(y_pred.sum(dim=1).add(eps)) 69 | recall = true_positive.div(y_true.sum(dim=1).add(eps)) 70 | 71 | return torch.mean( 72 | (precision*recall). 73 | div(precision.mul(beta2) + recall + eps).
74 | mul(1 + beta2)) 75 | 76 | 77 | class SmoothF2Loss(nn.Module): 78 | def __init__(self): 79 | super(SmoothF2Loss, self).__init__() 80 | 81 | def forward(self, input, target): 82 | return 1 - torch_f2_score(target, torch.sigmoid(input)) -------------------------------------------------------------------------------- /Ideas.txt: -------------------------------------------------------------------------------- 1 | ## Data sources: 2 | https://github.com/CreativeInquiry/terrapattern 3 | https://github.com/nealjean/predicting-poverty 4 | 5 | ## Weather 6 | https://www.kaggle.com/c/planet-understanding-the-amazon-from-space/data 7 | Each chip will have one and potentially more than one atmospheric label and zero or more common and rare labels. Chips that are labeled as cloudy should have no other labels, but there may be labeling errors. 8 | 9 | Cloud Cover Labels 10 | 11 | Clouds are a major challenge for passive satellite imaging, and daily cloud cover and rain showers in the Amazon basin can significantly complicate monitoring in the area. For this reason we have chosen to include a cloud cover label for each chip. These labels closely mirror what one would see in a local weather forecast: clear, partly cloudy, cloudy, and haze. For our purposes haze is defined as any chip where atmospheric clouds are visible but they are not so opaque as to obscure the ground. Clear scenes show no evidence of clouds, and partly cloudy scenes can show opaque cloud cover over any portion of the image. Cloudy images have 90% of the chip obscured by opaque cloud cover. 12 | 13 | => Probably you can't be clear and cloudy and partly cloudy at the same time 14 | ==> Separate the output into a softmax + sigmoid activation? 15 | ==> Use an RNN to model the dependency? 16 | 17 | ## Deal with Imbalance: 18 | 19 | - Penalization: change the cost so that the NN pays more attention to underrepresented classes 20 | 21 | 22 | ## Loss function: 23 | - Which loss function for multilabel instead of BCE? 24 | - WARP loss? 25 | 26 | 27 | ## Thresholding 28 | - Remove thresholding altogether with an end-to-end learner 29 | 30 | ## Architecture 31 | Have an RNN that understands intensity/correlation "partly" 32 | 33 | 34 | - PyTorch Image captioning (Neural Talk) 35 | - RNN+CNN Multilabel classification 36 | 37 | ## Forum: 38 | - CNN-RNN implementation 39 | https://github.com/fchollet/keras/issues/5146 40 | 41 | ## Papers: 42 | - DL - Imbalanced dataset - kNN cluster + Quintuplet hinge loss 43 | https://pdfs.semanticscholar.org/69a6/8f9cf874c69e2232f47808016c2736b90c35.pdf 44 | 45 | - Multilabel ranking 46 | https://arxiv.org/abs/1312.4894 47 | 48 | - Multilabel classification for fashion search 49 | https://openreview.net/pdf?id=HyWDCXjgx 50 | 51 | - CNN+RNN Unified Arch for multilabel 52 | https://www.ics.uci.edu/~yyang8/research/cnn-rnn/cnn-rnn-cvpr2016.pdf 53 | 54 | # Overviews: 55 | - RNN + CNN combo 56 | https://wiki.tum.de/display/lfdv/Recurrent+Neural+Networks+-+Combination+of+RNN+and+CNN 57 | 58 | # Done 59 | ===> Optimize threshold with L-BFGS-B 60 | ===> search global minimum with basinhopping?
61 | ===> Resampling to deal with imbalance - To be done with care, thresholds became 0 and 1 for certain classes 62 | ===> Data augmentation with color and affine transforms - Zoom might have adverse effect 63 | 64 | # Done and not used 65 | ===> Tried using a smooth F2 based loss function (no threshold), the network performance on F2 score was reduced by 0.03 from the start to finish (0.85 --> 0.88) 66 | ===> Apparently cross-entropy have very nice properties when used together with sigmoid function and F2 score does not have those. 67 | -------------------------------------------------------------------------------- /baseline/001-keras-baseline-0.80752.py: -------------------------------------------------------------------------------- 1 | import numpy as np # linear algebra 2 | import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv) 3 | import os 4 | import gc 5 | 6 | import keras as k 7 | from keras.models import Sequential, load_model 8 | from keras.layers import Dense, Dropout, Flatten, BatchNormalization 9 | from keras.layers import Conv2D, MaxPooling2D 10 | from sklearn.preprocessing import MultiLabelBinarizer 11 | 12 | import cv2 13 | from tqdm import tqdm 14 | 15 | RESOLUTION = 128 16 | CACHE_FILE = '001-baseline-cache.h5' 17 | THRESHOLD = 0.2 18 | 19 | df_train = pd.read_csv('../data/train.csv') 20 | 21 | mlb = MultiLabelBinarizer() 22 | X_train = [] 23 | X_test = [] 24 | df_train = pd.read_csv('../data/train.csv') 25 | y_train = mlb.fit_transform(df_train['tags'].str.split()) 26 | 27 | for file in tqdm(df_train['image_name'], miniters=256): 28 | img = cv2.imread('../data/train-jpg/{}.jpg'.format(file)) 29 | X_train.append(cv2.resize(img,(RESOLUTION,RESOLUTION))) 30 | 31 | X_train = np.array(X_train, np.float16) / 255. ## TODO load per batch to avoid memory error here 32 | 33 | print(X_train.shape) 34 | print(y_train.shape) 35 | 36 | split = 15000 37 | x_train, x_valid, y_train, y_valid = X_train[:split], X_train[split:], y_train[:split], y_train[split:] 38 | 39 | model = Sequential() 40 | model.add(Conv2D(32, kernel_size=(3, 3), 41 | activation='relu', 42 | input_shape=(RESOLUTION,RESOLUTION, 3))) 43 | model.add(BatchNormalization()) 44 | model.add(Conv2D(64, (3, 3), activation='relu')) 45 | model.add(BatchNormalization()) 46 | model.add(MaxPooling2D(pool_size=(2, 2))) 47 | model.add(BatchNormalization()) 48 | model.add(Flatten()) 49 | model.add(Dense(128, activation='relu')) 50 | model.add(BatchNormalization()) 51 | model.add(Dense(17, activation='sigmoid')) 52 | 53 | model.compile(loss='binary_crossentropy', # We NEED binary here, since categorical_crossentropy l1 norms the output before calculating loss. 
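# (i.e. Keras' categorical_crossentropy rescales the predictions so that the 17 outputs sum to 1, which only makes sense for mutually exclusive classes; binary_crossentropy scores each sigmoid output independently, which is what multi-label tagging needs.)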
54 | optimizer='adam', 55 | metrics=['accuracy']) 56 | 57 | 58 | 59 | if os.path.isfile(CACHE_FILE): 60 | print('####### Loading model from cache ######') 61 | model = load_model(CACHE_FILE) 62 | 63 | else: 64 | print('####### Cache not found, building from scratch ######') 65 | model.fit(x_train, y_train, 66 | batch_size=64, 67 | epochs=6, # Should implement early stopping 68 | verbose=1, 69 | validation_data=(x_valid, y_valid)) 70 | model.save(CACHE_FILE) 71 | 72 | from sklearn.metrics import fbeta_score 73 | 74 | p_valid = model.predict(x_valid, batch_size=128) 75 | print(y_valid) 76 | print(p_valid) 77 | print(fbeta_score(y_valid, np.array(p_valid) > THRESHOLD, beta=2, average='samples')) 78 | 79 | 80 | ######## Prediction ######## 81 | 82 | df_test = pd.read_csv('../data/sample_submission.csv') 83 | 84 | for file in tqdm(df_test['image_name'], miniters=256): 85 | img = cv2.imread('../data/test-jpg/{}.jpg'.format(file)) 86 | X_test.append(cv2.resize(img,(RESOLUTION,RESOLUTION))) 87 | 88 | 89 | X_test = np.array(X_test, np.float16) / 255. 90 | 91 | y_pred = model.predict(X_test, batch_size=128) 92 | # np.savetxt("pred-baseline.csv", y_pred, delimiter=";") 93 | 94 | df_submission = pd.DataFrame() 95 | df_submission['image_name'] = df_test['image_name'] 96 | df_submission['tags'] = [' '.join(x) for x in mlb.inverse_transform(y_pred > THRESHOLD)] 97 | 98 | df_submission.to_csv('001-baseline.csv', index=False) 99 | -------------------------------------------------------------------------------- /baseline/unfinished_attempts/002-Keras-Inception-Transfer.py: -------------------------------------------------------------------------------- 1 | from keras.applications.inception_v3 import InceptionV3 2 | 3 | from keras.models import Model, load_model 4 | from keras.layers import Dense, Flatten, Input, BatchNormalization 5 | from keras import optimizers 6 | from sklearn.model_selection import train_test_split 7 | import pandas as pd 8 | from sklearn.metrics import fbeta_score 9 | from tqdm import tqdm 10 | import cv2 11 | import numpy as np 12 | import os 13 | from sklearn.preprocessing import MultiLabelBinarizer 14 | 15 | RESOLUTION = 96 16 | CACHE_FILE = '002-inception-baseline-cache.h5' 17 | THRESHOLD = 0.2 18 | 19 | def build_model(): 20 | #Create own input format 21 | model_input = Input(shape=(RESOLUTION,RESOLUTION,3),name = 'image_input') 22 | 23 | #Load Inception v3 24 | base_model = InceptionV3(weights='imagenet', include_top=False) 25 | for layer in base_model.layers: 26 | layer.trainable = False 27 | 28 | x = base_model(model_input) 29 | feat = Flatten(name='flatten')(x) 30 | feat = Dense(128, activation='relu')(feat) 31 | feat = BatchNormalization()(feat) 32 | out = Dense(17, activation='sigmoid')(feat) 33 | model = Model(inputs=model_input, outputs=out) 34 | 35 | model.compile(loss='binary_crossentropy', 36 | optimizer=optimizers.SGD(lr=1e-4, momentum=0.9), 37 | metrics=['accuracy']) 38 | 39 | 40 | print('######## Summary ########') 41 | model.summary() 42 | print('\n\n\n######## Config ########') 43 | model.get_config() 44 | print('\n\n\n######## ###### ########') 45 | 46 | return model 47 | 48 | mlb = MultiLabelBinarizer() 49 | X_train = [] 50 | X_test = [] 51 | df_train = pd.read_csv('../data/train.csv') 52 | y_train = mlb.fit_transform(df_train['tags'].str.split()) 53 | 54 | 55 | for file in tqdm(df_train['image_name'], miniters=256): 56 | img = cv2.imread('../data/train-jpg/{}.jpg'.format(file)) 57 | X_train.append(cv2.resize(img,(RESOLUTION,RESOLUTION))) 58 | 59 | X_train = 
np.array(X_train, np.float16) / 255. ## TODO load per batch to avoid memory error here 60 | 61 | print(X_train.shape) 62 | print(y_train.shape) 63 | 64 | ######## Validation ######## 65 | x_trn, x_val, y_trn, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42) 66 | 67 | if os.path.isfile(CACHE_FILE): 68 | print('####### Loading model from cache ######') 69 | model = load_model(CACHE_FILE) 70 | 71 | else: 72 | print('####### Cache not found, building from scratch ######') 73 | model = build_model() 74 | model.fit(x_trn, y_trn, 75 | batch_size=64, 76 | epochs=15, 77 | verbose=1, 78 | validation_data=(x_val, y_val)) 79 | model.save(CACHE_FILE) 80 | 81 | 82 | p_valid = model.predict(x_val, batch_size=128) 83 | print(y_val) 84 | print(p_valid) 85 | print(fbeta_score(y_val, np.array(p_valid) > THRESHOLD, beta=2, average='samples')) 86 | 87 | ######## Prediction ######## 88 | 89 | df_test = pd.read_csv('../data/sample_submission.csv') 90 | 91 | for file in tqdm(df_test['image_name'], miniters=256): 92 | img = cv2.imread('../data/test-jpg/{}.jpg'.format(file)) 93 | X_test.append(cv2.resize(img,(RESOLUTION,RESOLUTION))) 94 | 95 | 96 | X_test = np.array(X_test, np.float16) / 255. 97 | 98 | y_pred = model.predict(X_test, batch_size=128) 99 | 100 | df_submission = pd.DataFrame() 101 | df_submission['image_name'] = df_test['image_name'] 102 | df_submission['tags'] = [' '.join(x) for x in mlb.inverse_transform(y_pred > THRESHOLD)] 103 | 104 | df_submission.to_csv('002-inception-baseline.csv', index=False) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Amazon Forest Computer Vision 2 | Satellite image tagging code using PyTorch / Keras 3 | 4 | Here is a sample of the images we had to work with: 5 | 6 | ![](media/chipdesc.jpg) 7 | ![](media/chips.jpg) 8 | ![](media/agg1.jpg) 9 | ![](media/cloudy_1.jpg) 10 | ![](media/habitation1.jpg) 11 | ![](media/haze1.jpg) 12 | ![](media/pc1.jpg) 13 | ![](media/river.jpg) 14 | ![](media/road.jpg) 15 | 16 | _Source: https://www.kaggle.com/c/planet-understanding-the-amazon-from-space/data_ 17 | 18 | > Note: the repo was developed in May 2017 on PyTorch 0.1. PyTorch was publicly announced in January 2017 and has seen tremendous changes since then. 19 | 20 | You will find: 21 | - [A script that outputs the mean and stddev of your images if you want to train from scratch](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/compute-mean-std.py#L28) 22 | 23 | - [Using a weighted loss function](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/main_pytorch.py#L61) 24 | 25 | - [Logging your experiment](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/main_pytorch.py#L89) 26 | 27 | - [Composing data augmentations](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/main_pytorch.py#L103), also [here](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/src/p_data_augmentation.py#L181). 28 | Note: use [Pillow-SIMD](https://python-pillow.org/pillow-perf/) instead of PIL/Pillow. It is even faster than OpenCV. 29 | 30 | - [Loading from a CSV that contains image paths - 61 lines, yeah](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/src/p2_dataload.py#L23) 31 | 32 | - [Equivalent in Keras - 216 lines, ugh](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/src/k_dataloader.py).
Note: so many lines were needed because, by default, Keras gives you either data augmentation with ImageDataGenerator or lazy loading of images with "flow_from_directory", and there is no flow_from_csv 33 | 34 | - [Model finetuning with custom PyCaffe weights](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/src/p_neuro.py#L139) 35 | 36 | - Train_test_split, [PyTorch version](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/src/p_model_selection.py#L4) and [Keras version](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/src/k_model_selection.py#L4) 37 | 38 | - [Weighted sampling training so that the model views rare cases more often](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/main_pytorch.py#L131-L140) 39 | 40 | - [Custom Sampler creation, example for the balanced sampler](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/src/p_sampler.py) 41 | 42 | - [Saving snapshots each epoch](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/main_pytorch.py#L171) 43 | 44 | - [Loading the best snapshot for prediction](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/pytorch_predict_only.py#L83) 45 | 46 | - [Failed word embeddings experiments](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/Embedding-RNN-Autoencoder.ipynb) to [combine image and text data](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/Dual_Feed_Image_Label.ipynb) 47 | 48 | - [Combined weighted loss function (softmax for unique weather tags, BCE for multilabel tags)](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/src/p2_loss.py#L36) 49 | 50 | - [Selecting the best F2-threshold](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/src/p2_metrics.py#L38) via stochastic search at the end of each epoch to [maximize the validation score](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/526128239a6abcbb32fbf5b34ed8cc7a3cd87c4e/src/p2_validation.py#L49). The thresholds are then saved along with the model parameters; a simplified sketch of the search is shown below.
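A minimal sketch of that per-label threshold search, condensed from `src/p2_metrics.py` (the full version also adds an `accept_test` bound check, logging and timing):

```python
import numpy as np
from sklearn.metrics import fbeta_score
from scipy.optimize import basinhopping

def best_f2_score(true_labels, predictions):
    """Find one decision threshold per label that maximizes the samples-averaged F2 score."""
    def f_neg(threshold):
        # basinhopping minimizes, so return the negated F2 score
        return -fbeta_score(true_labels, predictions > threshold, beta=2, average='samples')

    thr_0 = [0.20] * 17                            # start every label at 0.20
    minimizer_kwargs = {"method": "L-BFGS-B",
                        "bounds": [(0., 1.)] * 17,
                        "options": {"eps": 0.05}}  # large step, the metric is piecewise constant
    opt = basinhopping(f_neg, thr_0, stepsize=0.1, niter=10,
                       minimizer_kwargs=minimizer_kwargs)
    return -opt.fun, opt.x                         # best F2 score, per-label thresholds
```

The resulting thresholds are stored in the checkpoint next to the weights, so the prediction-time scripts (`pytorch_predict_only.py`, `adjust_prediction.py`) can reuse them directly.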
51 | 52 | - [CNN-RNN combination (work in progress)](https://github.com/mratsim/Amazon_Forest_Computer_Vision/blob/master/src/p3_neuroRNN.py#L10) 53 | -------------------------------------------------------------------------------- /pytorch_predict_only.py: -------------------------------------------------------------------------------- 1 | ## Custom Imports 2 | from src.p_dataload import KaggleAmazonDataset 3 | from src.p_neuro import Net, ResNet50, DenseNet121 4 | from src.p_training import train, snapshot 5 | from src.p_validation import validate 6 | from src.p_model_selection import train_valid_split 7 | from src.p_logger import setup_logs 8 | from src.p_prediction import predict, output 9 | from src.p_data_augmentation import ColorJitter 10 | 11 | ## Utilities 12 | import random 13 | import logging 14 | import time 15 | from timeit import default_timer as timer 16 | import os 17 | 18 | ## Libraries 19 | import numpy as np 20 | 21 | ## Torch 22 | import torch.optim as optim 23 | import torch.nn.functional as F 24 | from torchvision import transforms 25 | from torch.utils.data import DataLoader 26 | from torch.utils.data.sampler import SubsetRandomSampler 27 | import torch 28 | 29 | 30 | ############################################################################ 31 | ####### CONTROL CENTER ############# STAR COMMAND ######################### 32 | 33 | # Run name 34 | run_name = "2017-05-04_1730-thresh_densenet121-predict-only" 35 | 36 | model = DenseNet121(17).cuda() 37 | batch_size = 32 38 | 39 | ## Normalization on dataset mean/std 40 | # normalize = transforms.Normalize(mean=[0.30249774, 0.34421161, 0.31507745], 41 | # std=[0.13718569, 0.14363895, 0.16695958]) 42 | 43 | ## Normalization on ImageNet mean/std for finetuning 44 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 45 | std=[0.229, 0.224, 0.225]) 46 | 47 | save_dir = './snapshots' 48 | 49 | ####### CONTROL CENTER ############# STAR COMMAND ######################### 50 | ############################################################################ 51 | 52 | if __name__ == "__main__": 53 | # Initiate timer 54 | global_timer = timer() 55 | 56 | # Setup logs 57 | logger = setup_logs(save_dir, run_name) 58 | 59 | # Setting random seeds for reproducibility. 
(Caveat, some CuDNN algorithms are non-deterministic) 60 | torch.manual_seed(1337) 61 | torch.cuda.manual_seed(1337) 62 | np.random.seed(1337) 63 | random.seed(1337) 64 | 65 | ## Normalization only for validation and test 66 | ds_transform_raw = transforms.Compose([ 67 | transforms.CenterCrop(224), 68 | transforms.ToTensor(), 69 | normalize 70 | ]) 71 | 72 | 73 | 74 | X_test = KaggleAmazonDataset('./data/sample_submission_v2.csv','./data/test-jpg/','.jpg', 75 | ds_transform_raw 76 | ) 77 | test_loader = DataLoader(X_test, 78 | batch_size=batch_size, 79 | num_workers=4, 80 | pin_memory=True) 81 | 82 | # Load model from best iteration 83 | model_path = './snapshots/2017-05-04_1730-thresh_densenet121-model_best.pth' 84 | logger.info('===> loading {} for prediction'.format(model_path)) 85 | checkpoint = torch.load(model_path) 86 | model.load_state_dict(checkpoint['state_dict']) 87 | 88 | # Predict 89 | predictions = predict(test_loader, model) # TODO load model from the best on disk 90 | 91 | # Output 92 | X_train = KaggleAmazonDataset('./data/train.csv','./data/train-jpg/','.jpg') 93 | 94 | 95 | output(predictions, 96 | checkpoint['threshold'], 97 | X_test, 98 | X_train.getLabelEncoder(), 99 | './out', 100 | '2017-05-04_1730-thresh_densenet121', 101 | checkpoint['best_score']) 102 | 103 | ########################################################## 104 | 105 | end_global_timer = timer() 106 | logger.info("################## Success #########################") 107 | logger.info("Total elapsed time: %s" % (end_global_timer - global_timer)) -------------------------------------------------------------------------------- /main_keras.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from keras.models import Sequential, load_model 5 | from keras.layers import Dense, Dropout, Flatten, BatchNormalization 6 | from keras.layers import Conv2D, MaxPooling2D 7 | 8 | from timeit import default_timer as timer 9 | from src.k_dataloader import AmazonGenerator 10 | from src.k_model_selection import train_valid_split 11 | 12 | from sklearn.metrics import fbeta_score 13 | 14 | RESOLUTION = 256 15 | 16 | if __name__ == "__main__": 17 | # Initiate timer 18 | global_timer = timer() 19 | 20 | # Setting random seeds for reproducibility. 
(Caveat, some CuDNN algorithms are non-deterministic) 21 | np.random.seed(1337) 22 | 23 | model = Sequential() 24 | model.add(Conv2D(32, kernel_size=(3, 3), 25 | activation='relu', 26 | input_shape=(RESOLUTION,RESOLUTION, 3))) 27 | model.add(BatchNormalization()) 28 | model.add(Conv2D(64, (3, 3), activation='relu')) 29 | model.add(BatchNormalization()) 30 | model.add(MaxPooling2D(pool_size=(2, 2))) 31 | model.add(BatchNormalization()) 32 | model.add(Flatten()) 33 | model.add(Dense(96, activation='relu')) 34 | model.add(BatchNormalization()) 35 | model.add(Dense(17, activation='sigmoid')) 36 | 37 | model.compile(loss='binary_crossentropy', 38 | optimizer='adam', 39 | metrics=['accuracy']) 40 | 41 | train_gen = AmazonGenerator(featurewise_center=True, 42 | featurewise_std_normalization=True, 43 | width_shift_range=0.15, 44 | horizontal_flip=True, 45 | rotation_range=15, 46 | rescale=1./255 47 | ) 48 | 49 | valid_gen = AmazonGenerator(featurewise_center=True, 50 | featurewise_std_normalization=True, 51 | rescale=1./255) 52 | 53 | # train_gen.fit_from_csv('./data/train.csv', 54 | # './data/train-jpg/', 55 | # '.jpg', 56 | # rescale=1./255, 57 | # target_size=(RESOLUTION,RESOLUTION)) 58 | 59 | # train_gen.dump_dataset_mean_std('train_256_mean.npy', 'train_256_std.npy') 60 | train_gen.load_mean_std('train_256_mean.npy', 'train_256_std.npy') 61 | valid_gen.load_mean_std('train_256_mean.npy', 'train_256_std.npy') 62 | 63 | df_train = pd.read_csv('./data/train.csv') 64 | 65 | trn_idx, val_idx = train_valid_split(df_train, 0.2) 66 | 67 | batch_size = 32 68 | 69 | x_trn = train_gen.flow_from_df(df_train.iloc[trn_idx].reset_index(), 70 | './data/train-jpg/', 71 | '.jpg', 72 | mode='fit', 73 | batch_size=batch_size) 74 | x_val = valid_gen.flow_from_df(df_train.iloc[val_idx].reset_index(), 75 | './data/train-jpg/', 76 | '.jpg', 77 | mode='predict', 78 | batch_size=batch_size) 79 | model.fit_generator(x_trn, 80 | steps_per_epoch = len(trn_idx) / batch_size, 81 | epochs=1, 82 | workers=6, pickle_safe=True 83 | ) 84 | 85 | ypreds = model.predict_generator(x_val, 86 | steps = len(val_idx)/batch_size, 87 | workers=6, pickle_safe=True 88 | ) 89 | 90 | mlb = train_gen.getLabelEncoder() 91 | predictions = ypreds > 0.2 92 | true_labels = mlb.transform(df_train['tags'].iloc[val_idx].values) 93 | 94 | score=fbeta_score(true_labels, predictions, beta=2, average='samples') 95 | 96 | end_global_timer = timer() 97 | print("################## Success #########################") 98 | print("Total elapsed time: %s" % (end_global_timer - global_timer)) -------------------------------------------------------------------------------- /src/p3_neuroRNN.py: -------------------------------------------------------------------------------- 1 | from torch import nn, ones 2 | from torch.autograd import Variable 3 | from torchvision import models 4 | from torch.nn.init import kaiming_normal 5 | from torch import np 6 | import torch 7 | import torch.nn.functional as F 8 | 9 | 10 | class GRU_ResNet50(nn.Module): 11 | ## We use ResNet weights from PyCaffe. 
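## The ResNet-50 trunk turns each image into a single 2048-dim feature vector; that vector is batch-normalized, fed as a length-1 sequence to the GRU, and the GRU output feeds the final linear classifier over the tags.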
12 | def __init__(self, num_classes, hidden_size, num_layers): 13 | super(GRU_ResNet50, self).__init__() 14 | 15 | # Loading ResNet arch from PyTorch and weights from Pycaffe 16 | original_model = models.resnet50(pretrained=False) 17 | original_model.load_state_dict(torch.load('./zoo/resnet50.pth')) 18 | 19 | # Everything except the last linear layer 20 | self.features = nn.Sequential(*list(original_model.children())[:-1]) 21 | 22 | # Get number of features of last layer 23 | num_feats = original_model.fc.in_features 24 | 25 | self.bn = nn.BatchNorm1d(num_feats, momentum=0.01) 26 | 27 | self.hidden_size = hidden_size 28 | self.rnn = nn.GRU(input_size=num_feats, 29 | hidden_size=hidden_size, 30 | num_layers=num_layers, 31 | batch_first = True) 32 | 33 | # Plug our classifier 34 | self.classifier = nn.Sequential( 35 | nn.Linear(hidden_size, num_classes) 36 | ) 37 | 38 | # Init of last layer 39 | for m in self.classifier: 40 | kaiming_normal(m.weight) 41 | self.bn.weight.data.fill_(1) 42 | self.bn.bias.data.zero_() 43 | # How to init RNN? 44 | 45 | # Freeze those weights 46 | # for p in self.features.parameters(): 47 | # p.requires_grad = False 48 | 49 | def forward(self, x, hidden=None): 50 | f = self.features(x) 51 | f = self.bn(f.view(f.size(0), -1)) 52 | f = f.unsqueeze(1) 53 | x, hidden = self.rnn(f, hidden) 54 | x = x.view(-1, self.hidden_size) 55 | y = self.classifier(x) 56 | return y 57 | 58 | class LSTM_ResNet50(nn.Module): 59 | ## We use ResNet weights from PyCaffe. 60 | def __init__(self, num_classes, hidden_size, num_layers): 61 | super(LSTM_ResNet50, self).__init__() 62 | 63 | # Loading ResNet arch from PyTorch and weights from Pycaffe 64 | original_model = models.resnet50(pretrained=False) 65 | original_model.load_state_dict(torch.load('./zoo/resnet50.pth')) 66 | 67 | # Everything except the last linear layer 68 | self.features = nn.Sequential(*list(original_model.children())[:-1]) 69 | 70 | # Get number of features of last layer 71 | num_feats = original_model.fc.in_features 72 | 73 | self.bn = nn.BatchNorm1d(num_feats, momentum=0.01) 74 | 75 | self.hidden_size = hidden_size 76 | self.rnn = nn.LSTM(input_size=num_feats, 77 | hidden_size=hidden_size, 78 | num_layers=num_layers, 79 | batch_first = True) 80 | 81 | # Plug our classifier 82 | self.classifier = nn.Sequential( 83 | nn.Linear(hidden_size, num_classes) 84 | ) 85 | 86 | # Init of last layer 87 | for m in self.classifier: 88 | kaiming_normal(m.weight) 89 | self.bn.weight.data.fill_(1) 90 | self.bn.bias.data.zero_() 91 | 92 | # How to init RNN? 93 | 94 | # Freeze those weights 95 | # for p in self.features.parameters(): 96 | # p.requires_grad = False 97 | 98 | def forward(self, x, hidden=None): 99 | f = self.features(x) 100 | f = self.bn(f.view(f.size(0), -1)) 101 | f = f.unsqueeze(1) 102 | x, hidden = self.rnn(f, hidden) 103 | x = x.view(-1, self.hidden_size) 104 | y = self.classifier(x) 105 | return y 106 | 107 | 108 | class Skip_LSTM_RN50(nn.Module): 109 | ## We use ResNet weights from PyCaffe. 
110 | def __init__(self, num_classes, hidden_size, num_layers): 111 | super(Skip_LSTM_RN50, self).__init__() 112 | 113 | # Loading ResNet arch from PyTorch and weights from Pycaffe 114 | original_model = models.resnet50(pretrained=False) 115 | original_model.load_state_dict(torch.load('./zoo/resnet50.pth')) 116 | 117 | # Everything except the last linear layer 118 | self.features = nn.Sequential(*list(original_model.children())[:-1]) 119 | 120 | # Get number of features of last layer 121 | num_feats = original_model.fc.in_features 122 | 123 | self.bn = nn.BatchNorm1d(num_feats, momentum=0.01) 124 | 125 | self.hidden_size = hidden_size 126 | self.rnn = nn.LSTM(input_size=num_feats, 127 | hidden_size=hidden_size, 128 | num_layers=num_layers, 129 | batch_first = True) 130 | 131 | # Plug our classifier 132 | self.classifier = nn.Sequential( 133 | nn.Linear(hidden_size + num_feats, num_classes) 134 | ) 135 | 136 | # Init of last layer 137 | for m in self.classifier: 138 | kaiming_normal(m.weight) 139 | self.bn.weight.data.fill_(1) 140 | self.bn.bias.data.zero_() 141 | 142 | # How to init RNN? 143 | 144 | # Freeze those weights 145 | # for p in self.features.parameters(): 146 | # p.requires_grad = False 147 | 148 | def forward(self, x, hidden=None): 149 | f = self.features(x) 150 | f = self.bn(f.view(f.size(0), -1)) 151 | x, hidden = self.rnn(f.unsqueeze(1), hidden) 152 | x = x.view(-1, self.hidden_size) 153 | c = torch.cat((x,f),1) # Skip connection to avoid the LSTM eating the whole gradients 154 | y = self.classifier(c) 155 | return y -------------------------------------------------------------------------------- /src/p_data_augmentation.py: -------------------------------------------------------------------------------- 1 | ## Additional transforms for PyTorch data augmentation 2 | ## It is very recommended to use Pillow-SIMD for speed gain in the 5x range. 
3 | ## https://python-pillow.org/pillow-perf/ 4 | ## OpenCV built with IPP and TBB is also fast but inaccurate 5 | 6 | import torch 7 | import random 8 | import PIL.ImageEnhance as ie 9 | import PIL.Image as im 10 | 11 | 12 | class Lighting(object): 13 | """Lighting noise(AlexNet - style PCA - based noise)""" 14 | 15 | def __init__(self, alphastd, eigval, eigvec): 16 | self.alphastd = alphastd 17 | self.eigval = eigval 18 | self.eigvec = eigvec 19 | 20 | def __call__(self, img): 21 | if self.alphastd == 0: 22 | return img 23 | 24 | alpha = img.new().resize_(3).normal_(0, self.alphastd) 25 | rgb = self.eigvec.type_as(img).clone()\ 26 | .mul(alpha.view(1, 3).expand(3, 3))\ 27 | .mul(self.eigval.view(1, 3).expand(3, 3))\ 28 | .sum(1).squeeze() 29 | 30 | return img.add(rgb.view(3, 1, 1).expand_as(img)) 31 | 32 | 33 | class Grayscale(object): 34 | 35 | def __call__(self, img): 36 | gs = img.clone() 37 | gs[0].mul_(0.299).add_(0.587, gs[1]).add_(0.114, gs[2]) 38 | gs[1].copy_(gs[0]) 39 | gs[2].copy_(gs[0]) 40 | return gs 41 | 42 | 43 | class Saturation(object): 44 | 45 | def __init__(self, var): 46 | self.var = var 47 | 48 | def __call__(self, img): 49 | gs = Grayscale()(img) 50 | alpha = random.uniform(0, self.var) 51 | return img.lerp(gs, alpha) 52 | 53 | 54 | class Brightness(object): 55 | 56 | def __init__(self, var): 57 | self.var = var 58 | 59 | def __call__(self, img): 60 | gs = img.new().resize_as_(img).zero_() 61 | alpha = random.uniform(0, self.var) 62 | return img.lerp(gs, alpha) 63 | 64 | 65 | class Contrast(object): 66 | 67 | def __init__(self, var): 68 | self.var = var 69 | 70 | def __call__(self, img): 71 | gs = Grayscale()(img) 72 | gs.fill_(gs.mean()) 73 | alpha = random.uniform(0, self.var) 74 | return img.lerp(gs, alpha) 75 | 76 | 77 | class RandomOrder(object): 78 | """ Composes several transforms together in random order. 
79 | """ 80 | 81 | def __init__(self, transforms): 82 | self.transforms = transforms 83 | 84 | def __call__(self, img): 85 | if self.transforms is None: 86 | return img 87 | order = torch.randperm(len(self.transforms)) 88 | for i in order: 89 | img = self.transforms[i](img) 90 | return img 91 | 92 | 93 | class ColorJitter(RandomOrder): 94 | 95 | def __init__(self, brightness=0.4, contrast=0.4, saturation=0.4): 96 | self.transforms = [] 97 | if brightness != 0: 98 | self.transforms.append(Brightness(brightness)) 99 | if contrast != 0: 100 | self.transforms.append(Contrast(contrast)) 101 | if saturation != 0: 102 | self.transforms.append(Saturation(saturation)) 103 | 104 | class RandomFlip(object): 105 | """Randomly flips the given PIL.Image with a probability of 0.25 horizontal, 106 | 0.25 vertical, 107 | 0.5 as is 108 | """ 109 | 110 | def __call__(self, img): 111 | dispatcher = { 112 | 0: img, 113 | 1: img, 114 | 2: img.transpose(im.FLIP_LEFT_RIGHT), 115 | 3: img.transpose(im.FLIP_TOP_BOTTOM) 116 | } 117 | 118 | return dispatcher[random.randint(0,3)] #randint is inclusive 119 | 120 | class RandomRotate(object): 121 | """Randomly rotate the given PIL.Image with a probability of 1/6 90°, 122 | 1/6 180°, 123 | 1/6 270°, 124 | 1/2 as is 125 | """ 126 | 127 | def __call__(self, img): 128 | dispatcher = { 129 | 0: img, 130 | 1: img, 131 | 2: img, 132 | 3: img.transpose(im.ROTATE_90), 133 | 4: img.transpose(im.ROTATE_180), 134 | 5: img.transpose(im.ROTATE_270) 135 | } 136 | 137 | return dispatcher[random.randint(0,5)] #randint is inclusive 138 | 139 | class PILColorBalance(object): 140 | 141 | def __init__(self, var): 142 | self.var = var 143 | 144 | def __call__(self, img): 145 | alpha = random.uniform(1 - self.var, 1 + self.var) 146 | return ie.Color(img).enhance(alpha) 147 | 148 | class PILContrast(object): 149 | 150 | def __init__(self, var): 151 | self.var = var 152 | 153 | def __call__(self, img): 154 | alpha = random.uniform(1 - self.var, 1 + self.var) 155 | return ie.Contrast(img).enhance(alpha) 156 | 157 | 158 | class PILBrightness(object): 159 | 160 | def __init__(self, var): 161 | self.var = var 162 | 163 | def __call__(self, img): 164 | alpha = random.uniform(1 - self.var, 1 + self.var) 165 | return ie.Brightness(img).enhance(alpha) 166 | 167 | class PILSharpness(object): 168 | 169 | def __init__(self, var): 170 | self.var = var 171 | 172 | def __call__(self, img): 173 | alpha = random.uniform(1 - self.var, 1 + self.var) 174 | return ie.Sharpness(img).enhance(alpha) 175 | 176 | 177 | # Check ImageEnhancer effect: https://www.youtube.com/watch?v=_7iDTpTop04 178 | # Not documented but all enhancements can go beyond 1.0 to 2 179 | # Image must be RGB 180 | # Use Pillow-SIMD because Pillow is too slow 181 | class PowerPIL(RandomOrder): 182 | def __init__(self, rotate=True, 183 | flip=True, 184 | colorbalance=0.4, 185 | contrast=0.4, 186 | brightness=0.4, 187 | sharpness=0.4): 188 | self.transforms = [] 189 | if rotate: 190 | self.transforms.append(RandomRotate()) 191 | if flip: 192 | self.transforms.append(RandomFlip()) 193 | if brightness != 0: 194 | self.transforms.append(PILBrightness(brightness)) 195 | if contrast != 0: 196 | self.transforms.append(PILContrast(contrast)) 197 | if colorbalance != 0: 198 | self.transforms.append(PILColorBalance(colorbalance)) 199 | if sharpness != 0: 200 | self.transforms.append(PILSharpness(sharpness)) -------------------------------------------------------------------------------- /src/p_neuro.py: 
-------------------------------------------------------------------------------- 1 | from torch import nn, ones 2 | from torchvision import models 3 | from torch.nn.init import kaiming_normal 4 | from torch import np 5 | import torch 6 | import torch.nn.functional as F 7 | 8 | 9 | ## Custom baseline 10 | class Net(nn.Module): 11 | def __init__(self, input_size=(3,224,224), nb_classes=17): 12 | 13 | super(Net, self).__init__() 14 | 15 | self.features = nn.Sequential( 16 | nn.Conv2d(3,32,3), 17 | nn.BatchNorm2d(32), 18 | nn.ReLU(), 19 | nn.Conv2d(32,64,3), 20 | nn.BatchNorm2d(64), 21 | nn.ReLU(), 22 | nn.MaxPool2d((3,3)) 23 | ) 24 | 25 | ## Compute linear layer size 26 | self.flat_feats = self._get_flat_feats(input_size, self.features) 27 | 28 | self.classifier = nn.Sequential( 29 | nn.Linear(self.flat_feats, 256), 30 | nn.BatchNorm1d(256), 31 | nn.ReLU(), 32 | nn.Dropout(p=0.15), 33 | nn.Linear(256, 64), 34 | nn.BatchNorm1d(64), 35 | nn.ReLU(), 36 | nn.Dropout(p=0.10), 37 | nn.Linear(64, nb_classes) 38 | ) 39 | 40 | ## Weights initialization 41 | def _weights_init(m): 42 | if isinstance(m, nn.Conv2d or nn.Linear): 43 | kaiming_normal(m.weight) 44 | elif isinstance(m, nn.BatchNorm2d or BatchNorm1d): 45 | m.weight.data.fill_(1) 46 | m.bias.data.zero_() 47 | 48 | self.apply(_weights_init) 49 | 50 | def _get_flat_feats(self, in_size, feats): 51 | f = feats(Variable(ones(1,*in_size))) 52 | return int(np.prod(f.size()[1:])) 53 | 54 | 55 | 56 | def forward(self, x): 57 | feats = self.features(x) 58 | flat_feats = feats.view(-1, self.flat_feats) 59 | out = self.classifier(flat_feats) 60 | return out 61 | 62 | 63 | ## ResNet fine-tuning 64 | class ResNet50(nn.Module): 65 | ## We use ResNet weights from PyCaffe. 66 | def __init__(self, num_classes): 67 | super(ResNet50, self).__init__() 68 | 69 | # Loading ResNet arch from PyTorch and weights from Pycaffe 70 | original_model = models.resnet50(pretrained=False) 71 | original_model.load_state_dict(torch.load('./zoo/resnet50.pth')) 72 | 73 | # Everything except the last linear layer 74 | self.features = nn.Sequential(*list(original_model.children())[:-1]) 75 | 76 | # Get number of features of last layer 77 | num_feats = original_model.fc.in_features 78 | 79 | # Plug our classifier 80 | self.classifier = nn.Sequential( 81 | nn.Linear(num_feats, num_classes) 82 | ) 83 | 84 | # Init of last layer 85 | for m in self.classifier: 86 | kaiming_normal(m.weight) 87 | 88 | # Freeze those weights 89 | # for p in self.features.parameters(): 90 | # p.requires_grad = False 91 | 92 | def forward(self, x): 93 | f = self.features(x) 94 | f = f.view(f.size(0), -1) 95 | y = self.classifier(f) 96 | return y 97 | 98 | class ResNet101(nn.Module): 99 | ## We use ResNet weights from PyCaffe. 
100 | def __init__(self, num_classes): 101 | super(ResNet101, self).__init__() 102 | 103 | # Loading ResNet arch from PyTorch and weights from Pycaffe 104 | original_model = models.resnet101(pretrained=False) 105 | original_model.load_state_dict(torch.load('./zoo/resnet101.pth')) 106 | 107 | # Everything except the last linear layer 108 | self.features = nn.Sequential(*list(original_model.children())[:-1]) 109 | 110 | # Get number of features of last layer 111 | num_feats = original_model.fc.in_features 112 | 113 | # Plug our classifier 114 | self.classifier = nn.Sequential( 115 | nn.Linear(num_feats, num_classes) 116 | ) 117 | 118 | # Init of last layer 119 | for m in self.classifier: 120 | kaiming_normal(m.weight) 121 | 122 | # Freeze those weights 123 | # for p in self.features.parameters(): 124 | # p.requires_grad = False 125 | 126 | def forward(self, x): 127 | f = self.features(x) 128 | f = f.view(f.size(0), -1) 129 | y = self.classifier(f) 130 | return y 131 | 132 | class ResNet152(nn.Module): 133 | ## We use ResNet weights from PyCaffe. 134 | def __init__(self, num_classes): 135 | super(ResNet152, self).__init__() 136 | 137 | # Loading ResNet arch from PyTorch and weights from Pycaffe 138 | original_model = models.resnet152(pretrained=False) 139 | original_model.load_state_dict(torch.load('./zoo/resnet152.pth')) 140 | 141 | # Everything except the last linear layer 142 | self.features = nn.Sequential(*list(original_model.children())[:-1]) 143 | 144 | # Get number of features of last layer 145 | num_feats = original_model.fc.in_features 146 | 147 | # Plug our classifier 148 | self.classifier = nn.Sequential( 149 | nn.Linear(num_feats, num_classes) 150 | ) 151 | 152 | # Init of last layer 153 | for m in self.classifier: 154 | kaiming_normal(m.weight) 155 | 156 | # Freeze those weights 157 | # for p in self.features.parameters(): 158 | # p.requires_grad = False 159 | 160 | def forward(self, x): 161 | f = self.features(x) 162 | f = f.view(f.size(0), -1) 163 | y = self.classifier(f) 164 | return y 165 | 166 | ## VGG fine-tuning 167 | class VGG16(nn.Module): 168 | def __init__(self, nb_classes=17): 169 | super(VGG16, self).__init__() 170 | original_model = models.vgg16(pretrained=False) 171 | self.features = original_model.features 172 | self.classifier = nn.Sequential( 173 | nn.Dropout(), 174 | nn.Linear(25088, 4096), 175 | nn.ReLU(inplace=True), 176 | nn.Dropout(), 177 | nn.Linear(4096, 4096), 178 | nn.ReLU(inplace=True), 179 | nn.Linear(4096, num_classes), 180 | ) 181 | 182 | # Freeze Convolutional weights 183 | for p in self.features.parameters(): 184 | p.requires_grad = False 185 | 186 | def forward(self, x): 187 | f = self.features(x) 188 | f = f.view(f.size(0), -1) 189 | y = self.classifier(f) 190 | return y 191 | 192 | class DenseNet121(nn.Module): 193 | def __init__(self, num_classes): 194 | super(DenseNet121, self).__init__() 195 | 196 | original_model = models.densenet121(pretrained=True) 197 | 198 | # Everything except the last linear layer 199 | self.features = nn.Sequential(*list(original_model.children())[:-1]) 200 | 201 | # Get number of features of last layer 202 | num_feats = original_model.classifier.in_features 203 | 204 | # Plug our classifier 205 | self.classifier = nn.Sequential( 206 | nn.Linear(num_feats, num_classes) 207 | ) 208 | 209 | # Init of last layer 210 | for m in self.classifier: 211 | kaiming_normal(m.weight) 212 | 213 | # Freeze weights 214 | # for p in self.features.parameters(): 215 | # p.requires_grad = False 216 | 217 | def forward(self, x): 218 | f 
= self.features(x) 219 | out = F.relu(f, inplace=True) 220 | out = F.avg_pool2d(out, kernel_size=7).view(f.size(0), -1) 221 | out = self.classifier(out) 222 | return out -------------------------------------------------------------------------------- /main_pytorch-baseline.py: -------------------------------------------------------------------------------- 1 | ## Custom Imports 2 | from src.p_dataload import KaggleAmazonDataset 3 | from src.p_neuro import Net, ResNet50, ResNet101, DenseNet121 4 | from src.p_training import train, snapshot 5 | from src.p_validation import validate 6 | from src.p_model_selection import train_valid_split 7 | from src.p_logger import setup_logs 8 | from src.p_prediction import predict, output 9 | from src.p_data_augmentation import ColorJitter 10 | # from src.p_metrics import SmoothF2Loss 11 | from src.p_sampler import SubsetSampler, balance_weights 12 | 13 | ## Utilities 14 | import random 15 | import logging 16 | import time 17 | from timeit import default_timer as timer 18 | import os 19 | 20 | ## Libraries 21 | import numpy as np 22 | import math 23 | 24 | ## Torch 25 | import torch.optim as optim 26 | import torch.nn.functional as F 27 | from torchvision import transforms 28 | from torch.utils.data import DataLoader 29 | import torch 30 | from torchsample.transforms import Affine 31 | from torch.utils.data.sampler import WeightedRandomSampler, SubsetRandomSampler 32 | 33 | ############################################################################ 34 | ####### CONTROL CENTER ############# STAR COMMAND ######################### 35 | ## Variables setup 36 | model = ResNet50(17).cuda() 37 | # model = Net().cuda() 38 | # model = WideResNet(16, 17, 4, 0.3) 39 | # model = ResNet101(17).cuda() 40 | # model = DenseNet121(17).cuda() # Note: Until May 5 19:12 CEST DenseNet121 was actually ResNet50 :/ 41 | 42 | epochs = 30 43 | batch_size = 16 44 | 45 | # Run name 46 | run_name = time.strftime("%Y-%m-%d_%H%M-") + "BASELINE" 47 | 48 | ## Normalization on dataset mean/std 49 | # normalize = transforms.Normalize(mean=[0.30249774, 0.34421161, 0.31507745], 50 | # std=[0.13718569, 0.14363895, 0.16695958]) 51 | 52 | ## Normalization on ImageNet mean/std for finetuning 53 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 54 | std=[0.229, 0.224, 0.225]) 55 | 56 | # Note, p_training has lr_decay automated 57 | # optimizer = optim.Adam(model.parameters(), lr=0.1) # From scratch # Don't use Weight Decay with PReLU 58 | # optimizer = optim.SGD(model.parameters(), lr=1e-1, momentum=0.9, weight_decay=1e-4) # From scratch 59 | optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0.9) # Finetuning whole model 60 | 61 | criterion = torch.nn.MultiLabelSoftMarginLoss() 62 | # criterion = SmoothF2Loss() # Using F2 directly as a cost function does 0.88 as a final cross validation. This is probably explained because cross-enropy is very efficient for sigmoid outputs (turning it into a convex problem). So keep Sigmoid + Cross entropy or something else + SmoothF2 63 | 64 | save_dir = './snapshots' 65 | 66 | ####### CONTROL CENTER ############# STAR COMMAND ######################### 67 | ############################################################################ 68 | 69 | if __name__ == "__main__": 70 | # Initiate timer 71 | global_timer = timer() 72 | 73 | # Setup logs 74 | logger = setup_logs(save_dir, run_name) 75 | 76 | # Setting random seeds for reproducibility. 
(Caveat, some CuDNN algorithms are non-deterministic) 77 | torch.manual_seed(1337) 78 | torch.cuda.manual_seed(1337) 79 | np.random.seed(1337) 80 | random.seed(1337) 81 | 82 | ############################################################## 83 | ## Loading the dataset 84 | 85 | ## Augmentation + Normalization for full training 86 | ds_transform_augmented = transforms.Compose([ 87 | transforms.RandomSizedCrop(224), 88 | transforms.RandomHorizontalFlip(), 89 | transforms.ToTensor(), 90 | ColorJitter(), 91 | normalize 92 | # Affine( 93 | # rotation_range = 15, 94 | # translation_range = (0.2,0.2), 95 | # shear_range = math.pi/6, 96 | # zoom_range=(0.7,1.4) 97 | # ) 98 | ]) 99 | 100 | ## Normalization only for validation and test 101 | ds_transform_raw = transforms.Compose([ 102 | transforms.Scale(224), 103 | transforms.ToTensor(), 104 | normalize 105 | ]) 106 | 107 | #### ######### ######## ########### ##### 108 | 109 | X_train = KaggleAmazonDataset('./data/train.csv','./data/train-jpg/','.jpg', 110 | ds_transform_augmented 111 | ) 112 | X_val = KaggleAmazonDataset('./data/train.csv','./data/train-jpg/','.jpg', 113 | ds_transform_raw 114 | ) 115 | 116 | # Resample the dataset 117 | # weights = balance_weights(X_train.getDF(), 'tags', X_train.getLabelEncoder()) 118 | # weights = np.clip(weights,0.02,0.2) # We need to let the net view the most common classes or learning is too slow 119 | 120 | # Creating a validation split 121 | train_idx, valid_idx = train_valid_split(X_train, 0.2) 122 | 123 | # weights[valid_idx] = 0 124 | 125 | # train_sampler = WeightedRandomSampler(weights, len(train_idx)) 126 | train_sampler = SubsetRandomSampler(train_idx) 127 | valid_sampler = SubsetSampler(valid_idx) 128 | 129 | ###### ########## ########## ######## ######### 130 | 131 | # Both dataloader loads from the same dataset but with different indices 132 | train_loader = DataLoader(X_train, 133 | batch_size=batch_size, 134 | sampler=train_sampler, 135 | num_workers=4, 136 | pin_memory=True) 137 | 138 | valid_loader = DataLoader(X_val, 139 | batch_size=batch_size, 140 | sampler=valid_sampler, 141 | num_workers=4, 142 | pin_memory=True) 143 | 144 | ########################################################### 145 | ## Start training 146 | best_score = 0. 
147 | for epoch in range(epochs): 148 | epoch_timer = timer() 149 | 150 | # Train and validate 151 | train(epoch, train_loader, model, criterion, optimizer) 152 | score, loss, threshold = validate(epoch, valid_loader, model, criterion, X_train.getLabelEncoder()) 153 | # Save 154 | is_best = score > best_score 155 | best_score = max(score, best_score) 156 | snapshot(save_dir, run_name, is_best,{ 157 | 'epoch': epoch + 1, 158 | 'state_dict': model.state_dict(), 159 | 'best_score': best_score, 160 | 'optimizer': optimizer.state_dict(), 161 | 'threshold': threshold, 162 | 'val_loss': loss 163 | }) 164 | 165 | end_epoch_timer = timer() 166 | logger.info("#### End epoch {}, elapsed time: {}".format(epoch, end_epoch_timer - epoch_timer)) 167 | 168 | ########################################################### 169 | ## Prediction 170 | X_test = KaggleAmazonDataset('./data/sample_submission.csv','./data/test-jpg/','.jpg', 171 | ds_transform_raw 172 | ) 173 | test_loader = DataLoader(X_test, 174 | batch_size=batch_size, 175 | num_workers=4, 176 | pin_memory=True) 177 | 178 | # Load model from best iteration 179 | logger.info('===> loading best model for prediction') 180 | checkpoint = torch.load(os.path.join(save_dir, 181 | run_name + '-model_best.pth' 182 | ) 183 | ) 184 | model.load_state_dict(checkpoint['state_dict']) 185 | 186 | # Predict 187 | predictions = predict(test_loader, model) # TODO load model from the best on disk 188 | 189 | output(predictions, 190 | checkpoint['threshold'], 191 | X_test, 192 | X_train.getLabelEncoder(), 193 | './out', 194 | run_name, 195 | checkpoint['best_score']) # TODO early_stopping and use best_score 196 | 197 | ########################################################## 198 | 199 | end_global_timer = timer() 200 | logger.info("################## Success #########################") 201 | logger.info("Total elapsed time: %s" % (end_global_timer - global_timer)) 202 | -------------------------------------------------------------------------------- /main_pytorch.py: -------------------------------------------------------------------------------- 1 | ## Custom Imports 2 | from src.p2_dataload import KaggleAmazonDataset 3 | from src.p_neuro import Net, ResNet50, ResNet101, ResNet152, DenseNet121 4 | from src.p3_neuroRNN import GRU_ResNet50, LSTM_ResNet50, Skip_LSTM_RN50 5 | from src.p_training import train, snapshot 6 | #from src.p2_validation import validate 7 | from src.p_validation import validate 8 | from src.p_model_selection import train_valid_split 9 | from src.p_logger import setup_logs 10 | #from src.p2_prediction import predict, output 11 | from src.p_prediction import predict, output 12 | from src.p_data_augmentation import ColorJitter, PowerPIL 13 | # from src.p_metrics import SmoothF2Loss 14 | from src.p2_loss import ConvolutedLoss 15 | from src.p_sampler import SubsetSampler, balance_weights 16 | 17 | ## Utilities 18 | import random 19 | import logging 20 | import time 21 | from timeit import default_timer as timer 22 | import os 23 | 24 | ## Libraries 25 | import numpy as np 26 | import math 27 | 28 | ## Torch 29 | import torch.optim as optim 30 | import torch.nn.functional as F 31 | from torchvision import transforms 32 | from torch.utils.data import DataLoader 33 | import torch 34 | from torchsample.transforms import Affine 35 | from torch.utils.data.sampler import WeightedRandomSampler, SubsetRandomSampler 36 | 37 | ############################################################################ 38 | ####### CONTROL CENTER ############# STAR COMMAND 
######################### 39 | ## Variables setup 40 | model = ResNet50(17).cuda() 41 | # model = ResNet152(17).cuda() 42 | 43 | # model = GRU_ResNet50(17, 128, 2).cuda() 44 | # model = LSTM_ResNet50(17, 128, 2).cuda() 45 | # model = Skip_LSTM_RN50(17, 128, 2).cuda() 46 | 47 | epochs = 16 48 | batch_size = 64 49 | 50 | # Run name 51 | run_name = time.strftime("%Y-%m-%d_%H%M-") + "resnet50-L2reg-new-data" 52 | ## Normalization on ImageNet mean/std for finetuning 53 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 54 | std=[0.229, 0.224, 0.225]) 55 | 56 | # Note, p_training has lr_decay automated 57 | optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0.9, weight_decay=0.0005) # Finetuning whole model 58 | 59 | # criterion = ConvolutedLoss() 60 | criterion = torch.nn.MultiLabelSoftMarginLoss( 61 | weight = torch.Tensor([1, 4, 2, 1, 62 | 1, 3, 3, 3, 63 | 4, 4, 1, 2, 64 | 1, 1, 3, 4, 1]) 65 | ).cuda() 66 | 67 | #classes = [ 68 | # 'clear', 'cloudy', 'haze','partly_cloudy', 69 | # 'agriculture','artisinal_mine','bare_ground','blooming', 70 | # 'blow_down','conventional_mine','cultivation','habitation', 71 | # 'primary','road','selective_logging','slash_burn','water' 72 | # ] 73 | ## Frequency 74 | # [28203, 2330, 2695, 7251, 75 | # 12338, 339, 859, 332, 76 | # 98, 100, 4477, 3662, 77 | # 37840, 8076, 340, 209, 7262] 78 | 79 | save_dir = './snapshots' 80 | 81 | ####### CONTROL CENTER ############# STAR COMMAND ######################### 82 | ############################################################################ 83 | 84 | if __name__ == "__main__": 85 | # Initiate timer 86 | global_timer = timer() 87 | 88 | # Setup logs 89 | logger = setup_logs(save_dir, run_name) 90 | 91 | # Setting random seeds for reproducibility. (Caveat, some CuDNN algorithms are non-deterministic) 92 | torch.manual_seed(1337) 93 | torch.cuda.manual_seed(1337) 94 | np.random.seed(1337) 95 | random.seed(1337) 96 | 97 | ############################################################## 98 | ## Loading the dataset 99 | 100 | ## Augmentation + Normalization for full training 101 | ds_transform_augmented = transforms.Compose([ 102 | transforms.RandomSizedCrop(224), 103 | PowerPIL(), 104 | transforms.ToTensor(), 105 | # ColorJitter(), # Use PowerPIL instead, with PillowSIMD it's much more efficient 106 | normalize, 107 | # Affine( 108 | # rotation_range = 15, 109 | # translation_range = (0.2,0.2), 110 | # shear_range = math.pi/6, 111 | # zoom_range=(0.7,1.4) 112 | #) 113 | ]) 114 | 115 | ## Normalization only for validation and test 116 | ds_transform_raw = transforms.Compose([ 117 | transforms.Scale(224), 118 | transforms.ToTensor(), 119 | normalize 120 | ]) 121 | 122 | #### ######### ######## ########### ##### 123 | 124 | X_train = KaggleAmazonDataset('./data/train_v2.csv','./data/train-jpg/','.jpg', 125 | ds_transform_augmented 126 | ) 127 | X_val = KaggleAmazonDataset('./data/train_v2.csv','./data/train-jpg/','.jpg', 128 | ds_transform_raw 129 | ) 130 | 131 | # Resample the dataset 132 | # weights = balance_weights(X_train.getDF(), 'tags', X_train.getLabelEncoder()) 133 | # weights = np.clip(weights,0.02,0.2) # We need to let the net view the most common classes or learning is too slow 134 | 135 | # Creating a validation split 136 | train_idx, valid_idx = train_valid_split(X_train, 0.2) 137 | 138 | # weights[valid_idx] = 0 139 | 140 | # train_sampler = WeightedRandomSampler(weights, len(train_idx)) 141 | train_sampler = SubsetRandomSampler(train_idx) 142 | valid_sampler = SubsetSampler(valid_idx) 143 | 
144 | ###### ########## ########## ######## ######### 145 | 146 | # Both dataloader loads from the same dataset but with different indices 147 | train_loader = DataLoader(X_train, 148 | batch_size=batch_size, 149 | sampler=train_sampler, 150 | num_workers=4, 151 | pin_memory=True) 152 | 153 | valid_loader = DataLoader(X_val, 154 | batch_size=batch_size, 155 | sampler=valid_sampler, 156 | num_workers=4, 157 | pin_memory=True) 158 | 159 | ########################################################### 160 | ## Start training 161 | best_score = 0. 162 | for epoch in range(epochs): 163 | epoch_timer = timer() 164 | 165 | # Train and validate 166 | train(epoch, train_loader, model, criterion, optimizer) 167 | score, loss, threshold = validate(epoch, valid_loader, model, criterion, X_train.getLabelEncoder()) 168 | # Save 169 | is_best = score > best_score 170 | best_score = max(score, best_score) 171 | snapshot(save_dir, run_name, is_best,{ 172 | 'epoch': epoch + 1, 173 | 'state_dict': model.state_dict(), 174 | 'best_score': best_score, 175 | 'optimizer': optimizer.state_dict(), 176 | 'threshold': threshold, 177 | 'val_loss': loss 178 | }) 179 | 180 | end_epoch_timer = timer() 181 | logger.info("#### End epoch {}, elapsed time: {}".format(epoch, end_epoch_timer - epoch_timer)) 182 | 183 | ########################################################### 184 | ## Prediction 185 | X_test = KaggleAmazonDataset('./data/sample_submission_v2.csv','./data/test-jpg/','.jpg', 186 | ds_transform_raw 187 | ) 188 | test_loader = DataLoader(X_test, 189 | batch_size=batch_size, 190 | num_workers=4, 191 | pin_memory=True) 192 | 193 | # Load model from best iteration 194 | logger.info('===> loading best model for prediction') 195 | checkpoint = torch.load(os.path.join(save_dir, 196 | run_name + '-model_best.pth' 197 | ) 198 | ) 199 | model.load_state_dict(checkpoint['state_dict']) 200 | 201 | # Predict 202 | predictions = predict(test_loader, model) # TODO load model from the best on disk 203 | 204 | output(predictions, 205 | checkpoint['threshold'], 206 | X_test, 207 | X_train.getLabelEncoder(), 208 | './out', 209 | run_name, 210 | checkpoint['best_score']) # TODO early_stopping and use best_score 211 | 212 | ########################################################## 213 | 214 | end_global_timer = timer() 215 | logger.info("################## Success #########################") 216 | logger.info("Total elapsed time: %s" % (end_global_timer - global_timer)) -------------------------------------------------------------------------------- /baseline/unfinished_attempts/000-Mxnet-Resnet-extraction-XGBoost-MultiLabel-TODO.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 5, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2017-04-21T06:06:23.865835Z", 9 | "start_time": "2017-04-21T06:06:23.863222Z" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "import pandas as pd\n", 15 | "import numpy as np" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 6, 21 | "metadata": { 22 | "ExecuteTime": { 23 | "end_time": "2017-04-21T06:06:24.336423Z", 24 | "start_time": "2017-04-21T06:06:24.315747Z" 25 | } 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "df_train = pd.read_csv('./data/train.csv')" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 7, 35 | "metadata": { 36 | "ExecuteTime": { 37 | "end_time": "2017-04-21T06:06:24.608470Z", 38 | "start_time": 
"2017-04-21T06:06:24.603983Z" 39 | } 40 | }, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | "Reading labels ...\n", 47 | " image_name tags\n", 48 | "0 train_0 haze primary\n", 49 | "1 train_1 agriculture clear primary water\n", 50 | "2 train_2 clear primary\n", 51 | "3 train_3 clear primary\n", 52 | "4 train_4 agriculture clear habitation primary road\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "print(\"Reading labels ...\")\n", 58 | "print(df_train.head())" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 9, 64 | "metadata": { 65 | "ExecuteTime": { 66 | "end_time": "2017-04-21T06:06:37.925413Z", 67 | "start_time": "2017-04-21T06:06:30.374043Z" 68 | } 69 | }, 70 | "outputs": [], 71 | "source": [ 72 | "X = np.array([np.load('./baseline/tmp/TMPDIR%s.jpg.npy' % str(name)) for name in df_train['image_name'].tolist()])" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 10, 78 | "metadata": { 79 | "ExecuteTime": { 80 | "end_time": "2017-04-21T06:06:37.970605Z", 81 | "start_time": "2017-04-21T06:06:37.926635Z" 82 | } 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "flatten = lambda l: [item for sublist in l for item in sublist]\n", 87 | "labels = list(set(flatten([l.split(' ') for l in df_train['tags'].values])))\n", 88 | "\n", 89 | "label_map = {l: i for i, l in enumerate(labels)}\n", 90 | "inv_label_map = {i: l for l, i in label_map.items()}\n" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 11, 96 | "metadata": { 97 | "ExecuteTime": { 98 | "end_time": "2017-04-21T06:06:37.981505Z", 99 | "start_time": "2017-04-21T06:06:37.971927Z" 100 | } 101 | }, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "{'agriculture': 16,\n", 107 | " 'artisinal_mine': 2,\n", 108 | " 'bare_ground': 10,\n", 109 | " 'blooming': 8,\n", 110 | " 'blow_down': 3,\n", 111 | " 'clear': 6,\n", 112 | " 'cloudy': 1,\n", 113 | " 'conventional_mine': 4,\n", 114 | " 'cultivation': 5,\n", 115 | " 'habitation': 11,\n", 116 | " 'haze': 15,\n", 117 | " 'partly_cloudy': 13,\n", 118 | " 'primary': 0,\n", 119 | " 'road': 9,\n", 120 | " 'selective_logging': 12,\n", 121 | " 'slash_burn': 7,\n", 122 | " 'water': 14}" 123 | ] 124 | }, 125 | "execution_count": 11, 126 | "metadata": {}, 127 | "output_type": "execute_result" 128 | } 129 | ], 130 | "source": [ 131 | "label_map" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 40, 137 | "metadata": { 138 | "ExecuteTime": { 139 | "end_time": "2017-04-21T06:31:30.982656Z", 140 | "start_time": "2017-04-21T06:31:30.980680Z" 141 | } 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "from sklearn.metrics import fbeta_score\n", 146 | "import xgboost as xgb" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 13, 152 | "metadata": { 153 | "ExecuteTime": { 154 | "end_time": "2017-04-21T06:06:38.132293Z", 155 | "start_time": "2017-04-21T06:06:38.128493Z" 156 | } 157 | }, 158 | "outputs": [], 159 | "source": [ 160 | "param = {}\n", 161 | "param['objective'] = 'binary:softprob'\n", 162 | "param['eta'] = 0.2\n", 163 | "param['max_depth'] = 4\n", 164 | "param['silent'] = 1\n", 165 | "param['num_class'] = 3\n", 166 | "param['eval_metric'] = \"logloss\"\n", 167 | "param['min_child_weight'] = 1\n", 168 | "param['subsample'] = 0.7\n", 169 | "param['colsample_bytree'] = 0.5\n", 170 | "param['seed'] = 1337" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 31, 176 | "metadata": { 177 | 
"ExecuteTime": { 178 | "end_time": "2017-04-21T06:18:58.799314Z", 179 | "start_time": "2017-04-21T06:18:58.794489Z" 180 | } 181 | }, 182 | "outputs": [], 183 | "source": [ 184 | "from sklearn.preprocessing import LabelBinarizer\n", 185 | "from sklearn.model_selection import train_test_split\n", 186 | "from sklearn.ensemble import RandomForestClassifier\n", 187 | "from sklearn.multioutput import MultiOutputClassifier" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 16, 193 | "metadata": { 194 | "ExecuteTime": { 195 | "end_time": "2017-04-21T06:10:22.617461Z", 196 | "start_time": "2017-04-21T06:10:22.614960Z" 197 | }, 198 | "collapsed": true 199 | }, 200 | "outputs": [], 201 | "source": [ 202 | "le = LabelBinarizer()" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 18, 208 | "metadata": { 209 | "ExecuteTime": { 210 | "end_time": "2017-04-21T06:10:45.783573Z", 211 | "start_time": "2017-04-21T06:10:45.649143Z" 212 | } 213 | }, 214 | "outputs": [], 215 | "source": [ 216 | "y = le.fit_transform(df_train['tags'])" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 24, 222 | "metadata": { 223 | "ExecuteTime": { 224 | "end_time": "2017-04-21T06:11:56.363157Z", 225 | "start_time": "2017-04-21T06:11:56.361001Z" 226 | } 227 | }, 228 | "outputs": [], 229 | "source": [ 230 | "plst = list(param.items())" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 27, 236 | "metadata": { 237 | "ExecuteTime": { 238 | "end_time": "2017-04-21T06:15:04.726534Z", 239 | "start_time": "2017-04-21T06:15:04.594424Z" 240 | } 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "# Create a validation set\n", 245 | "x_trn, x_val, y_trn, y_val = train_test_split(X, y, test_size=0.2, random_state=42)" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 42, 251 | "metadata": { 252 | "ExecuteTime": { 253 | "end_time": "2017-04-21T06:31:47.113599Z", 254 | "start_time": "2017-04-21T06:31:47.111312Z" 255 | } 256 | }, 257 | "outputs": [], 258 | "source": [ 259 | "xgb_c = xgb.XGBClassifier(n_estimators=100)" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 43, 265 | "metadata": { 266 | "ExecuteTime": { 267 | "end_time": "2017-04-21T06:31:53.203540Z", 268 | "start_time": "2017-04-21T06:31:53.201474Z" 269 | }, 270 | "collapsed": true 271 | }, 272 | "outputs": [], 273 | "source": [ 274 | "multi_target_forest = MultiOutputClassifier(xgb_c, n_jobs=-1)" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 44, 280 | "metadata": { 281 | "ExecuteTime": { 282 | "end_time": "2017-04-21T06:31:54.526726Z", 283 | "start_time": "2017-04-21T06:31:54.524871Z" 284 | }, 285 | "collapsed": true 286 | }, 287 | "outputs": [], 288 | "source": [ 289 | "X = X.reshape(40479,2048)" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": { 296 | "ExecuteTime": { 297 | "start_time": "2017-04-21T06:31:59.421Z" 298 | } 299 | }, 300 | "outputs": [], 301 | "source": [ 302 | "multi_target_forest.fit(X, y)" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": { 309 | "ExecuteTime": { 310 | "start_time": "2017-04-21T06:34:09.608Z" 311 | }, 312 | "collapsed": true 313 | }, 314 | "outputs": [], 315 | "source": [ 316 | "from sklearn.externals import joblib\n", 317 | "joblib.dump(multi_target_forest, 'multi_target_forest.pkl') " 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | 
"execution_count": null, 323 | "metadata": { 324 | "collapsed": true 325 | }, 326 | "outputs": [], 327 | "source": [] 328 | } 329 | ], 330 | "metadata": { 331 | "kernelspec": { 332 | "display_name": "Python 3", 333 | "language": "python", 334 | "name": "python3" 335 | }, 336 | "language_info": { 337 | "codemirror_mode": { 338 | "name": "ipython", 339 | "version": 3 340 | }, 341 | "file_extension": ".py", 342 | "mimetype": "text/x-python", 343 | "name": "python", 344 | "nbconvert_exporter": "python", 345 | "pygments_lexer": "ipython3", 346 | "version": "3.6.0" 347 | } 348 | }, 349 | "nbformat": 4, 350 | "nbformat_minor": 2 351 | } 352 | -------------------------------------------------------------------------------- /src/_deprecated.py: -------------------------------------------------------------------------------- 1 | ### From Validation.py 2 | 3 | ## DEPRECATED: Unfortunately COBYLA from Scipy can does not respect "lexical bounds". 4 | ## Beware: the following will probably overfit the threshold to the validation set 5 | ################################################################################## 6 | ## Metrics 7 | ## Given the labels imbalance we can't use the same threshold for each label. 8 | ## We could implement our own maximizer on all 17 classes but scipy.optimize already have 9 | ## 4 optimizations algorithms in C/Fortran that can work with constraints: L-BFGS-B, TNC, COBYLA and SLSQP. 10 | ## Of those only cobyla doesn't rely on 2nd order hessians which are error-prone with our function 11 | ## based on inequalities 12 | 13 | # Cobyla constraints are build by comparing return value with 0. 14 | # They must be >= 0 or be rejected 15 | 16 | def constr_sup0(x): 17 | return np.min(x) 18 | def constr_inf1(x): 19 | return 1 - np.max(x) 20 | 21 | def f2_score(true_target, predictions): 22 | 23 | def f_neg(threshold): 24 | ## Scipy tries to minimize the function so we must get its inverse 25 | return - fbeta_score(true_target, predictions > threshold, beta=2, average='samples') 26 | 27 | # Initialization of best threshold search 28 | thr_0 = np.array([0.2 for i in range(17)]) 29 | 30 | # Search 31 | thr_opt = fmin_cobyla(f_neg, thr_0, [constr_sup0,constr_inf1], disp=0) 32 | 33 | logger.info("===> Optimal threshold for each label:\n{}".format(thr_opt)) 34 | 35 | score = fbeta_score(true_target, predictions > thr_opt, beta=2, average='samples') 36 | return score, thr_opt 37 | 38 | ## The jit is slower than scikit by a few ms. Unless the optimizing loop can be JIT too it's not worth it 39 | 40 | ################################################################################## 41 | ## Metrics 42 | ## Given the labels imbalance we can't use the same threshold for each label. 
43 | ## We loop on each column label independently and maximize F2 score 44 | ## Limit: might overfit 45 | ## We don't model interdependance of coefs 46 | 47 | from numba import jit 48 | 49 | 50 | # True Positive 51 | @jit(nopython=True) 52 | def true_pos(pred_labels, true_labels): 53 | return np.sum(np.logical_and(pred_labels == 1, true_labels == 1)) 54 | 55 | # True Negative 56 | @jit(nopython=True) 57 | def true_neg(pred_labels, true_labels): 58 | return np.sum(np.logical_and(pred_labels == 0, true_labels == 0)) 59 | 60 | # False Positive - Type I Error 61 | @jit(nopython=True) 62 | def false_pos(pred_labels, true_labels): 63 | return np.sum(np.logical_and(pred_labels == 1, true_labels == 0)) 64 | 65 | # False Negative - Type II Error 66 | @jit(nopython=True) 67 | def false_neg(pred_labels, true_labels): 68 | return np.sum(np.logical_and(pred_labels == 0, true_labels == 1)) 69 | 70 | @jit(nopython=True) 71 | def precision(pred_labels, true_labels): 72 | TP = true_pos(pred_labels, true_labels) 73 | FP = false_pos(pred_labels, true_labels) 74 | 75 | # Edge cases True Positives = 0, False negative = 0 76 | # No predicted labels at all 77 | # Shouldn't happen all photos must have at least one label 78 | # We return 0 so that the threshold becomes better 79 | # Should we penalize more ? 80 | if TP==0 and FP==0: return 0 81 | 82 | return TP / (TP + FP) 83 | 84 | @jit(nopython=True) 85 | def recall(pred_labels, true_labels): 86 | TP = true_pos(pred_labels, true_labels) 87 | FN = false_neg(pred_labels, true_labels) 88 | 89 | # Edge cases True Positives = 0, False negative = 0 90 | # i.e no label in the true_labels input. 91 | # Shouldn't happen all photos have at least one label 92 | 93 | return TP / (TP + FN) 94 | 95 | @jit(nopython=True) 96 | def f2_score_macro(pred_labels, true_labels): 97 | p = precision(pred_labels, true_labels) 98 | r = recall(pred_labels, true_labels) 99 | if p == 0 and r == 0: return 0 100 | return 5 * p * r / (4 * p + r) 101 | 102 | @jit 103 | def f2_score_mean(pred_labels, true_labels): 104 | # F2_score_mean accelerated by numba 105 | # Cannot force nopython mode because for loop on arrays does not work 106 | i = 0 107 | acc = 0 108 | for (x,y) in zip(pred_labels,true_labels): 109 | acc += f2_score_macro(x,y) 110 | i+=1 111 | return acc / i 112 | 113 | 114 | ### Kaggle kernel search 115 | def search_best_threshold(p_valid, y_valid, try_all=False, verbose=False): 116 | p_valid, y_valid = np.array(p_valid), np.array(y_valid) 117 | 118 | best_threshold = 0 119 | best_score = -1 120 | totry = np.arange(0,1,0.005) if try_all is False else np.unique(p_valid) 121 | for t in totry: 122 | score = fbeta_score(y_valid, p_valid > t, beta=2, average='samples') 123 | if score > best_score: 124 | best_score = score 125 | best_threshold = t 126 | logger.info("===> Optimal threshold for each label:\n{}".format(best_threshold)) 127 | return best_score, best_threshold 128 | 129 | # Search with L-BFGS-B 130 | thr_0 = np.array([0.20 for i in range(17)]) 131 | constraints = [(0.,1.) 
for i in range(17)] 132 | thr_opt, score_neg, dico = fmin_l_bfgs_b(f_neg, thr_0, bounds=constraints, approx_grad=True, epsilon=0.05) 133 | 134 | ## From dataload.py 135 | ################################################## 136 | ## DEPRECATED: https://discuss.pytorch.org/t/feedback-on-pytorch-for-kaggle-competitions/2252/8?u=mratsim 137 | ## Augmentation on PyTorch are done randomly at each epoch 138 | 139 | class AugmentedAmazonDataset(Dataset): 140 | """Dataset wrapping images and target labels for Kaggle - Planet Amazon from Space competition. 141 | This dataset is augmented 142 | 143 | Arguments: 144 | A CSV file path 145 | Path to image folder 146 | Extension of images 147 | """ 148 | 149 | def __init__(self, csv_path, img_path, img_ext, transform=None): 150 | 151 | tmp_df = pd.read_csv(csv_path) 152 | assert tmp_df['image_name'].apply(lambda x: os.path.isfile(img_path + x + img_ext)).all(), \ 153 | "Some images referenced in the CSV file were not found" 154 | 155 | self.mlb = MultiLabelBinarizer() 156 | self.img_path = img_path 157 | self.img_ext = img_ext 158 | self.transform = transform 159 | 160 | self.X = tmp_df['image_name'] 161 | self.y = self.mlb.fit_transform(tmp_df['tags'].str.split()).astype(np.float32) 162 | self.augmentNumber = 14 # TODO, do something about this harcoded value 163 | 164 | def __getitem__(self, index): 165 | real_length = self.real_length() 166 | real_index = index % real_length 167 | 168 | img = Image.open(self.img_path + self.X[real_index] + self.img_ext) 169 | img = img.convert('RGB') 170 | 171 | ## Augmentation code 172 | if 0 <= index < real_length: 173 | pass 174 | 175 | ### Mirroring and Rotating 176 | elif real_length <= index < 2 * real_length: 177 | img = img.transpose(FLIP_LEFT_RIGHT) 178 | elif 2 * real_length <= index < 3 * real_length: 179 | img = img.transpose(FLIP_TOP_BOTTOM) 180 | elif 3 * real_length <= index < 4 * real_length: 181 | img = img.transpose(ROTATE_90) 182 | elif 4 * real_length <= index < 5 * real_length: 183 | img = img.transpose(ROTATE_180) 184 | elif 5 * real_length <= index < 6 * real_length: 185 | img = img.transpose(ROTATE_270) 186 | 187 | ### Color balance 188 | elif 6 * real_length <= index < 7 * real_length: 189 | img = Color(img).enhance(0.95) 190 | elif 7 * real_length <= index < 8 * real_length: 191 | img = Color(img).enhance(1.05) 192 | ## Contrast 193 | elif 8 * real_length <= index < 9 * real_length: 194 | img = Contrast(img).enhance(0.95) 195 | elif 9 * real_length <= index < 10 * real_length: 196 | img = Contrast(img).enhance(1.05) 197 | ## Brightness 198 | elif 10 * real_length <= index < 11 * real_length: 199 | img = Brightness(img).enhance(0.95) 200 | elif 11 * real_length <= index < 12 * real_length: 201 | img = Brightness(img).enhance(1.05) 202 | ## Sharpness 203 | elif 12 * real_length <= index < 13 * real_length: 204 | img = Sharpness(img).enhance(0.95) 205 | elif 13 * real_length <= index < 14 * real_length: 206 | img = Sharpness(img).enhance(1.05) 207 | else: 208 | raise IndexError("Index out of bounds") 209 | 210 | 211 | if self.transform is not None: 212 | img = self.transform(img) 213 | 214 | label = from_numpy(self.y[real_index]) 215 | return img, label 216 | 217 | def __len__(self): 218 | return len(self.X.index) * self.augmentNumber 219 | 220 | def real_length(self): 221 | return len(self.X.index) 222 | 223 | def getLabelEncoder(self): 224 | return self.mlb 225 | 226 | #### Usage 227 | 228 | ############################################################ 229 | # Augmented part 230 | # X_train = 
AugmentedAmazonDataset('./data/train.csv','./data/train-jpg/','.jpg', 231 | # ds_transform 232 | # ) 233 | 234 | # Creating a validation split 235 | # train_idx, valid_idx = augmented_train_valid_split(X_train, 0.2) 236 | 237 | # nb_augment = X_train.augmentNumber 238 | # augmented_train_idx = [i * nb_augment + idx for idx in train_idx for i in range(0,nb_augment)] 239 | 240 | # train_sampler = SubsetRandomSampler(augmented_train_idx) 241 | # valid_sampler = SubsetRandomSampler(valid_idx) 242 | ########################################################### 243 | 244 | 245 | ################################################## 246 | ## DEPRECATED: AugmentedAmazonDataset is deprecated 247 | ## https://discuss.pytorch.org/t/feedback-on-pytorch-for-kaggle-competitions/2252/8?u=mratsim 248 | ## Augmentation on PyTorch are done randomly at each epoch 249 | 250 | 251 | def augmented_train_valid_split(dataset, test_size = 0.25, shuffle = False, random_seed = 0): 252 | """ Return a list of splitted indices from a DataSet. 253 | Indices can be used with DataLoader to build a train and validation set. 254 | 255 | Arguments: 256 | A Dataset 257 | A test_size, as a float between 0 and 1 (percentage split) or as an int (fixed number split) 258 | Shuffling True or False 259 | Random seed 260 | """ 261 | length = dataset.real_length() 262 | indices = list(range(1,length)) 263 | 264 | if shuffle == True: 265 | random.seed(random_seed) 266 | random.shuffle(indices) 267 | 268 | if type(test_size) is float: 269 | split = floor(test_size * length) 270 | elif type(test_size) is int: 271 | split = test_size 272 | else: 273 | raise ValueError('%s should be an int or a float' % str) 274 | return indices[split:], indices[:split] 275 | -------------------------------------------------------------------------------- /src/k_dataloader.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | from sklearn.preprocessing import MultiLabelBinarizer 5 | from keras.preprocessing.image import ImageDataGenerator, Iterator, load_img, img_to_array 6 | import pandas as pd 7 | import os 8 | import threading 9 | import numpy as np 10 | import keras.backend as K 11 | 12 | ## For computing mean and std 13 | from tqdm import tqdm 14 | import cv2 15 | 16 | class AmazonGenerator(ImageDataGenerator): 17 | def __init__(self, *args, **kwargs): 18 | super(AmazonGenerator, self).__init__(*args, **kwargs) 19 | self.iterator = None 20 | 21 | def flow_from_csv(self, csv_path, img_path, img_ext, 22 | mode='fit', 23 | target_size=(256, 256), 24 | color_mode='rgb', 25 | batch_size=32, shuffle=True, seed=None): 26 | 27 | self.iterator = AmazonCSVIterator(self, csv_path, 28 | img_path, img_ext, 29 | mode=mode, 30 | target_size = target_size, 31 | color_mode = color_mode, 32 | batch_size = batch_size, 33 | shuffle = shuffle, 34 | seed = seed, 35 | data_format=None) 36 | self.mlb = self.iterator.getLabelEncoder() 37 | return(self.iterator) 38 | 39 | def flow_from_df(self, dataframe, img_path, img_ext, 40 | mode='fit', 41 | target_size=(256, 256), 42 | color_mode='rgb', 43 | batch_size=32, shuffle=True, seed=None): 44 | 45 | self.iterator = AmazonDFIterator(self, dataframe,img_path, img_ext, 46 | mode=mode, 47 | target_size = target_size, 48 | color_mode = color_mode, 49 | batch_size = batch_size, 50 | shuffle = shuffle, 51 | seed = seed, 52 | data_format=None) 53 | self.mlb = self.iterator.getLabelEncoder() 54 | return(self.iterator) 55 
| 56 | def getLabelEncoder(self): 57 | return self.iterator.getLabelEncoder() 58 | 59 | def fit_from_csv(self, csv_path, img_path, img_ext, rescale, target_size): 60 | '''Required for featurewise_center, featurewise_std_normalization 61 | when using images loaded from csv. 62 | 63 | # Arguments 64 | csv_path: Path to the csv with image list 65 | img_path: Directory with all images 66 | img_ext: Extension of images 67 | rescaling factor: usually we rescale images from 0-255 to 0-1 68 | resolution: A tuple of int. Images will be rescaled to that resolution before computing mean as we need to hold them all in memory. Set as big as your memory allows 69 | ''' 70 | 71 | # Computing mean and variance using Welford's algorithm for one pass only and numerical stability. 72 | df = pd.read_csv(csv_path) 73 | 74 | # Pre-allocation 75 | shape = cv2.imread(os.path.join( 76 | img_path, 77 | df['image_name'].iloc[0] + img_ext)).shape 78 | 79 | mean= np.zeros(shape, dtype=np.float32) 80 | M2= np.zeros(shape, dtype=np.float32) 81 | 82 | print('Computing mean and standard deviation on the dataset') 83 | for n, file in enumerate(tqdm(df['image_name'], miniters=256), 1): 84 | img = cv2.imread(os.path.join(img_path, file + img_ext)).astype(np.float32) 85 | img *= rescale 86 | delta = img - mean 87 | mean += delta/n 88 | delta2 = img - mean 89 | M2 += delta*delta2 90 | 91 | self.mean = mean 92 | self.std = M2 / (n-1) 93 | 94 | print("Mean has shape: " + str(self.mean.shape)) 95 | print("Std has shape: " + str(self.std.shape)) 96 | 97 | def dump_dataset_mean_std(self, path_mean, path_std): 98 | if self.mean is None or self.std is None: 99 | raise ValueError('Mean and Std must be computed before, fit the generator first') 100 | np.save(path_mean, self.mean) 101 | np.save(path_std, self.std) 102 | 103 | 104 | def load_mean_std(self, path_mean, path_std): 105 | self.mean = np.load(path_mean) 106 | self.std = np.load(path_std) 107 | print("Mean has shape: " + str(self.mean.shape)) 108 | print("Std has shape: " + str(self.std.shape)) 109 | 110 | class AmazonCSVIterator(Iterator): 111 | def __init__(self, image_data_generator, csv_path, 112 | img_path, img_ext, 113 | mode='fit', 114 | target_size=(256, 256), 115 | color_mode='rgb', 116 | batch_size=32, shuffle=True, seed=None, 117 | data_format=None): 118 | 119 | ## Common initialization routines 120 | self.target_size = tuple(target_size) 121 | if color_mode not in {'rgb', 'grayscale'}: 122 | raise ValueError('Invalid color mode:', color_mode, 123 | '; expected "rgb" or "grayscale".') 124 | self.color_mode = color_mode 125 | 126 | if data_format is None: 127 | self.data_format = K.image_data_format() 128 | 129 | if self.color_mode == 'rgb': 130 | if self.data_format == 'channels_last': 131 | self.image_shape = self.target_size + (3,) 132 | else: 133 | self.image_shape = (3,) + self.target_size 134 | else: 135 | if self.data_format == 'channels_last': 136 | self.image_shape = self.target_size + (1,) 137 | else: 138 | self.image_shape = (1,) + self.target_size 139 | 140 | self.image_data_generator = image_data_generator 141 | 142 | ## Specific to Amazon 143 | tmp_df = pd.read_csv(csv_path) 144 | assert tmp_df['image_name'].apply(lambda x: os.path.isfile(img_path + x + img_ext)).all(), \ 145 | "Some images referenced in the CSV file were not found" 146 | 147 | self.mlb = MultiLabelBinarizer() 148 | self.img_path = img_path 149 | self.img_ext = img_ext 150 | self.X = tmp_df['image_name'] 151 | self.mode = mode 152 | if mode == 'fit': 153 | self.y = 
self.mlb.fit_transform(tmp_df['tags'].str.split()) 154 | 155 | ## Init parent class 156 | super(AmazonCSVIterator, self).__init__(self.X.shape[0], 157 | batch_size, shuffle, seed) 158 | 159 | def next(self): 160 | """For python 2.x. 161 | # Returns The next batch. 162 | """ 163 | 164 | with self.lock: 165 | index_array, current_index, current_batch_size = next(self.index_generator) 166 | 167 | # The transformation of images is not under thread lock 168 | # so it can be done in parallel 169 | batch_x = np.zeros((current_batch_size,) + self.image_shape, dtype=K.floatx()) 170 | grayscale = self.color_mode == 'grayscale' 171 | 172 | # Build batch of images 173 | for i, j in enumerate(index_array): 174 | fpath = os.path.join(self.img_path,self.X[j] + self.img_ext) 175 | img = load_img(fpath, 176 | grayscale=grayscale, 177 | target_size=self.target_size) 178 | x = img_to_array(img, data_format=self.data_format) 179 | x = self.image_data_generator.random_transform(x) 180 | x = self.image_data_generator.standardize(x) 181 | batch_x[i] = x 182 | 183 | # Build batch of labels. 184 | if mode=='fit': 185 | batch_y = self.y[index_array] 186 | return batch_x, batch_y 187 | elif mode=='predict': 188 | return batch_x 189 | else: raise ValueError('The mode should be either \'fit\' or \'predict\'') 190 | 191 | def getLabelEncoder(self): 192 | return self.mlb 193 | 194 | class AmazonDFIterator(Iterator): 195 | def __init__(self, image_data_generator, df, img_path, img_ext, 196 | mode='fit', 197 | target_size=(256, 256), 198 | color_mode='rgb', 199 | batch_size=32, shuffle=True, seed=None, 200 | data_format=None): 201 | 202 | ## Common initialization routines 203 | self.target_size = tuple(target_size) 204 | if color_mode not in {'rgb', 'grayscale'}: 205 | raise ValueError('Invalid color mode:', color_mode, 206 | '; expected "rgb" or "grayscale".') 207 | self.color_mode = color_mode 208 | 209 | if data_format is None: 210 | self.data_format = K.image_data_format() 211 | 212 | if self.color_mode == 'rgb': 213 | if self.data_format == 'channels_last': 214 | self.image_shape = self.target_size + (3,) 215 | else: 216 | self.image_shape = (3,) + self.target_size 217 | else: 218 | if self.data_format == 'channels_last': 219 | self.image_shape = self.target_size + (1,) 220 | else: 221 | self.image_shape = (1,) + self.target_size 222 | 223 | self.image_data_generator = image_data_generator 224 | 225 | ## Specific to Amazon 226 | assert df['image_name'].apply(lambda x: os.path.isfile(img_path + x + img_ext)).all(), \ 227 | "Some images referenced in the CSV file were not found" 228 | 229 | self.mlb = MultiLabelBinarizer() 230 | self.img_path = img_path 231 | self.img_ext = img_ext 232 | self.X = df['image_name'] 233 | self.mode = mode 234 | if mode == 'fit': 235 | self.y = self.mlb.fit_transform(df['tags'].str.split()) 236 | 237 | ## Init parent class 238 | super(AmazonDFIterator, self).__init__(self.X.shape[0], 239 | batch_size, shuffle, seed) 240 | 241 | def next(self): 242 | """For python 2.x. 243 | # Returns The next batch. 
244 | """ 245 | 246 | with self.lock: 247 | index_array, current_index, current_batch_size = next(self.index_generator) 248 | 249 | # The transformation of images is not under thread lock 250 | # so it can be done in parallel 251 | batch_x = np.zeros((current_batch_size,) + self.image_shape, dtype=K.floatx()) 252 | grayscale = self.color_mode == 'grayscale' 253 | 254 | # Build batch of images 255 | for i, j in enumerate(index_array): 256 | fpath = os.path.join(self.img_path,self.X[j] + self.img_ext) 257 | img = load_img(fpath, 258 | grayscale=grayscale, 259 | target_size=self.target_size) 260 | x = img_to_array(img, data_format=self.data_format) 261 | x = self.image_data_generator.random_transform(x) 262 | x = self.image_data_generator.standardize(x) 263 | batch_x[i] = x 264 | 265 | # Build batch of labels. 266 | if self.mode=='fit': 267 | batch_y = self.y[index_array] 268 | return batch_x, batch_y 269 | elif self.mode=='predict': 270 | return batch_x 271 | else: raise ValueError('The mode should be either \'fit\' or \'predict\'') 272 | 273 | def getLabelEncoder(self): 274 | return self.mlb -------------------------------------------------------------------------------- /baseline/pytorch_scatter_gather_onehotencoding.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2017-05-06T05:31:03.921480Z", 9 | "start_time": "2017-05-06T05:31:03.919143Z" 10 | }, 11 | "collapsed": true 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "import torch\n", 16 | "from torch import nn\n", 17 | "from torch.autograd import Variable" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 4, 23 | "metadata": { 24 | "ExecuteTime": { 25 | "end_time": "2017-05-06T05:31:05.240531Z", 26 | "start_time": "2017-05-06T05:31:04.686364Z" 27 | }, 28 | "collapsed": true 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "m = nn.LogSoftmax()\n", 33 | "loss = nn.NLLLoss()\n", 34 | "# input is of size nBatch x nClasses = 3 x 5\n", 35 | "input = Variable(torch.randn(3, 5), requires_grad=True)\n", 36 | "# each element in target has to have 0 <= value < nclasses\n", 37 | "target = Variable(torch.LongTensor([1, 0, 4]))\n", 38 | "output = loss(m(input), target)\n", 39 | "output.backward()" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 35, 45 | "metadata": { 46 | "ExecuteTime": { 47 | "end_time": "2017-05-06T05:49:58.231485Z", 48 | "start_time": "2017-05-06T05:49:58.229175Z" 49 | }, 50 | "collapsed": true 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "target2 = Variable(torch.LongTensor([[0, 1, 0, 0, 0],\n", 55 | " [1, 0, 0, 0, 0],\n", 56 | " [0, 0, 0, 0, 1]]))" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 36, 62 | "metadata": { 63 | "ExecuteTime": { 64 | "end_time": "2017-05-06T05:49:58.819374Z", 65 | "start_time": "2017-05-06T05:49:58.816471Z" 66 | } 67 | }, 68 | "outputs": [ 69 | { 70 | "data": { 71 | "text/plain": [ 72 | "Variable containing:\n", 73 | " 0 1 0 0 0\n", 74 | " 1 0 0 0 0\n", 75 | " 0 0 0 0 1\n", 76 | "[torch.LongTensor of size 3x5]" 77 | ] 78 | }, 79 | "execution_count": 36, 80 | "metadata": {}, 81 | "output_type": "execute_result" 82 | } 83 | ], 84 | "source": [ 85 | "target2" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 23, 91 | "metadata": { 92 | "ExecuteTime": { 93 | "end_time": "2017-05-06T05:40:40.728998Z", 94 | "start_time": 
"2017-05-06T05:40:40.726516Z" 95 | }, 96 | "collapsed": true 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "target_onehot = Variable(torch.zeros(3, 5))" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 24, 106 | "metadata": { 107 | "ExecuteTime": { 108 | "end_time": "2017-05-06T05:40:40.983895Z", 109 | "start_time": "2017-05-06T05:40:40.980668Z" 110 | } 111 | }, 112 | "outputs": [ 113 | { 114 | "data": { 115 | "text/plain": [ 116 | "Variable containing:\n", 117 | " 0 0 0 0 0\n", 118 | " 0 0 0 0 0\n", 119 | " 0 0 0 0 0\n", 120 | "[torch.FloatTensor of size 3x5]" 121 | ] 122 | }, 123 | "execution_count": 24, 124 | "metadata": {}, 125 | "output_type": "execute_result" 126 | } 127 | ], 128 | "source": [ 129 | "target_onehot" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 29, 135 | "metadata": { 136 | "ExecuteTime": { 137 | "end_time": "2017-05-06T05:42:54.036901Z", 138 | "start_time": "2017-05-06T05:42:54.033351Z" 139 | } 140 | }, 141 | "outputs": [ 142 | { 143 | "data": { 144 | "text/plain": [ 145 | "Variable containing:\n", 146 | " 1\n", 147 | " 0\n", 148 | " 4\n", 149 | "[torch.LongTensor of size 3]" 150 | ] 151 | }, 152 | "execution_count": 29, 153 | "metadata": {}, 154 | "output_type": "execute_result" 155 | } 156 | ], 157 | "source": [ 158 | "target" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 30, 164 | "metadata": { 165 | "ExecuteTime": { 166 | "end_time": "2017-05-06T05:43:22.951826Z", 167 | "start_time": "2017-05-06T05:43:22.949248Z" 168 | }, 169 | "collapsed": true 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "targetv = target.view(-1,1)" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 31, 179 | "metadata": { 180 | "ExecuteTime": { 181 | "end_time": "2017-05-06T05:43:32.052043Z", 182 | "start_time": "2017-05-06T05:43:32.049359Z" 183 | } 184 | }, 185 | "outputs": [ 186 | { 187 | "data": { 188 | "text/plain": [ 189 | "Variable containing:\n", 190 | " 1\n", 191 | " 0\n", 192 | " 4\n", 193 | "[torch.LongTensor of size 3x1]" 194 | ] 195 | }, 196 | "execution_count": 31, 197 | "metadata": {}, 198 | "output_type": "execute_result" 199 | } 200 | ], 201 | "source": [ 202 | "targetv" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 71, 208 | "metadata": { 209 | "ExecuteTime": { 210 | "end_time": "2017-05-06T06:04:15.942080Z", 211 | "start_time": "2017-05-06T06:04:15.938887Z" 212 | } 213 | }, 214 | "outputs": [ 215 | { 216 | "data": { 217 | "text/plain": [ 218 | "Variable containing:\n", 219 | " 0 1 0 0 0\n", 220 | " 1 0 0 0 0\n", 221 | " 0 0 0 0 1\n", 222 | "[torch.FloatTensor of size 3x5]" 223 | ] 224 | }, 225 | "execution_count": 71, 226 | "metadata": {}, 227 | "output_type": "execute_result" 228 | } 229 | ], 230 | "source": [ 231 | "target_onehot.scatter(1, targetv, 1)" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 96, 237 | "metadata": { 238 | "ExecuteTime": { 239 | "end_time": "2017-05-06T06:34:33.131599Z", 240 | "start_time": "2017-05-06T06:34:33.129170Z" 241 | }, 242 | "collapsed": true 243 | }, 244 | "outputs": [], 245 | "source": [ 246 | "s = torch.arange(0,5).expand(3,5) #expand is torch broadcasting" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 97, 252 | "metadata": { 253 | "ExecuteTime": { 254 | "end_time": "2017-05-06T06:34:33.451566Z", 255 | "start_time": "2017-05-06T06:34:33.448207Z" 256 | } 257 | }, 258 | "outputs": [ 259 | { 260 | "data": { 261 | 
"text/plain": [ 262 | "\n", 263 | " 0 1 2 3 4\n", 264 | " 0 1 2 3 4\n", 265 | " 0 1 2 3 4\n", 266 | "[torch.FloatTensor of size 3x5]" 267 | ] 268 | }, 269 | "execution_count": 97, 270 | "metadata": {}, 271 | "output_type": "execute_result" 272 | } 273 | ], 274 | "source": [ 275 | "s" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 88, 281 | "metadata": { 282 | "ExecuteTime": { 283 | "end_time": "2017-05-06T06:14:36.968468Z", 284 | "start_time": "2017-05-06T06:14:36.964932Z" 285 | } 286 | }, 287 | "outputs": [ 288 | { 289 | "data": { 290 | "text/plain": [ 291 | "\n", 292 | " 0 1 0 0 0\n", 293 | " 1 0 0 0 0\n", 294 | " 0 0 0 0 1\n", 295 | "[torch.FloatTensor of size 3x5]" 296 | ] 297 | }, 298 | "execution_count": 88, 299 | "metadata": {}, 300 | "output_type": "execute_result" 301 | } 302 | ], 303 | "source": [ 304 | "s.gather(1, target_onehot.data.long())" 305 | ] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": {}, 310 | "source": [] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 1, 315 | "metadata": { 316 | "ExecuteTime": { 317 | "end_time": "2017-05-06T08:19:14.558481Z", 318 | "start_time": "2017-05-06T08:19:14.437859Z" 319 | }, 320 | "collapsed": true 321 | }, 322 | "outputs": [], 323 | "source": [ 324 | "import torch\n", 325 | "from torch import nn\n", 326 | "from torch.autograd import Variable" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": 4, 332 | "metadata": { 333 | "ExecuteTime": { 334 | "end_time": "2017-05-06T08:19:38.356073Z", 335 | "start_time": "2017-05-06T08:19:38.350191Z" 336 | } 337 | }, 338 | "outputs": [ 339 | { 340 | "name": "stdout", 341 | "output_type": "stream", 342 | "text": [ 343 | "Variable containing:\n", 344 | " 1\n", 345 | " 0\n", 346 | " 4\n", 347 | "[torch.LongTensor of size 3]\n", 348 | "\n", 349 | "Variable containing:\n", 350 | " 0 1 0 0 0\n", 351 | " 1 0 0 0 0\n", 352 | " 0 0 0 0 1\n", 353 | "[torch.FloatTensor of size 3x5]\n", 354 | "\n", 355 | "\n", 356 | " 0\n", 357 | " 1\n", 358 | " 2\n", 359 | " 3\n", 360 | " 4\n", 361 | "[torch.FloatTensor of size 5]\n", 362 | "\n", 363 | "\n", 364 | " 0 1 2 3 4\n", 365 | " 0 1 2 3 4\n", 366 | " 0 1 2 3 4\n", 367 | "[torch.FloatTensor of size 3x5]\n", 368 | "\n", 369 | "\n", 370 | " 1\n", 371 | " 0\n", 372 | " 4\n", 373 | "[torch.FloatTensor of size 3]\n", 374 | "\n" 375 | ] 376 | } 377 | ], 378 | "source": [ 379 | "target = Variable(torch.LongTensor([1, 0, 4]))\n", 380 | "print(target)\n", 381 | "target_onehot = Variable(torch.zeros(3, 5))\n", 382 | "target_onehot.scatter_(1, target.view(-1,1), 1) #_ for inplace\n", 383 | "print(target_onehot)\n", 384 | "\n", 385 | "val = torch.arange(0,5)\n", 386 | "print(val)\n", 387 | "val = val.expand(3,5) #expand is torch broadcasting\n", 388 | "print(val)\n", 389 | "\n", 390 | "new_target=val.masked_select(target_onehot.data.byte())\n", 391 | "print(new_target)" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": 95, 397 | "metadata": { 398 | "ExecuteTime": { 399 | "end_time": "2017-05-06T06:34:27.269336Z", 400 | "start_time": "2017-05-06T06:34:27.264843Z" 401 | } 402 | }, 403 | "outputs": [ 404 | { 405 | "ename": "RuntimeError", 406 | "evalue": "inconsistent tensor size at /pkg/makepkg/python-pytorch-git/src/pytorch/torch/lib/TH/generic/THTensorMath.c:193", 407 | "output_type": "error", 408 | "traceback": [ 409 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 410 | "\u001b[0;31mRuntimeError\u001b[0m 
Traceback (most recent call last)", 411 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmasked_select\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtarget_onehot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbyte\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 412 | "\u001b[0;31mRuntimeError\u001b[0m: inconsistent tensor size at /pkg/makepkg/python-pytorch-git/src/pytorch/torch/lib/TH/generic/THTensorMath.c:193" 413 | ] 414 | } 415 | ], 416 | "source": [ 417 | "s.masked_select(target_onehot.data.byte())" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": null, 423 | "metadata": { 424 | "collapsed": true 425 | }, 426 | "outputs": [], 427 | "source": [] 428 | } 429 | ], 430 | "metadata": { 431 | "kernelspec": { 432 | "display_name": "Python 3", 433 | "language": "python", 434 | "name": "python3" 435 | }, 436 | "language_info": { 437 | "codemirror_mode": { 438 | "name": "ipython", 439 | "version": 3 440 | }, 441 | "file_extension": ".py", 442 | "mimetype": "text/x-python", 443 | "name": "python", 444 | "nbconvert_exporter": "python", 445 | "pygments_lexer": "ipython3", 446 | "version": "3.6.0" 447 | } 448 | }, 449 | "nbformat": 4, 450 | "nbformat_minor": 2 451 | } 452 | -------------------------------------------------------------------------------- /baseline/003-pytorch-kernel-baseline.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "_cell_guid": "82cb34f6-d7d9-5938-c2f9-2b231d073c04" 7 | }, 8 | "source": [ 9 | "# Starting kit for PyTorch Deep Learning\n", 10 | "\n", 11 | "Welcome to this tutorial to get started on PyTorch for this competition.\n", 12 | "PyTorch is a promising port of Facebook's Torch to Python.\n", 13 | "\n", 14 | "It's only 3 months old but has an already promising feature set.\n", 15 | "Unfortunately it's very very raw, and I had a lot of troubles to get started with very basic things:\n", 16 | "- data loading\n", 17 | "- building a basic CNN\n", 18 | "- training\n", 19 | "\n", 20 | "Hopefully this will help you getting started using PyTorch on this dataset." 
21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "_cell_guid": "3763a794-0a61-f0ab-9215-56de74bf29df" 27 | }, 28 | "source": [ 29 | "## Importing libraries\n", 30 | "Please note that we do not import numpy but PyTorch wrapper for Numpy" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 1, 36 | "metadata": { 37 | "ExecuteTime": { 38 | "end_time": "2017-04-25T21:15:49.566831Z", 39 | "start_time": "2017-04-25T21:15:49.179045Z" 40 | }, 41 | "_cell_guid": "f3ee9f39-55e1-ee69-2bb6-25c095155e1d", 42 | "collapsed": true 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "import pandas as pd\n", 47 | "from torch import np # Torch wrapper for Numpy\n", 48 | "\n", 49 | "import os\n", 50 | "from PIL import Image\n", 51 | "\n", 52 | "import torch\n", 53 | "from torch.utils.data.dataset import Dataset\n", 54 | "from torch.utils.data import DataLoader\n", 55 | "from torchvision import transforms\n", 56 | "from torch import nn\n", 57 | "import torch.nn.functional as F\n", 58 | "import torch.optim as optim\n", 59 | "from torch.autograd import Variable\n", 60 | "\n", 61 | "from sklearn.preprocessing import MultiLabelBinarizer" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": { 67 | "_cell_guid": "e6fde4f7-e8f3-3782-673a-62ce72b652fa" 68 | }, 69 | "source": [ 70 | "## Setting up global variables" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 2, 76 | "metadata": { 77 | "ExecuteTime": { 78 | "end_time": "2017-04-25T21:15:49.570687Z", 79 | "start_time": "2017-04-25T21:15:49.568053Z" 80 | }, 81 | "_cell_guid": "45d63034-a44c-47e8-7376-2deb00af03a9", 82 | "collapsed": true 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "IMG_PATH = '../data/train-jpg/'\n", 87 | "IMG_EXT = '.jpg'\n", 88 | "TRAIN_DATA = '../data/train.csv'" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": { 94 | "_cell_guid": "1aa8e64e-f2eb-f570-bfd3-6098638c5f40" 95 | }, 96 | "source": [ 97 | "## Loading the data - first part - DataSet\n", 98 | "\n", 99 | "This is probably the most obscure part of PyTorch. Most examples use well known datasets (MNIST ...) and have a custom loader or forces you to have a specific folder structure similar to this:\n", 100 | "\n", 101 | "* data\n", 102 | " * train\n", 103 | " * dogs\n", 104 | " * cats\n", 105 | " * validation\n", 106 | " * dogs\n", 107 | " * cats\n", 108 | " * test\n", 109 | " * test\n", 110 | "\n", 111 | "Data loading in PyTorch is in 2 parts\n", 112 | "\n", 113 | "First the data must be wrapped in a __Dataset__ class with a getitem method that from an index return X_train[index] and y_train[index] and a length method. A Dataset is basically a data storage.\n", 114 | "\n", 115 | "The following solution loads the image name from a CSV and file path + extension and can be adapted easily for most Kaggle challenges. You won't have to write your own ;).\n", 116 | "\n", 117 | "The code will:\n", 118 | "\n", 119 | "- Check that all images in CSV exist in the folder\n", 120 | "- Use ScikitLearn MultiLabelBinarizer to OneHotEncode the labels, mlb.inverse_transform(predictions) can be used to get back the textual labels from the predictions\n", 121 | "- Apply PIL transformations to the images. 
See [here](http://pytorch.org/docs/torchvision/transforms.html) for the supported list.\n", 122 | "- Use ToTensor() to convert from an image with color scale 0-255 to a Tensor with color scale 0-1.\n", 123 | "\n", 124 | "Note: We use PIL instead of OpenCV because it's Torch default image loader and is compatible with `ToTensor()` method. An fast loader called accimage is currently in development and was published 3 days ago [here](https://github.com/pytorch/accimage).\n", 125 | "\n", 126 | "Note 2: This only provides a mapping to the data, **the data is not loaded in memory at this point**. The next part will show you how to load only what is needed for the batch in memory. This is a huge advantage compared to kernels that must load all images at once." 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 3, 132 | "metadata": { 133 | "ExecuteTime": { 134 | "end_time": "2017-04-25T21:15:49.584068Z", 135 | "start_time": "2017-04-25T21:15:49.572082Z" 136 | }, 137 | "_cell_guid": "08a005ca-d963-5434-d60d-72d399cb7fe3", 138 | "collapsed": true 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "class KaggleAmazonDataset(Dataset):\n", 143 | " \"\"\"Dataset wrapping images and target labels for Kaggle - Planet Amazon from Space competition.\n", 144 | "\n", 145 | " Arguments:\n", 146 | " A CSV file path\n", 147 | " Path to image folder\n", 148 | " Extension of images\n", 149 | " PIL transforms\n", 150 | " \"\"\"\n", 151 | "\n", 152 | " def __init__(self, csv_path, img_path, img_ext, transform=None):\n", 153 | " \n", 154 | " tmp_df = pd.read_csv(csv_path)\n", 155 | " assert tmp_df['image_name'].apply(lambda x: os.path.isfile(img_path + x + img_ext)).all(), \\\n", 156 | "\"Some images referenced in the CSV file were not found\"\n", 157 | " \n", 158 | " self.mlb = MultiLabelBinarizer()\n", 159 | " self.img_path = img_path\n", 160 | " self.img_ext = img_ext\n", 161 | " self.transform = transform\n", 162 | "\n", 163 | " self.X_train = tmp_df['image_name']\n", 164 | " self.y_train = self.mlb.fit_transform(tmp_df['tags'].str.split()).astype(np.float32)\n", 165 | "\n", 166 | " def __getitem__(self, index):\n", 167 | " img = Image.open(self.img_path + self.X_train[index] + self.img_ext)\n", 168 | " img = img.convert('RGB')\n", 169 | " if self.transform is not None:\n", 170 | " img = self.transform(img)\n", 171 | " \n", 172 | " label = torch.from_numpy(self.y_train[index])\n", 173 | " return img, label\n", 174 | "\n", 175 | " def __len__(self):\n", 176 | " return len(self.X_train.index)" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 4, 182 | "metadata": { 183 | "ExecuteTime": { 184 | "end_time": "2017-04-25T21:15:49.784282Z", 185 | "start_time": "2017-04-25T21:15:49.585273Z" 186 | }, 187 | "_cell_guid": "98a20a0b-d39e-21a6-232b-990e916f6756", 188 | "collapsed": true 189 | }, 190 | "outputs": [], 191 | "source": [ 192 | "transformations = transforms.Compose([transforms.Scale(32),transforms.ToTensor()])\n", 193 | "\n", 194 | "dset_train = KaggleAmazonDataset(TRAIN_DATA,IMG_PATH,IMG_EXT,transformations)" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": { 200 | "_cell_guid": "2db00aac-0fb9-a1ab-1687-f373875de6bb" 201 | }, 202 | "source": [ 203 | "## Loading the data - second part - DataLoader\n", 204 | "\n", 205 | "As was said, loading the data is in 2 parts, we provided PyTorch with a data storage, and we have to tell it how to load it. 
This is done with __DataLoader__\n", 206 | "\n", 207 | "The DataLoader defines how you retrieve the images + labels from the dataset. You can tell it to:\n", 208 | "\n", 209 | "* Set the batch size.\n", 210 | "* Shuffle and sample the data randomly, hence implementing __train_test_split__ (check SubsetRandomSampler [here](http://pytorch.org/docs/data.html?highlight=sampler))\n", 211 | "* Improve performance by loading data via separate thread `num_worker` and using `pin_memory` for CUDA. Documentation [here](http://pytorch.org/docs/notes/cuda.html?highlight=dataloader)." 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 5, 217 | "metadata": { 218 | "ExecuteTime": { 219 | "end_time": "2017-04-25T21:15:49.787999Z", 220 | "start_time": "2017-04-25T21:15:49.785741Z" 221 | }, 222 | "_cell_guid": "a2d57750-80fc-c8fe-9640-f276681f5549", 223 | "collapsed": true 224 | }, 225 | "outputs": [], 226 | "source": [ 227 | "train_loader = DataLoader(dset_train,\n", 228 | " batch_size=256,\n", 229 | " shuffle=True,\n", 230 | " num_workers=4 # 1 for CUDA\n", 231 | " # pin_memory=True # CUDA only\n", 232 | " )" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": { 238 | "_cell_guid": "1a27f04f-d260-46ec-698b-21aba8631f71" 239 | }, 240 | "source": [ 241 | "## Creating your Neural Network\n", 242 | "\n", 243 | "This is tricky, you need to compute yourself the in_channels and out_channels of your filters hence the 2304 input for the Dense layer. The first input 3 corresponds to the number of channels of your image, the 17 output corresponds to the number of target labels." 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 6, 249 | "metadata": { 250 | "ExecuteTime": { 251 | "end_time": "2017-04-25T21:15:51.046117Z", 252 | "start_time": "2017-04-25T21:15:49.789400Z" 253 | }, 254 | "_cell_guid": "c9a86c3d-b977-856a-7b71-5bf0cd509691", 255 | "collapsed": true 256 | }, 257 | "outputs": [], 258 | "source": [ 259 | "class Net(nn.Module):\n", 260 | " def __init__(self):\n", 261 | " super(Net, self).__init__()\n", 262 | " self.conv1 = nn.Conv2d(3, 32, kernel_size=3)\n", 263 | " self.conv2 = nn.Conv2d(32, 64, kernel_size=3)\n", 264 | " self.conv2_drop = nn.Dropout2d()\n", 265 | " self.fc1 = nn.Linear(2304, 256)\n", 266 | " self.fc2 = nn.Linear(256, 17)\n", 267 | "\n", 268 | " def forward(self, x):\n", 269 | " x = F.relu(F.max_pool2d(self.conv1(x), 2))\n", 270 | " x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))\n", 271 | " x = x.view(x.size(0), -1) # Flatten layer\n", 272 | " x = F.relu(self.fc1(x))\n", 273 | " x = F.dropout(x, training=self.training)\n", 274 | " x = self.fc2(x)\n", 275 | " return F.sigmoid(x)\n", 276 | "\n", 277 | "# model = Net() # On CPU\n", 278 | "model = Net().cuda() # On GPU" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": { 284 | "_cell_guid": "51e51a88-e8fc-467c-98cd-cab80f5e8679" 285 | }, 286 | "source": [ 287 | "## Defining your training function" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 7, 293 | "metadata": { 294 | "ExecuteTime": { 295 | "end_time": "2017-04-25T21:15:51.050255Z", 296 | "start_time": "2017-04-25T21:15:51.047755Z" 297 | }, 298 | "_cell_guid": "7c18ddb7-cd5a-86d3-b3b9-4c6bc467e7ea", 299 | "collapsed": true 300 | }, 301 | "outputs": [], 302 | "source": [ 303 | "optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 8, 309 | "metadata": { 310 | 
"ExecuteTime": { 311 | "end_time": "2017-04-25T21:15:51.057388Z", 312 | "start_time": "2017-04-25T21:15:51.051613Z" 313 | }, 314 | "_cell_guid": "745377b3-d942-a03a-76a9-e27cce51e01d", 315 | "collapsed": true 316 | }, 317 | "outputs": [], 318 | "source": [ 319 | "def train(epoch):\n", 320 | " model.train()\n", 321 | " for batch_idx, (data, target) in enumerate(train_loader):\n", 322 | " data, target = data.cuda(async=True), target.cuda(async=True) # On GPU\n", 323 | " data, target = Variable(data), Variable(target)\n", 324 | " optimizer.zero_grad()\n", 325 | " output = model(data)\n", 326 | " loss = F.binary_cross_entropy(output, target)\n", 327 | " loss.backward()\n", 328 | " optimizer.step()\n", 329 | " if batch_idx % 10 == 0:\n", 330 | " print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n", 331 | " epoch, batch_idx * len(data), len(train_loader.dataset),\n", 332 | " 100. * batch_idx / len(train_loader), loss.data[0]))" 333 | ] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "metadata": { 338 | "_cell_guid": "65a8fce1-f2b6-28ea-a807-216db7011267" 339 | }, 340 | "source": [ 341 | "## Training your model" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 9, 347 | "metadata": { 348 | "ExecuteTime": { 349 | "end_time": "2017-04-25T21:16:01.421896Z", 350 | "start_time": "2017-04-25T21:15:51.058525Z" 351 | }, 352 | "_cell_guid": "5e7ff060-19da-1b01-28ce-bd2e72430fee" 353 | }, 354 | "outputs": [ 355 | { 356 | "name": "stdout", 357 | "output_type": "stream", 358 | "text": [ 359 | "Train Epoch: 1 [0/40479 (0%)]\tLoss: 0.692961\n", 360 | "Train Epoch: 1 [2560/40479 (6%)]\tLoss: 0.688596\n", 361 | "Train Epoch: 1 [5120/40479 (13%)]\tLoss: 0.682759\n", 362 | "Train Epoch: 1 [7680/40479 (19%)]\tLoss: 0.676263\n", 363 | "Train Epoch: 1 [10240/40479 (25%)]\tLoss: 0.668731\n", 364 | "Train Epoch: 1 [12800/40479 (31%)]\tLoss: 0.659607\n", 365 | "Train Epoch: 1 [15360/40479 (38%)]\tLoss: 0.650362\n", 366 | "Train Epoch: 1 [17920/40479 (44%)]\tLoss: 0.635071\n", 367 | "Train Epoch: 1 [20480/40479 (50%)]\tLoss: 0.613103\n", 368 | "Train Epoch: 1 [23040/40479 (57%)]\tLoss: 0.584744\n", 369 | "Train Epoch: 1 [25600/40479 (63%)]\tLoss: 0.545705\n", 370 | "Train Epoch: 1 [28160/40479 (69%)]\tLoss: 0.500222\n", 371 | "Train Epoch: 1 [30720/40479 (75%)]\tLoss: 0.443742\n", 372 | "Train Epoch: 1 [33280/40479 (82%)]\tLoss: 0.404282\n", 373 | "Train Epoch: 1 [35840/40479 (88%)]\tLoss: 0.361815\n", 374 | "Train Epoch: 1 [38400/40479 (94%)]\tLoss: 0.338667\n" 375 | ] 376 | } 377 | ], 378 | "source": [ 379 | "for epoch in range(1, 2):\n", 380 | " train(epoch)" 381 | ] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "metadata": { 386 | "_cell_guid": "2e306e2f-87f0-f753-ab41-841a3b097afa" 387 | }, 388 | "source": [ 389 | "# Thank you for your attention\n", 390 | "\n", 391 | "Hopefully that will help you get started. I still have a lot to figure out in PyTorch like:\n", 392 | "\n", 393 | "* Implementing the train / validation split\n", 394 | "* Figure out data augmentation (and not just random transformations or images)\n", 395 | "* Implementing early stopping\n", 396 | "* Automating computation of intermediate layers\n", 397 | "* Improving the display of each epochs\n", 398 | "\n", 399 | "If you liked the kernel don't forget to vote and don't hesitate to comment." 
400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": null, 405 | "metadata": { 406 | "_cell_guid": "16f21935-088c-8590-9233-2700afeb3922", 407 | "collapsed": true 408 | }, 409 | "outputs": [], 410 | "source": [] 411 | } 412 | ], 413 | "metadata": { 414 | "_change_revision": 0, 415 | "_is_fork": false, 416 | "kernelspec": { 417 | "display_name": "Python 3", 418 | "language": "python", 419 | "name": "python3" 420 | }, 421 | "language_info": { 422 | "codemirror_mode": { 423 | "name": "ipython", 424 | "version": 3 425 | }, 426 | "file_extension": ".py", 427 | "mimetype": "text/x-python", 428 | "name": "python", 429 | "nbconvert_exporter": "python", 430 | "pygments_lexer": "ipython3", 431 | "version": "3.6.0" 432 | } 433 | }, 434 | "nbformat": 4, 435 | "nbformat_minor": 1 436 | } 437 | -------------------------------------------------------------------------------- /Dual_Feed_Image_Label.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2017-05-08T10:52:32.499144Z", 9 | "start_time": "2017-05-08T10:52:32.352476Z" 10 | }, 11 | "collapsed": true 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "from torch import nn, ones\n", 16 | "from torch.autograd import Variable\n", 17 | "from torchvision import models\n", 18 | "from torch.nn.init import kaiming_normal\n", 19 | "from torch import np\n", 20 | "import torch\n", 21 | "import torch.nn.functional as F\n", 22 | "import random\n", 23 | "import numpy as np\n", 24 | "from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence\n", 25 | "from src.p_data_augmentation import PowerPIL" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": { 32 | "ExecuteTime": { 33 | "end_time": "2017-05-08T10:52:32.680830Z", 34 | "start_time": "2017-05-08T10:52:32.500741Z" 35 | }, 36 | "collapsed": true 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "from torch.utils.data.dataset import Dataset\n", 41 | "from torchvision import transforms\n", 42 | "import pandas as pd\n", 43 | "import os\n", 44 | "from PIL import Image\n", 45 | "\n", 46 | "from torch import np, from_numpy # Numpy like wrapper\n", 47 | "\n", 48 | "class ImgTagsDualFeedDataset(Dataset):\n", 49 | " \"\"\"Dataset wrapping images, labels and target labels for Kaggle - Planet Amazon from Space competition.\n", 50 | "\n", 51 | " Arguments:\n", 52 | " A CSV file path\n", 53 | " Path to image folder\n", 54 | " Extension of images\n", 55 | " \"\"\"\n", 56 | "\n", 57 | " def __init__(self, csv_path, img_path, img_ext, vocab_mapping, transform=None):\n", 58 | " \n", 59 | " self.df = pd.read_csv(csv_path)\n", 60 | " assert self.df['image_name'].apply(lambda x: os.path.isfile(img_path + x + img_ext)).all(), \\\n", 61 | "\"Some images referenced in the CSV file were not found\"\n", 62 | "\n", 63 | " self.img_path = img_path\n", 64 | " self.img_ext = img_ext\n", 65 | " self.transform = transform\n", 66 | " \n", 67 | " self.X = self.df['image_name']\n", 68 | " \n", 69 | " self.vocab_mapping = vocab_mapping\n", 70 | "\n", 71 | " self.tags = self.df['tags'].str.split()\n", 72 | " \n", 73 | " \n", 74 | " def X(self):\n", 75 | " return self.X\n", 76 | " \n", 77 | " def __getitem__(self, index):\n", 78 | " \n", 79 | " img = Image.open(self.img_path + self.X[index] + self.img_ext)\n", 80 | " img = img.convert('RGB')\n", 81 | " if self.transform is not None:\n", 82 | " img = 
self.transform(img)\n", 83 | " \n", 84 | " vocab = self.vocab_mapping\n", 85 | " tags = []\n", 86 | " tags.append(vocab[''])\n", 87 | " tags.extend([vocab[tag] for tag in self.tags[index]])\n", 88 | " tags.append(vocab[''])\n", 89 | " \n", 90 | " tags = torch.Tensor(tags)\n", 91 | " return img, tags\n", 92 | "\n", 93 | " def __len__(self):\n", 94 | " return len(self.df.index)\n", 95 | "\n", 96 | "\n", 97 | " def collate_fn(self, data):\n", 98 | " \"\"\"Creates mini-batch tensors for tags with variable size\n", 99 | "\n", 100 | " Args:\n", 101 | " data: list of tuple (input, target). \n", 102 | " - image: torch tensor of shape (3, ?, ?).\n", 103 | " - target: torch tensor of same shape (?); variable length.\n", 104 | " Returns:\n", 105 | " images: torch tensor of shape (batch_size, 3, ?, ?).\n", 106 | " targets: torch tensor of shape (batch_size, padded_length).\n", 107 | " lengths: list; valid length for each padded tags.\n", 108 | " \"\"\"\n", 109 | " # Sort a data list by target length (descending order).\n", 110 | " data.sort(key=lambda x: len(x[1]), reverse=True)\n", 111 | " imgs, tags = zip(*data)\n", 112 | " \n", 113 | " # Merge images (from tuple of 3D tensor to 4D tensor).\n", 114 | " imgs = torch.stack(imgs, 0)\n", 115 | "\n", 116 | " # Merge tags (from tuple of 1D tensor to 2D tensor).\n", 117 | " lengths = [len(tag) for tag in tags]\n", 118 | " targets = torch.zeros(len(tags), max(lengths)).long()\n", 119 | " for i, tag in enumerate(tags):\n", 120 | " end = lengths[i]\n", 121 | " targets[i, :end] = tag[:end] \n", 122 | " return imgs, targets, lengths\n" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 3, 128 | "metadata": { 129 | "ExecuteTime": { 130 | "end_time": "2017-05-08T10:52:32.685184Z", 131 | "start_time": "2017-05-08T10:52:32.682334Z" 132 | }, 133 | "collapsed": true 134 | }, 135 | "outputs": [], 136 | "source": [ 137 | "normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],\n", 138 | " std=[0.229, 0.224, 0.225])\n", 139 | "ds_transform_augmented = transforms.Compose([\n", 140 | " transforms.RandomSizedCrop(224),\n", 141 | " PowerPIL(),\n", 142 | " transforms.ToTensor(),\n", 143 | " normalize,\n", 144 | " ])" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 4, 150 | "metadata": { 151 | "ExecuteTime": { 152 | "end_time": "2017-05-08T10:52:32.692774Z", 153 | "start_time": "2017-05-08T10:52:32.686438Z" 154 | } 155 | }, 156 | "outputs": [ 157 | { 158 | "name": "stdout", 159 | "output_type": "stream", 160 | "text": [ 161 | "{'': 0, '': 1, 'clear': 2, 'cloudy': 3, 'haze': 4, 'partly_cloudy': 5, 'agriculture': 6, 'artisinal_mine': 7, 'bare_ground': 8, 'blooming': 9, 'blow_down': 10, 'conventional_mine': 11, 'cultivation': 12, 'habitation': 13, 'primary': 14, 'road': 15, 'selective_logging': 16, 'slash_burn': 17, 'water': 18}\n" 162 | ] 163 | } 164 | ], 165 | "source": [ 166 | "vocab = ['','','clear', 'cloudy', 'haze','partly_cloudy',\n", 167 | " 'agriculture','artisinal_mine','bare_ground','blooming',\n", 168 | " 'blow_down','conventional_mine','cultivation','habitation',\n", 169 | " 'primary','road','selective_logging','slash_burn','water'\n", 170 | " ]\n", 171 | "\n", 172 | "word_to_ix = { word: i for i, word in enumerate(vocab) }\n", 173 | "print(word_to_ix)\n", 174 | "one_hot_mapping = {k:np.eye(19)[v] for k,v in word_to_ix.items()}\n", 175 | "# print(one_hot_mapping)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 5, 181 | "metadata": { 182 | "ExecuteTime": { 183 | "end_time": 
"2017-05-08T10:52:32.844270Z", 184 | "start_time": "2017-05-08T10:52:32.693843Z" 185 | }, 186 | "collapsed": true 187 | }, 188 | "outputs": [], 189 | "source": [ 190 | "X_train = ImgTagsDualFeedDataset('./data/train.csv','./data/train-jpg/','.jpg',\n", 191 | " word_to_ix,\n", 192 | " ds_transform_augmented\n", 193 | " )" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 6, 199 | "metadata": { 200 | "ExecuteTime": { 201 | "end_time": "2017-05-08T10:52:32.848407Z", 202 | "start_time": "2017-05-08T10:52:32.845707Z" 203 | }, 204 | "collapsed": true 205 | }, 206 | "outputs": [], 207 | "source": [ 208 | "train_loader = torch.utils.data.DataLoader(dataset=X_train, \n", 209 | " batch_size=64,\n", 210 | " shuffle=True,\n", 211 | " num_workers=4,\n", 212 | " pin_memory = True,\n", 213 | " collate_fn=X_train.collate_fn)" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 7, 219 | "metadata": { 220 | "ExecuteTime": { 221 | "end_time": "2017-05-08T10:52:32.919202Z", 222 | "start_time": "2017-05-08T10:52:32.849855Z" 223 | } 224 | }, 225 | "outputs": [ 226 | { 227 | "data": { 228 | "text/plain": [ 229 | "(\n", 230 | " ( 0 ,.,.) = \n", 231 | " -0.8849 -0.8678 -0.7822 ... -1.5699 -1.5699 -1.5528\n", 232 | " -0.8507 -0.8335 -0.7479 ... -1.5699 -1.5699 -1.5699\n", 233 | " -0.7650 -0.7650 -0.6794 ... -1.5699 -1.5870 -1.5870\n", 234 | " ... ⋱ ... \n", 235 | " -1.4500 -1.4500 -1.4329 ... -1.5014 -1.5357 -1.5528\n", 236 | " -1.4500 -1.4500 -1.4329 ... -1.5185 -1.5357 -1.5528\n", 237 | " -1.4500 -1.4500 -1.4329 ... -1.5185 -1.5528 -1.5528\n", 238 | " \n", 239 | " ( 1 ,.,.) = \n", 240 | " -0.7577 -0.7577 -0.7052 ... -1.3704 -1.3529 -1.3529\n", 241 | " -0.7227 -0.7227 -0.6527 ... -1.3704 -1.3529 -1.3529\n", 242 | " -0.6527 -0.6352 -0.5826 ... -1.3704 -1.3529 -1.3529\n", 243 | " ... ⋱ ... \n", 244 | " -1.2304 -1.2304 -1.2129 ... -1.2654 -1.3004 -1.3004\n", 245 | " -1.2654 -1.2654 -1.2479 ... -1.2654 -1.2829 -1.3004\n", 246 | " -1.3004 -1.2829 -1.2654 ... -1.2654 -1.2829 -1.3004\n", 247 | " \n", 248 | " ( 2 ,.,.) = \n", 249 | " -0.7413 -0.7413 -0.7064 ... -1.1944 -1.1770 -1.1596\n", 250 | " -0.7413 -0.7413 -0.6715 ... -1.1944 -1.1770 -1.1770\n", 251 | " -0.7064 -0.6890 -0.6018 ... -1.1596 -1.1770 -1.1770\n", 252 | " ... ⋱ ... \n", 253 | " -1.1073 -1.1073 -1.0724 ... -1.1770 -1.1944 -1.1944\n", 254 | " -1.1247 -1.1247 -1.1073 ... -1.1596 -1.1770 -1.1770\n", 255 | " -1.1421 -1.1247 -1.1247 ... 
-1.1596 -1.1421 -1.1596\n", 256 | " [torch.FloatTensor of size 3x224x224], \n", 257 | " 0\n", 258 | " 6\n", 259 | " 2\n", 260 | " 14\n", 261 | " 18\n", 262 | " 1\n", 263 | " [torch.FloatTensor of size 6])" 264 | ] 265 | }, 266 | "execution_count": 7, 267 | "metadata": {}, 268 | "output_type": "execute_result" 269 | } 270 | ], 271 | "source": [ 272 | "X_train[1]" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 8, 278 | "metadata": { 279 | "ExecuteTime": { 280 | "end_time": "2017-05-08T10:52:32.960983Z", 281 | "start_time": "2017-05-08T10:52:32.920544Z" 282 | }, 283 | "collapsed": true 284 | }, 285 | "outputs": [], 286 | "source": [ 287 | "class CNN_RNN_Fused(nn.Module):\n", 288 | " def __init__(self, vocab_size, embed_dim, num_rnn_layers):\n", 289 | " super(CNN_RNN_Fused, self).__init__()\n", 290 | " \n", 291 | " ## CNN part\n", 292 | " # Loading ResNet arch from PyTorch and weights from Pycaffe\n", 293 | " original_model = models.resnet50(pretrained=False)\n", 294 | " original_model.load_state_dict(torch.load('./zoo/resnet50.pth'))\n", 295 | " \n", 296 | " # Everything except the last linear layer\n", 297 | " self.convnet = nn.Sequential(*list(original_model.children())[:-1])\n", 298 | " \n", 299 | " # Get number of features of last layer\n", 300 | " num_feats_cnn = original_model.fc.in_features\n", 301 | " \n", 302 | " ## RNN part\n", 303 | " hidden_size = embed_dim # for simplification\n", 304 | " self.vocab_size = vocab_size\n", 305 | " self.embeds = nn.Embedding(vocab_size, embed_dim) # , padding_idx=0 Ignore the (0 in vocab) for gradient\n", 306 | " self.rnn = nn.LSTM(embed_dim, hidden_size, num_rnn_layers, batch_first = True)\n", 307 | " self.num_rnn_layers = num_rnn_layers\n", 308 | " \n", 309 | " ## Projection\n", 310 | " self.prj_cnn = nn.Linear(num_feats_cnn, embed_dim)\n", 311 | " self.prj_rnn = nn.Linear(hidden_size, embed_dim)\n", 312 | " \n", 313 | "\n", 314 | " ## Prediction\n", 315 | " # link embedding and decoding weight\n", 316 | " self.fc = nn.Linear(embed_dim, vocab_size)\n", 317 | " self.fc.weight = self.embeds.weight\n", 318 | " \n", 319 | " \n", 320 | " def forward(self, img, tags, lengths, hidden=None):\n", 321 | " ## CNN\n", 322 | " cnn_feats = self.convnet(img)\n", 323 | " cnn_feats = cnn_feats.view(cnn_feats.size(0), -1)\n", 324 | " cnn_feats = self.prj_cnn(cnn_feats)\n", 325 | " \n", 326 | " tag_ids = []\n", 327 | " embed = self.embeds(tags)\n", 328 | " for _ in tags:\n", 329 | " ## RNN\n", 330 | " rnn_out, hidden = self.rnn(embed, hidden)\n", 331 | "\n", 332 | " ## Projection\n", 333 | " rnn_out = self.prj_rnn(rnn_out[:,0,:]) # Extract the first prediction from sequence\n", 334 | " fuse = cnn_feats + rnn_out\n", 335 | " fuse = self.fc(fuse)\n", 336 | " predicted = fuse.max(1)[1]\n", 337 | " tag_ids.append(predicted)\n", 338 | " packed = self.embeds(predicted)\n", 339 | " tag_ids = torch.cat(tag_ids, 1)\n", 340 | " print(tag_ids)\n", 341 | " return tag_ids.squeeze()\n", 342 | "\n", 343 | " def genTags(self, inputs, states=None):\n", 344 | " tag_ids = []\n", 345 | " inputs = self.embeds(inputs)\n", 346 | " for i in range(self.vocab_size): # maximum sampling length\n", 347 | " hiddens, states = self.rnn(inputs, states) # (batch_size, 1, hidden_size)\n", 348 | " outputs = self.fc(hiddens.squeeze(1)) # (batch_size, vocab_size)\n", 349 | " # outputs = F.softmax(outputs)\n", 350 | " predicted = outputs.max(1)[1]\n", 351 | " tag_ids.append(predicted)\n", 352 | " inputs = self.embeds(predicted)\n", 353 | " tag_ids = torch.cat(tag_ids, 1) # 
(batch_size, 19)\n", 354 | " return tag_ids.squeeze()" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": 9, 360 | "metadata": { 361 | "ExecuteTime": { 362 | "end_time": "2017-05-08T10:52:35.557189Z", 363 | "start_time": "2017-05-08T10:52:32.962267Z" 364 | }, 365 | "collapsed": true 366 | }, 367 | "outputs": [], 368 | "source": [ 369 | "model = CNN_RNN_Fused(19, 5, 2).cuda()" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": 10, 375 | "metadata": { 376 | "ExecuteTime": { 377 | "end_time": "2017-05-08T10:52:35.561880Z", 378 | "start_time": "2017-05-08T10:52:35.559070Z" 379 | }, 380 | "collapsed": true 381 | }, 382 | "outputs": [], 383 | "source": [ 384 | "criterion = nn.CrossEntropyLoss()\n", 385 | "optimizer = torch.optim.Adam(model.parameters(), lr=0.01)" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": 11, 391 | "metadata": { 392 | "ExecuteTime": { 393 | "end_time": "2017-05-08T10:52:37.039622Z", 394 | "start_time": "2017-05-08T10:52:35.563145Z" 395 | } 396 | }, 397 | "outputs": [ 398 | { 399 | "name": "stdout", 400 | "output_type": "stream", 401 | "text": [ 402 | "Variable containing:\n", 403 | " 10 10 10 ... 10 10 10\n", 404 | " 10 10 10 ... 10 10 10\n", 405 | " 0 0 0 ... 0 0 0\n", 406 | " ... ⋱ ... \n", 407 | " 0 16 16 ... 16 16 16\n", 408 | " 10 10 10 ... 10 10 10\n", 409 | " 10 10 10 ... 10 10 10\n", 410 | "[torch.cuda.LongTensor of size 64x64 (GPU 0)]\n", 411 | "\n", 412 | "Variable containing:\n", 413 | " 0\n", 414 | " 0\n", 415 | " 0\n", 416 | " 0\n", 417 | " 0\n", 418 | " 0\n", 419 | " 0\n", 420 | " 0\n", 421 | " 0\n", 422 | " 0\n", 423 | " 0\n", 424 | " 0\n", 425 | " 0\n", 426 | " 0\n", 427 | " 0\n", 428 | " 0\n", 429 | " 0\n", 430 | " 0\n", 431 | " 0\n", 432 | " 0\n", 433 | " 0\n", 434 | " 0\n", 435 | " 0\n", 436 | " 0\n", 437 | " 0\n", 438 | " 0\n", 439 | " 0\n", 440 | " 0\n", 441 | " 0\n", 442 | " 0\n", 443 | " 0\n", 444 | " 0\n", 445 | " 0\n", 446 | " 0\n", 447 | " 0\n", 448 | " 0\n", 449 | " 0\n", 450 | " 0\n", 451 | " 0\n", 452 | " 0\n", 453 | " 0\n", 454 | " 0\n", 455 | " 0\n", 456 | " 0\n", 457 | " 0\n", 458 | " 0\n", 459 | " 0\n", 460 | " 0\n", 461 | " 0\n", 462 | " 0\n", 463 | " 0\n", 464 | " 0\n", 465 | " 0\n", 466 | " 0\n", 467 | " 0\n", 468 | " 0\n", 469 | " 0\n", 470 | " 0\n", 471 | " 0\n", 472 | " 0\n", 473 | " 0\n", 474 | " 0\n", 475 | " 0\n", 476 | " 0\n", 477 | " 6\n", 478 | " 6\n", 479 | " 6\n", 480 | " 6\n", 481 | " 6\n", 482 | " 6\n", 483 | " 6\n", 484 | " 6\n", 485 | " 6\n", 486 | " 6\n", 487 | " 6\n", 488 | " 7\n", 489 | " 6\n", 490 | " 6\n", 491 | " 6\n", 492 | " 6\n", 493 | " 6\n", 494 | " 2\n", 495 | " 2\n", 496 | " 2\n", 497 | " 2\n", 498 | " 2\n", 499 | " 5\n", 500 | " 2\n", 501 | " 5\n", 502 | " 2\n", 503 | " 5\n", 504 | " 5\n", 505 | " 2\n", 506 | " 4\n", 507 | " 2\n", 508 | " 5\n", 509 | " 2\n", 510 | " 2\n", 511 | " 2\n", 512 | " 2\n", 513 | " 2\n", 514 | " 5\n", 515 | " 2\n", 516 | " 2\n", 517 | " 2\n", 518 | " 2\n", 519 | " 2\n", 520 | " 5\n", 521 | " 5\n", 522 | " 4\n", 523 | " 5\n", 524 | " 2\n", 525 | " 4\n", 526 | " 5\n", 527 | " 5\n", 528 | " 2\n", 529 | " 4\n", 530 | " 4\n", 531 | " 4\n", 532 | " 2\n", 533 | " 4\n", 534 | " 2\n", 535 | " 2\n", 536 | " 3\n", 537 | " 3\n", 538 | " 3\n", 539 | " 3\n", 540 | " 3\n", 541 | " 2\n", 542 | " 8\n", 543 | " 2\n", 544 | " 2\n", 545 | " 2\n", 546 | " 2\n", 547 | " 2\n", 548 | " 5\n", 549 | " 12\n", 550 | " 2\n", 551 | " 2\n", 552 | " 2\n", 553 | " 2\n", 554 | " 2\n", 555 | " 5\n", 556 | " 2\n", 557 | " 4\n", 558 | " 
14\n", 559 | " 14\n", 560 | " 14\n", 561 | " 14\n", 562 | " 14\n", 563 | " 14\n", 564 | " 14\n", 565 | " 14\n", 566 | " 14\n", 567 | " 14\n", 568 | " 14\n", 569 | " 14\n", 570 | " 14\n", 571 | " 14\n", 572 | " 14\n", 573 | " 14\n", 574 | " 14\n", 575 | " 14\n", 576 | " 14\n", 577 | " 14\n", 578 | " 14\n", 579 | " 14\n", 580 | " 14\n", 581 | " 14\n", 582 | " 14\n", 583 | " 14\n", 584 | " 14\n", 585 | " 14\n", 586 | " 14\n", 587 | " 14\n", 588 | " 14\n", 589 | " 14\n", 590 | " 14\n", 591 | " 14\n", 592 | " 14\n", 593 | " 14\n", 594 | " 14\n", 595 | " 14\n", 596 | " 14\n", 597 | " 14\n", 598 | " 14\n", 599 | " 14\n", 600 | " 1\n", 601 | " 1\n", 602 | " 1\n", 603 | " 1\n", 604 | " 1\n", 605 | " 12\n", 606 | " 2\n", 607 | " 13\n", 608 | " 14\n", 609 | " 13\n", 610 | " 13\n", 611 | " 14\n", 612 | " 14\n", 613 | " 5\n", 614 | " 12\n", 615 | " 14\n", 616 | " 14\n", 617 | " 12\n", 618 | " 12\n", 619 | " 14\n", 620 | " 14\n", 621 | " 14\n", 622 | " 18\n", 623 | " 18\n", 624 | " 18\n", 625 | " 18\n", 626 | " 18\n", 627 | " 18\n", 628 | " 16\n", 629 | " 15\n", 630 | " 18\n", 631 | " 1\n", 632 | " 1\n", 633 | " 1\n", 634 | " 1\n", 635 | " 1\n", 636 | " 1\n", 637 | " 1\n", 638 | " 1\n", 639 | " 1\n", 640 | " 1\n", 641 | " 1\n", 642 | " 1\n", 643 | " 1\n", 644 | " 1\n", 645 | " 1\n", 646 | " 1\n", 647 | " 1\n", 648 | " 1\n", 649 | " 1\n", 650 | " 1\n", 651 | " 1\n", 652 | " 1\n", 653 | " 1\n", 654 | " 1\n", 655 | " 1\n", 656 | " 1\n", 657 | " 1\n", 658 | " 1\n", 659 | " 1\n", 660 | " 1\n", 661 | " 1\n", 662 | " 1\n", 663 | " 1\n", 664 | " 13\n", 665 | " 13\n", 666 | " 14\n", 667 | " 15\n", 668 | " 14\n", 669 | " 14\n", 670 | " 15\n", 671 | " 15\n", 672 | " 14\n", 673 | " 14\n", 674 | " 18\n", 675 | " 18\n", 676 | " 14\n", 677 | " 14\n", 678 | " 15\n", 679 | " 1\n", 680 | " 1\n", 681 | " 1\n", 682 | " 1\n", 683 | " 1\n", 684 | " 1\n", 685 | " 1\n", 686 | " 1\n", 687 | " 1\n", 688 | " 1\n", 689 | " 1\n", 690 | " 14\n", 691 | " 14\n", 692 | " 15\n", 693 | " 18\n", 694 | " 15\n", 695 | " 15\n", 696 | " 18\n", 697 | " 1\n", 698 | " 1\n", 699 | " 1\n", 700 | " 1\n", 701 | " 1\n", 702 | " 1\n", 703 | " 1\n", 704 | " 1\n", 705 | " 15\n", 706 | " 1\n", 707 | " 1\n", 708 | " 1\n", 709 | " 1\n", 710 | " 1\n", 711 | " 1\n", 712 | " 18\n", 713 | " 1\n", 714 | "[torch.cuda.LongTensor of size 301 (GPU 0)]\n", 715 | "\n" 716 | ] 717 | }, 718 | { 719 | "ename": "KeyError", 720 | "evalue": "", 721 | "output_type": "error", 722 | "traceback": [ 723 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 724 | "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", 725 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;31m# check one tag\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtargets\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcriterion\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtargets\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 16\u001b[0m \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m 
\u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 726 | "\u001b[0;32m/usr/lib/python3.6/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 204\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 205\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 206\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 207\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mhook\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 208\u001b[0m \u001b[0mhook_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 727 | "\u001b[0;32m/usr/lib/python3.6/site-packages/torch/nn/modules/loss.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input, target)\u001b[0m\n\u001b[1;32m 319\u001b[0m \u001b[0m_assert_no_grad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 320\u001b[0m return F.cross_entropy(input, target,\n\u001b[0;32m--> 321\u001b[0;31m self.weight, self.size_average)\n\u001b[0m\u001b[1;32m 322\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 323\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 728 | "\u001b[0;32m/usr/lib/python3.6/site-packages/torch/nn/functional.py\u001b[0m in \u001b[0;36mcross_entropy\u001b[0;34m(input, target, weight, size_average)\u001b[0m\n\u001b[1;32m 533\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0meach\u001b[0m \u001b[0mminibatch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 534\u001b[0m \"\"\"\n\u001b[0;32m--> 535\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mnll_loss\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlog_softmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msize_average\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 536\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 537\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 729 | "\u001b[0;32m/usr/lib/python3.6/site-packages/torch/nn/functional.py\u001b[0m in \u001b[0;36mlog_softmax\u001b[0;34m(input)\u001b[0m\n\u001b[1;32m 434\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 435\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mlog_softmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 
436\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_functions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mthnn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLogSoftmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 437\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 438\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 730 | "\u001b[0;32m/usr/lib/python3.6/site-packages/torch/nn/_functions/thnn/auto.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input, *params)\u001b[0m\n\u001b[1;32m 108\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 109\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 110\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtype2backend\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 111\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mparam\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 731 | "\u001b[0;32m/usr/lib/python3.6/site-packages/torch/_thnn/__init__.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__getitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackends\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 16\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 732 | "\u001b[0;31mKeyError\u001b[0m: " 733 | ] 734 | } 735 | ], 736 | "source": [ 737 | "epoch =0\n", 738 | "for batch_idx, (img, tags, lengths) in enumerate(train_loader):\n", 739 | " img = Variable(img).cuda()\n", 740 | " tags = Variable(tags).cuda()\n", 741 | " targets = pack_padded_sequence(tags, lengths, batch_first=True)[0]\n", 742 | "\n", 743 | " \n", 744 | " model.zero_grad()\n", 745 | " \n", 746 | " # Predict one tag at a time\n", 747 | " outputs = model(img, tags, lengths)\n", 748 | " \n", 749 | " # check one tag\n", 750 | " print(targets)\n", 751 | " loss = criterion(outputs, targets)\n", 752 | " loss.backward()\n", 753 | " optimizer.step()\n", 754 | " \n", 755 | " if batch_idx % 100 == 0:\n", 756 | " print('Train Epoch: {:03d} [{:05d}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n", 757 | " epoch, batch_idx * len(data), len(train_loader) * len(data),\n", 758 | " 100. 
* batch_idx / len(train_loader), loss.data[0]))" 759 | ] 760 | }, 761 | { 762 | "cell_type": "code", 763 | "execution_count": null, 764 | "metadata": { 765 | "collapsed": true 766 | }, 767 | "outputs": [], 768 | "source": [] 769 | } 770 | ], 771 | "metadata": { 772 | "kernelspec": { 773 | "display_name": "Python 3", 774 | "language": "python", 775 | "name": "python3" 776 | }, 777 | "language_info": { 778 | "codemirror_mode": { 779 | "name": "ipython", 780 | "version": 3 781 | }, 782 | "file_extension": ".py", 783 | "mimetype": "text/x-python", 784 | "name": "python", 785 | "nbconvert_exporter": "python", 786 | "pygments_lexer": "ipython3", 787 | "version": "3.6.0" 788 | } 789 | }, 790 | "nbformat": 4, 791 | "nbformat_minor": 2 792 | } 793 | -------------------------------------------------------------------------------- /baseline/RNN_experiment_1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2017-05-06T18:57:25.579069Z", 9 | "start_time": "2017-05-06T18:57:25.455726Z" 10 | }, 11 | "collapsed": true 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "from torchvision import models\n", 16 | "from torch.autograd import Variable\n", 17 | "\n", 18 | "## Utilities\n", 19 | "import random\n", 20 | "\n", 21 | "## Libraries\n", 22 | "import numpy as np\n", 23 | "\n", 24 | "\n", 25 | "## Torch\n", 26 | "import torch.optim as optim\n", 27 | "import torch.nn.functional as F\n", 28 | "from torchvision import transforms\n", 29 | "from torch.utils.data import DataLoader\n", 30 | "from torch.utils.data.sampler import SubsetRandomSampler\n", 31 | "import torch\n", 32 | "import torch.nn as nn" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": { 39 | "ExecuteTime": { 40 | "end_time": "2017-05-06T18:57:25.582917Z", 41 | "start_time": "2017-05-06T18:57:25.580329Z" 42 | }, 43 | "collapsed": true 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "## Normalization on ImageNet mean/std for finetuning\n", 48 | "normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],\n", 49 | " std=[0.229, 0.224, 0.225])\n", 50 | "\n", 51 | "save_dir = './snapshots'\n", 52 | "batch_size = 64" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 3, 58 | "metadata": { 59 | "ExecuteTime": { 60 | "end_time": "2017-05-06T18:57:26.107434Z", 61 | "start_time": "2017-05-06T18:57:25.584027Z" 62 | }, 63 | "collapsed": true 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "# Setting random seeds for reproducibility. 
(Caveat, some CuDNN algorithms are non-deterministic)\n", 68 | "torch.manual_seed(1337)\n", 69 | "torch.cuda.manual_seed(1337)\n", 70 | "np.random.seed(1337)\n", 71 | "random.seed(1337)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 4, 77 | "metadata": { 78 | "ExecuteTime": { 79 | "end_time": "2017-05-06T18:57:26.114413Z", 80 | "start_time": "2017-05-06T18:57:26.109005Z" 81 | }, 82 | "collapsed": true 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "## Normalization only for validation and test\n", 87 | "ds_transform_raw = transforms.Compose([\n", 88 | " transforms.CenterCrop(224),\n", 89 | " transforms.ToTensor(),\n", 90 | " normalize\n", 91 | " ])" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 57, 97 | "metadata": { 98 | "ExecuteTime": { 99 | "end_time": "2017-05-06T19:12:06.896968Z", 100 | "start_time": "2017-05-06T19:12:06.885442Z" 101 | }, 102 | "collapsed": true 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "# Load model from best iteration\n", 107 | "from src.p_neuro import ResNet50\n", 108 | "\n", 109 | "class CNN(nn.Module):\n", 110 | " ## We use ResNet weights from PyCaffe.\n", 111 | " def __init__(self, embed_size):\n", 112 | " super(CNN, self).__init__()\n", 113 | " \n", 114 | " # Loading pretrained ResNet as feature extractor\n", 115 | " original_model = ResNet50(17)\n", 116 | " model_path = './snapshots/2017-05-06_1235-cloud-habitation-PowerPIL-model_best.pth'\n", 117 | " checkpoint = torch.load(model_path)\n", 118 | " original_model.load_state_dict(checkpoint['state_dict'])\n", 119 | " \n", 120 | " # Everything except the last linear layer\n", 121 | " self.features = nn.Sequential(*list(original_model.children())[:-1])\n", 122 | " \n", 123 | " # Freeze those weights\n", 124 | " for p in self.features.parameters():\n", 125 | " p.requires_grad = False\n", 126 | "\n", 127 | " # Get number of features of last layer\n", 128 | " num_feats = original_model.classifier[0].in_features\n", 129 | " \n", 130 | " self.fc = nn.Linear(num_feats, embed_size)\n", 131 | " self.bn = nn.BatchNorm1d(embed_size, momentum=0.01)\n", 132 | "\n", 133 | " def forward(self, x):\n", 134 | " f = self.features(x)\n", 135 | " f = f.view(f.size(0), -1)\n", 136 | " out = self.fc(f)\n", 137 | " out = self.bn(out)\n", 138 | " return out" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 58, 144 | "metadata": { 145 | "ExecuteTime": { 146 | "end_time": "2017-05-06T19:12:08.919347Z", 147 | "start_time": "2017-05-06T19:12:07.431937Z" 148 | }, 149 | "collapsed": true 150 | }, 151 | "outputs": [], 152 | "source": [ 153 | "encoderCNN = CNN(2048).cuda()" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 59, 159 | "metadata": { 160 | "ExecuteTime": { 161 | "end_time": "2017-05-06T19:12:08.922481Z", 162 | "start_time": "2017-05-06T19:12:08.920546Z" 163 | }, 164 | "collapsed": true 165 | }, 166 | "outputs": [], 167 | "source": [ 168 | "from torch.nn.init import kaiming_normal" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 94, 174 | "metadata": { 175 | "ExecuteTime": { 176 | "end_time": "2017-05-06T19:37:08.514664Z", 177 | "start_time": "2017-05-06T19:37:08.507521Z" 178 | }, 179 | "collapsed": true 180 | }, 181 | "outputs": [], 182 | "source": [ 183 | "class DecoderRNN(nn.Module):\n", 184 | " def __init__(self, num_feats, num_classes, hidden_size, num_layers):\n", 185 | " super(DecoderRNN, self).__init__()\n", 186 | " self.rnn = nn.GRU(input_size=num_feats,\n", 187 | " 
hidden_size=hidden_size,\n", 188 | " num_layers=num_layers,\n", 189 | " batch_first = True)\n", 190 | " self.classifier = nn.Linear(hidden_size, num_classes)\n", 191 | " self.hidden_size = hidden_size\n", 192 | " \n", 193 | " # Init of last layer\n", 194 | " kaiming_normal(self.classifier.weight)\n", 195 | " \n", 196 | "\n", 197 | " def forward(self, feats, hidden=None):\n", 198 | " x, hidden = self.rnn(feats.unsqueeze(1), hidden)\n", 199 | " x = x.view(-1, self.hidden_size)\n", 200 | " x = self.classifier(x)\n", 201 | " return x" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 95, 207 | "metadata": { 208 | "ExecuteTime": { 209 | "end_time": "2017-05-06T19:37:09.236010Z", 210 | "start_time": "2017-05-06T19:37:09.227380Z" 211 | }, 212 | "collapsed": true 213 | }, 214 | "outputs": [], 215 | "source": [ 216 | "decoderRNN = DecoderRNN(2048, 17, 64, 10).cuda()" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 96, 222 | "metadata": { 223 | "ExecuteTime": { 224 | "end_time": "2017-05-06T19:37:09.888453Z", 225 | "start_time": "2017-05-06T19:37:09.883933Z" 226 | }, 227 | "collapsed": true 228 | }, 229 | "outputs": [], 230 | "source": [ 231 | "## Normalization on ImageNet mean/std for finetuning\n", 232 | "normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],\n", 233 | " std=[0.229, 0.224, 0.225])\n", 234 | "\n", 235 | "# Note, p_training has lr_decay automated\n", 236 | "optimizer = optim.SGD(decoderRNN.parameters(), lr=1e-1, momentum=0.9) # Finetuning whole model\n", 237 | "\n", 238 | "# criterion = ConvolutedLoss()\n", 239 | "criterion = torch.nn.MultiLabelSoftMarginLoss(\n", 240 | " weight = torch.from_numpy(\n", 241 | " 1/np.array([1, 3, 2, 1,\n", 242 | " 1, 3, 2, 3,\n", 243 | " 4, 4, 1, 2,\n", 244 | " 1, 1, 3, 4, 1])\n", 245 | " )).float().cuda()" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 97, 251 | "metadata": { 252 | "ExecuteTime": { 253 | "end_time": "2017-05-06T19:37:10.454299Z", 254 | "start_time": "2017-05-06T19:37:10.451752Z" 255 | }, 256 | "collapsed": true 257 | }, 258 | "outputs": [], 259 | "source": [ 260 | "from src.p_data_augmentation import PowerPIL\n", 261 | "from src.p2_dataload import KaggleAmazonDataset\n", 262 | "from src.p_model_selection import train_valid_split\n", 263 | "from src.p_sampler import SubsetSampler, balance_weights" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 98, 269 | "metadata": { 270 | "ExecuteTime": { 271 | "end_time": "2017-05-06T19:37:11.149969Z", 272 | "start_time": "2017-05-06T19:37:10.773470Z" 273 | }, 274 | "collapsed": true 275 | }, 276 | "outputs": [], 277 | "source": [ 278 | "# Setting random seeds for reproducibility. 
(Caveat, some CuDNN algorithms are non-deterministic)\n", 279 | "torch.manual_seed(1337)\n", 280 | "torch.cuda.manual_seed(1337)\n", 281 | "np.random.seed(1337)\n", 282 | "random.seed(1337)\n", 283 | "\n", 284 | "##############################################################\n", 285 | "## Loading the dataset\n", 286 | "\n", 287 | "## Augmentation + Normalization for full training\n", 288 | "ds_transform_augmented = transforms.Compose([\n", 289 | " transforms.RandomSizedCrop(224),\n", 290 | " PowerPIL(),\n", 291 | " transforms.ToTensor(),\n", 292 | " normalize\n", 293 | "])\n", 294 | "\n", 295 | "## Normalization only for validation and test\n", 296 | "ds_transform_raw = transforms.Compose([\n", 297 | " transforms.Scale(224),\n", 298 | " transforms.ToTensor(),\n", 299 | " normalize\n", 300 | " ])\n", 301 | "\n", 302 | "#### ######### ######## ########### #####\n", 303 | "\n", 304 | "X_train = KaggleAmazonDataset('./data/train.csv','./data/train-jpg/','.jpg',\n", 305 | " ds_transform_augmented\n", 306 | " )\n", 307 | "X_val = KaggleAmazonDataset('./data/train.csv','./data/train-jpg/','.jpg',\n", 308 | " ds_transform_raw\n", 309 | " )\n", 310 | "\n", 311 | "# Creating a validation split\n", 312 | "train_idx, valid_idx = train_valid_split(X_train, 0.2)\n", 313 | "\n", 314 | "train_sampler = SubsetRandomSampler(train_idx)\n", 315 | "valid_sampler = SubsetSampler(valid_idx)\n", 316 | "\n", 317 | "###### ########## ########## ######## #########\n", 318 | "\n", 319 | "# Both dataloader loads from the same dataset but with different indices\n", 320 | "train_loader = DataLoader(X_train,\n", 321 | " batch_size=batch_size,\n", 322 | " sampler=train_sampler,\n", 323 | " num_workers=4,\n", 324 | " pin_memory=True)\n", 325 | "\n", 326 | "valid_loader = DataLoader(X_val,\n", 327 | " batch_size=batch_size,\n", 328 | " sampler=valid_sampler,\n", 329 | " num_workers=4,\n", 330 | " pin_memory=True)" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 99, 336 | "metadata": { 337 | "ExecuteTime": { 338 | "end_time": "2017-05-06T19:37:11.358273Z", 339 | "start_time": "2017-05-06T19:37:11.352047Z" 340 | }, 341 | "collapsed": true 342 | }, 343 | "outputs": [], 344 | "source": [ 345 | "def train(epoch, train_loader, encoder, decoder, criterion, optimizer):\n", 346 | " encoder.eval()\n", 347 | " decoder.train()\n", 348 | " for batch_idx, (data, target) in enumerate(train_loader):\n", 349 | " data, target = data.cuda(async=True), target.cuda(async=True) # On GPU\n", 350 | " data, target = Variable(data), Variable(target, requires_grad=False)\n", 351 | " optimizer.zero_grad()\n", 352 | " encoded = encoder(data)\n", 353 | " output = decoder(encoded)\n", 354 | " loss = criterion(output, target)\n", 355 | " loss.backward()\n", 356 | " optimizer.step()\n", 357 | " if batch_idx % 10 == 0:\n", 358 | " print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n", 359 | " epoch, batch_idx * len(data), len(train_loader.dataset),\n", 360 | " 100. 
* batch_idx / len(train_loader), loss.data[0]))" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": 100, 366 | "metadata": { 367 | "ExecuteTime": { 368 | "end_time": "2017-05-06T19:51:05.592592Z", 369 | "start_time": "2017-05-06T19:37:12.612141Z" 370 | } 371 | }, 372 | "outputs": [ 373 | { 374 | "name": "stdout", 375 | "output_type": "stream", 376 | "text": [ 377 | "Train Epoch: 1 [0/40479 (0%)]\tLoss: 0.435556\n", 378 | "Train Epoch: 1 [640/40479 (2%)]\tLoss: 0.409556\n", 379 | "Train Epoch: 1 [1280/40479 (4%)]\tLoss: 0.369687\n", 380 | "Train Epoch: 1 [1920/40479 (6%)]\tLoss: 0.339583\n", 381 | "Train Epoch: 1 [2560/40479 (8%)]\tLoss: 0.303115\n", 382 | "Train Epoch: 1 [3200/40479 (10%)]\tLoss: 0.287817\n", 383 | "Train Epoch: 1 [3840/40479 (12%)]\tLoss: 0.251417\n", 384 | "Train Epoch: 1 [4480/40479 (14%)]\tLoss: 0.244486\n", 385 | "Train Epoch: 1 [5120/40479 (16%)]\tLoss: 0.251688\n", 386 | "Train Epoch: 1 [5760/40479 (18%)]\tLoss: 0.218834\n", 387 | "Train Epoch: 1 [6400/40479 (20%)]\tLoss: 0.228922\n", 388 | "Train Epoch: 1 [7040/40479 (22%)]\tLoss: 0.257038\n", 389 | "Train Epoch: 1 [7680/40479 (24%)]\tLoss: 0.240509\n", 390 | "Train Epoch: 1 [8320/40479 (26%)]\tLoss: 0.249612\n", 391 | "Train Epoch: 1 [8960/40479 (28%)]\tLoss: 0.212539\n", 392 | "Train Epoch: 1 [9600/40479 (30%)]\tLoss: 0.213244\n", 393 | "Train Epoch: 1 [10240/40479 (32%)]\tLoss: 0.232863\n", 394 | "Train Epoch: 1 [10880/40479 (34%)]\tLoss: 0.229000\n", 395 | "Train Epoch: 1 [11520/40479 (36%)]\tLoss: 0.217483\n", 396 | "Train Epoch: 1 [12160/40479 (38%)]\tLoss: 0.240352\n", 397 | "Train Epoch: 1 [12800/40479 (40%)]\tLoss: 0.228196\n", 398 | "Train Epoch: 1 [13440/40479 (42%)]\tLoss: 0.215130\n", 399 | "Train Epoch: 1 [14080/40479 (43%)]\tLoss: 0.215860\n", 400 | "Train Epoch: 1 [14720/40479 (45%)]\tLoss: 0.209214\n", 401 | "Train Epoch: 1 [15360/40479 (47%)]\tLoss: 0.213253\n", 402 | "Train Epoch: 1 [16000/40479 (49%)]\tLoss: 0.215986\n", 403 | "Train Epoch: 1 [16640/40479 (51%)]\tLoss: 0.204092\n", 404 | "Train Epoch: 1 [17280/40479 (53%)]\tLoss: 0.217447\n", 405 | "Train Epoch: 1 [17920/40479 (55%)]\tLoss: 0.224149\n", 406 | "Train Epoch: 1 [18560/40479 (57%)]\tLoss: 0.208092\n", 407 | "Train Epoch: 1 [19200/40479 (59%)]\tLoss: 0.208012\n", 408 | "Train Epoch: 1 [19840/40479 (61%)]\tLoss: 0.228090\n", 409 | "Train Epoch: 1 [20480/40479 (63%)]\tLoss: 0.227201\n", 410 | "Train Epoch: 1 [21120/40479 (65%)]\tLoss: 0.218654\n", 411 | "Train Epoch: 1 [21760/40479 (67%)]\tLoss: 0.245048\n", 412 | "Train Epoch: 1 [22400/40479 (69%)]\tLoss: 0.210235\n", 413 | "Train Epoch: 1 [23040/40479 (71%)]\tLoss: 0.247935\n", 414 | "Train Epoch: 1 [23680/40479 (73%)]\tLoss: 0.236987\n", 415 | "Train Epoch: 1 [24320/40479 (75%)]\tLoss: 0.216959\n", 416 | "Train Epoch: 1 [24960/40479 (77%)]\tLoss: 0.235669\n", 417 | "Train Epoch: 1 [25600/40479 (79%)]\tLoss: 0.226261\n", 418 | "Train Epoch: 1 [26240/40479 (81%)]\tLoss: 0.234269\n", 419 | "Train Epoch: 1 [26880/40479 (83%)]\tLoss: 0.218615\n", 420 | "Train Epoch: 1 [27520/40479 (85%)]\tLoss: 0.207581\n", 421 | "Train Epoch: 1 [28160/40479 (87%)]\tLoss: 0.211591\n", 422 | "Train Epoch: 1 [28800/40479 (89%)]\tLoss: 0.204346\n", 423 | "Train Epoch: 1 [29440/40479 (91%)]\tLoss: 0.221209\n", 424 | "Train Epoch: 1 [30080/40479 (93%)]\tLoss: 0.231252\n", 425 | "Train Epoch: 1 [30720/40479 (95%)]\tLoss: 0.233583\n", 426 | "Train Epoch: 1 [31360/40479 (97%)]\tLoss: 0.213050\n", 427 | "Train Epoch: 1 [32000/40479 (99%)]\tLoss: 0.208800\n", 428 | "Train Epoch: 2 
[0/40479 (0%)]\tLoss: 0.221830\n", 429 | "Train Epoch: 2 [640/40479 (2%)]\tLoss: 0.226345\n", 430 | "Train Epoch: 2 [1280/40479 (4%)]\tLoss: 0.236332\n", 431 | "Train Epoch: 2 [1920/40479 (6%)]\tLoss: 0.220857\n", 432 | "Train Epoch: 2 [2560/40479 (8%)]\tLoss: 0.199616\n", 433 | "Train Epoch: 2 [3200/40479 (10%)]\tLoss: 0.217765\n", 434 | "Train Epoch: 2 [3840/40479 (12%)]\tLoss: 0.224895\n", 435 | "Train Epoch: 2 [4480/40479 (14%)]\tLoss: 0.192970\n", 436 | "Train Epoch: 2 [5120/40479 (16%)]\tLoss: 0.223005\n", 437 | "Train Epoch: 2 [5760/40479 (18%)]\tLoss: 0.221301\n", 438 | "Train Epoch: 2 [6400/40479 (20%)]\tLoss: 0.232181\n", 439 | "Train Epoch: 2 [7040/40479 (22%)]\tLoss: 0.223385\n", 440 | "Train Epoch: 2 [7680/40479 (24%)]\tLoss: 0.221813\n", 441 | "Train Epoch: 2 [8320/40479 (26%)]\tLoss: 0.224682\n", 442 | "Train Epoch: 2 [8960/40479 (28%)]\tLoss: 0.222579\n", 443 | "Train Epoch: 2 [9600/40479 (30%)]\tLoss: 0.200874\n", 444 | "Train Epoch: 2 [10240/40479 (32%)]\tLoss: 0.214944\n", 445 | "Train Epoch: 2 [10880/40479 (34%)]\tLoss: 0.207476\n", 446 | "Train Epoch: 2 [11520/40479 (36%)]\tLoss: 0.220802\n", 447 | "Train Epoch: 2 [12160/40479 (38%)]\tLoss: 0.221033\n", 448 | "Train Epoch: 2 [12800/40479 (40%)]\tLoss: 0.197412\n", 449 | "Train Epoch: 2 [13440/40479 (42%)]\tLoss: 0.231282\n", 450 | "Train Epoch: 2 [14080/40479 (43%)]\tLoss: 0.219897\n", 451 | "Train Epoch: 2 [14720/40479 (45%)]\tLoss: 0.215508\n", 452 | "Train Epoch: 2 [15360/40479 (47%)]\tLoss: 0.213763\n", 453 | "Train Epoch: 2 [16000/40479 (49%)]\tLoss: 0.226117\n", 454 | "Train Epoch: 2 [16640/40479 (51%)]\tLoss: 0.209263\n", 455 | "Train Epoch: 2 [17280/40479 (53%)]\tLoss: 0.221371\n", 456 | "Train Epoch: 2 [17920/40479 (55%)]\tLoss: 0.203588\n", 457 | "Train Epoch: 2 [18560/40479 (57%)]\tLoss: 0.208028\n", 458 | "Train Epoch: 2 [19200/40479 (59%)]\tLoss: 0.240881\n", 459 | "Train Epoch: 2 [19840/40479 (61%)]\tLoss: 0.219481\n", 460 | "Train Epoch: 2 [20480/40479 (63%)]\tLoss: 0.224081\n", 461 | "Train Epoch: 2 [21120/40479 (65%)]\tLoss: 0.226633\n", 462 | "Train Epoch: 2 [21760/40479 (67%)]\tLoss: 0.209731\n", 463 | "Train Epoch: 2 [22400/40479 (69%)]\tLoss: 0.196602\n", 464 | "Train Epoch: 2 [23040/40479 (71%)]\tLoss: 0.224780\n", 465 | "Train Epoch: 2 [23680/40479 (73%)]\tLoss: 0.221504\n", 466 | "Train Epoch: 2 [24320/40479 (75%)]\tLoss: 0.196250\n", 467 | "Train Epoch: 2 [24960/40479 (77%)]\tLoss: 0.209817\n", 468 | "Train Epoch: 2 [25600/40479 (79%)]\tLoss: 0.215482\n", 469 | "Train Epoch: 2 [26240/40479 (81%)]\tLoss: 0.245271\n", 470 | "Train Epoch: 2 [26880/40479 (83%)]\tLoss: 0.240716\n", 471 | "Train Epoch: 2 [27520/40479 (85%)]\tLoss: 0.234947\n", 472 | "Train Epoch: 2 [28160/40479 (87%)]\tLoss: 0.207777\n", 473 | "Train Epoch: 2 [28800/40479 (89%)]\tLoss: 0.217220\n", 474 | "Train Epoch: 2 [29440/40479 (91%)]\tLoss: 0.225086\n", 475 | "Train Epoch: 2 [30080/40479 (93%)]\tLoss: 0.200565\n", 476 | "Train Epoch: 2 [30720/40479 (95%)]\tLoss: 0.245808\n", 477 | "Train Epoch: 2 [31360/40479 (97%)]\tLoss: 0.217046\n", 478 | "Train Epoch: 2 [32000/40479 (99%)]\tLoss: 0.228471\n", 479 | "Train Epoch: 3 [0/40479 (0%)]\tLoss: 0.251557\n", 480 | "Train Epoch: 3 [640/40479 (2%)]\tLoss: 0.232586\n", 481 | "Train Epoch: 3 [1280/40479 (4%)]\tLoss: 0.202972\n", 482 | "Train Epoch: 3 [1920/40479 (6%)]\tLoss: 0.223908\n", 483 | "Train Epoch: 3 [2560/40479 (8%)]\tLoss: 0.201460\n", 484 | "Train Epoch: 3 [3200/40479 (10%)]\tLoss: 0.214049\n", 485 | "Train Epoch: 3 [3840/40479 (12%)]\tLoss: 0.208666\n", 486 | "Train Epoch: 3 
[4480/40479 (14%)]\tLoss: 0.195132\n", 487 | "Train Epoch: 3 [5120/40479 (16%)]\tLoss: 0.253805\n", 488 | "Train Epoch: 3 [5760/40479 (18%)]\tLoss: 0.200275\n", 489 | "Train Epoch: 3 [6400/40479 (20%)]\tLoss: 0.211788\n", 490 | "Train Epoch: 3 [7040/40479 (22%)]\tLoss: 0.223072\n", 491 | "Train Epoch: 3 [7680/40479 (24%)]\tLoss: 0.238602\n", 492 | "Train Epoch: 3 [8320/40479 (26%)]\tLoss: 0.219171\n", 493 | "Train Epoch: 3 [8960/40479 (28%)]\tLoss: 0.233595\n", 494 | "Train Epoch: 3 [9600/40479 (30%)]\tLoss: 0.215720\n", 495 | "Train Epoch: 3 [10240/40479 (32%)]\tLoss: 0.237069\n", 496 | "Train Epoch: 3 [10880/40479 (34%)]\tLoss: 0.219968\n", 497 | "Train Epoch: 3 [11520/40479 (36%)]\tLoss: 0.210888\n", 498 | "Train Epoch: 3 [12160/40479 (38%)]\tLoss: 0.251297\n", 499 | "Train Epoch: 3 [12800/40479 (40%)]\tLoss: 0.202935\n", 500 | "Train Epoch: 3 [13440/40479 (42%)]\tLoss: 0.212312\n", 501 | "Train Epoch: 3 [14080/40479 (43%)]\tLoss: 0.232535\n", 502 | "Train Epoch: 3 [14720/40479 (45%)]\tLoss: 0.229978\n", 503 | "Train Epoch: 3 [15360/40479 (47%)]\tLoss: 0.226644\n", 504 | "Train Epoch: 3 [16000/40479 (49%)]\tLoss: 0.222986\n", 505 | "Train Epoch: 3 [16640/40479 (51%)]\tLoss: 0.216303\n", 506 | "Train Epoch: 3 [17280/40479 (53%)]\tLoss: 0.214122\n", 507 | "Train Epoch: 3 [17920/40479 (55%)]\tLoss: 0.226599\n", 508 | "Train Epoch: 3 [18560/40479 (57%)]\tLoss: 0.245155\n", 509 | "Train Epoch: 3 [19200/40479 (59%)]\tLoss: 0.217142\n", 510 | "Train Epoch: 3 [19840/40479 (61%)]\tLoss: 0.237862\n", 511 | "Train Epoch: 3 [20480/40479 (63%)]\tLoss: 0.210611\n", 512 | "Train Epoch: 3 [21120/40479 (65%)]\tLoss: 0.210613\n", 513 | "Train Epoch: 3 [21760/40479 (67%)]\tLoss: 0.220819\n", 514 | "Train Epoch: 3 [22400/40479 (69%)]\tLoss: 0.206627\n", 515 | "Train Epoch: 3 [23040/40479 (71%)]\tLoss: 0.218339\n", 516 | "Train Epoch: 3 [23680/40479 (73%)]\tLoss: 0.211441\n", 517 | "Train Epoch: 3 [24320/40479 (75%)]\tLoss: 0.205327\n", 518 | "Train Epoch: 3 [24960/40479 (77%)]\tLoss: 0.204353\n", 519 | "Train Epoch: 3 [25600/40479 (79%)]\tLoss: 0.206513\n", 520 | "Train Epoch: 3 [26240/40479 (81%)]\tLoss: 0.217377\n", 521 | "Train Epoch: 3 [26880/40479 (83%)]\tLoss: 0.227448\n", 522 | "Train Epoch: 3 [27520/40479 (85%)]\tLoss: 0.204768\n", 523 | "Train Epoch: 3 [28160/40479 (87%)]\tLoss: 0.212927\n", 524 | "Train Epoch: 3 [28800/40479 (89%)]\tLoss: 0.238617\n", 525 | "Train Epoch: 3 [29440/40479 (91%)]\tLoss: 0.206868\n", 526 | "Train Epoch: 3 [30080/40479 (93%)]\tLoss: 0.224896\n", 527 | "Train Epoch: 3 [30720/40479 (95%)]\tLoss: 0.215602\n", 528 | "Train Epoch: 3 [31360/40479 (97%)]\tLoss: 0.230982\n", 529 | "Train Epoch: 3 [32000/40479 (99%)]\tLoss: 0.221313\n", 530 | "Train Epoch: 4 [0/40479 (0%)]\tLoss: 0.209418\n", 531 | "Train Epoch: 4 [640/40479 (2%)]\tLoss: 0.218788\n", 532 | "Train Epoch: 4 [1280/40479 (4%)]\tLoss: 0.244478\n", 533 | "Train Epoch: 4 [1920/40479 (6%)]\tLoss: 0.213261\n", 534 | "Train Epoch: 4 [2560/40479 (8%)]\tLoss: 0.205852\n", 535 | "Train Epoch: 4 [3200/40479 (10%)]\tLoss: 0.238808\n", 536 | "Train Epoch: 4 [3840/40479 (12%)]\tLoss: 0.210391\n", 537 | "Train Epoch: 4 [4480/40479 (14%)]\tLoss: 0.228555\n", 538 | "Train Epoch: 4 [5120/40479 (16%)]\tLoss: 0.202205\n", 539 | "Train Epoch: 4 [5760/40479 (18%)]\tLoss: 0.244985\n", 540 | "Train Epoch: 4 [6400/40479 (20%)]\tLoss: 0.225250\n", 541 | "Train Epoch: 4 [7040/40479 (22%)]\tLoss: 0.228128\n", 542 | "Train Epoch: 4 [7680/40479 (24%)]\tLoss: 0.201752\n" 543 | ] 544 | }, 545 | { 546 | "name": "stdout", 547 | "output_type": 
"stream", 548 | "text": [ 549 | "Train Epoch: 4 [8320/40479 (26%)]\tLoss: 0.192985\n", 550 | "Train Epoch: 4 [8960/40479 (28%)]\tLoss: 0.232153\n", 551 | "Train Epoch: 4 [9600/40479 (30%)]\tLoss: 0.209456\n", 552 | "Train Epoch: 4 [10240/40479 (32%)]\tLoss: 0.212727\n", 553 | "Train Epoch: 4 [10880/40479 (34%)]\tLoss: 0.218378\n", 554 | "Train Epoch: 4 [11520/40479 (36%)]\tLoss: 0.216620\n", 555 | "Train Epoch: 4 [12160/40479 (38%)]\tLoss: 0.208027\n", 556 | "Train Epoch: 4 [12800/40479 (40%)]\tLoss: 0.234140\n", 557 | "Train Epoch: 4 [13440/40479 (42%)]\tLoss: 0.216869\n", 558 | "Train Epoch: 4 [14080/40479 (43%)]\tLoss: 0.192126\n", 559 | "Train Epoch: 4 [14720/40479 (45%)]\tLoss: 0.213921\n", 560 | "Train Epoch: 4 [15360/40479 (47%)]\tLoss: 0.208655\n", 561 | "Train Epoch: 4 [16000/40479 (49%)]\tLoss: 0.221401\n", 562 | "Train Epoch: 4 [16640/40479 (51%)]\tLoss: 0.226007\n", 563 | "Train Epoch: 4 [17280/40479 (53%)]\tLoss: 0.245658\n", 564 | "Train Epoch: 4 [17920/40479 (55%)]\tLoss: 0.216689\n", 565 | "Train Epoch: 4 [18560/40479 (57%)]\tLoss: 0.204786\n", 566 | "Train Epoch: 4 [19200/40479 (59%)]\tLoss: 0.231551\n", 567 | "Train Epoch: 4 [19840/40479 (61%)]\tLoss: 0.233882\n", 568 | "Train Epoch: 4 [20480/40479 (63%)]\tLoss: 0.237672\n", 569 | "Train Epoch: 4 [21120/40479 (65%)]\tLoss: 0.229086\n", 570 | "Train Epoch: 4 [21760/40479 (67%)]\tLoss: 0.223865\n", 571 | "Train Epoch: 4 [22400/40479 (69%)]\tLoss: 0.223414\n", 572 | "Train Epoch: 4 [23040/40479 (71%)]\tLoss: 0.232349\n", 573 | "Train Epoch: 4 [23680/40479 (73%)]\tLoss: 0.221572\n", 574 | "Train Epoch: 4 [24320/40479 (75%)]\tLoss: 0.216628\n", 575 | "Train Epoch: 4 [24960/40479 (77%)]\tLoss: 0.214147\n", 576 | "Train Epoch: 4 [25600/40479 (79%)]\tLoss: 0.225763\n", 577 | "Train Epoch: 4 [26240/40479 (81%)]\tLoss: 0.213933\n", 578 | "Train Epoch: 4 [26880/40479 (83%)]\tLoss: 0.208559\n", 579 | "Train Epoch: 4 [27520/40479 (85%)]\tLoss: 0.212612\n", 580 | "Train Epoch: 4 [28160/40479 (87%)]\tLoss: 0.209097\n", 581 | "Train Epoch: 4 [28800/40479 (89%)]\tLoss: 0.228192\n", 582 | "Train Epoch: 4 [29440/40479 (91%)]\tLoss: 0.221341\n", 583 | "Train Epoch: 4 [30080/40479 (93%)]\tLoss: 0.219709\n", 584 | "Train Epoch: 4 [30720/40479 (95%)]\tLoss: 0.216520\n", 585 | "Train Epoch: 4 [31360/40479 (97%)]\tLoss: 0.206181\n", 586 | "Train Epoch: 4 [32000/40479 (99%)]\tLoss: 0.200449\n", 587 | "Train Epoch: 5 [0/40479 (0%)]\tLoss: 0.202878\n", 588 | "Train Epoch: 5 [640/40479 (2%)]\tLoss: 0.223886\n", 589 | "Train Epoch: 5 [1280/40479 (4%)]\tLoss: 0.232988\n", 590 | "Train Epoch: 5 [1920/40479 (6%)]\tLoss: 0.212176\n", 591 | "Train Epoch: 5 [2560/40479 (8%)]\tLoss: 0.208125\n", 592 | "Train Epoch: 5 [3200/40479 (10%)]\tLoss: 0.219078\n", 593 | "Train Epoch: 5 [3840/40479 (12%)]\tLoss: 0.197650\n", 594 | "Train Epoch: 5 [4480/40479 (14%)]\tLoss: 0.218611\n", 595 | "Train Epoch: 5 [5120/40479 (16%)]\tLoss: 0.194874\n", 596 | "Train Epoch: 5 [5760/40479 (18%)]\tLoss: 0.238438\n", 597 | "Train Epoch: 5 [6400/40479 (20%)]\tLoss: 0.204757\n", 598 | "Train Epoch: 5 [7040/40479 (22%)]\tLoss: 0.207183\n", 599 | "Train Epoch: 5 [7680/40479 (24%)]\tLoss: 0.205667\n", 600 | "Train Epoch: 5 [8320/40479 (26%)]\tLoss: 0.218935\n", 601 | "Train Epoch: 5 [8960/40479 (28%)]\tLoss: 0.214910\n", 602 | "Train Epoch: 5 [9600/40479 (30%)]\tLoss: 0.245063\n", 603 | "Train Epoch: 5 [10240/40479 (32%)]\tLoss: 0.212979\n", 604 | "Train Epoch: 5 [10880/40479 (34%)]\tLoss: 0.214062\n", 605 | "Train Epoch: 5 [11520/40479 (36%)]\tLoss: 0.214615\n", 606 | "Train Epoch: 5 
[12160/40479 (38%)]\tLoss: 0.208764\n", 607 | "Train Epoch: 5 [12800/40479 (40%)]\tLoss: 0.216662\n", 608 | "Train Epoch: 5 [13440/40479 (42%)]\tLoss: 0.225370\n", 609 | "Train Epoch: 5 [14080/40479 (43%)]\tLoss: 0.225904\n", 610 | "Train Epoch: 5 [14720/40479 (45%)]\tLoss: 0.237777\n", 611 | "Train Epoch: 5 [15360/40479 (47%)]\tLoss: 0.220133\n", 612 | "Train Epoch: 5 [16000/40479 (49%)]\tLoss: 0.230607\n", 613 | "Train Epoch: 5 [16640/40479 (51%)]\tLoss: 0.222831\n", 614 | "Train Epoch: 5 [17280/40479 (53%)]\tLoss: 0.246762\n", 615 | "Train Epoch: 5 [17920/40479 (55%)]\tLoss: 0.230246\n", 616 | "Train Epoch: 5 [18560/40479 (57%)]\tLoss: 0.216454\n", 617 | "Train Epoch: 5 [19200/40479 (59%)]\tLoss: 0.229916\n", 618 | "Train Epoch: 5 [19840/40479 (61%)]\tLoss: 0.201469\n", 619 | "Train Epoch: 5 [20480/40479 (63%)]\tLoss: 0.197106\n", 620 | "Train Epoch: 5 [21120/40479 (65%)]\tLoss: 0.205562\n", 621 | "Train Epoch: 5 [21760/40479 (67%)]\tLoss: 0.213962\n", 622 | "Train Epoch: 5 [22400/40479 (69%)]\tLoss: 0.211499\n", 623 | "Train Epoch: 5 [23040/40479 (71%)]\tLoss: 0.205921\n", 624 | "Train Epoch: 5 [23680/40479 (73%)]\tLoss: 0.224541\n", 625 | "Train Epoch: 5 [24320/40479 (75%)]\tLoss: 0.203990\n", 626 | "Train Epoch: 5 [24960/40479 (77%)]\tLoss: 0.200465\n", 627 | "Train Epoch: 5 [25600/40479 (79%)]\tLoss: 0.233774\n", 628 | "Train Epoch: 5 [26240/40479 (81%)]\tLoss: 0.238049\n", 629 | "Train Epoch: 5 [26880/40479 (83%)]\tLoss: 0.215357\n", 630 | "Train Epoch: 5 [27520/40479 (85%)]\tLoss: 0.239355\n", 631 | "Train Epoch: 5 [28160/40479 (87%)]\tLoss: 0.206513\n", 632 | "Train Epoch: 5 [28800/40479 (89%)]\tLoss: 0.223596\n", 633 | "Train Epoch: 5 [29440/40479 (91%)]\tLoss: 0.219268\n", 634 | "Train Epoch: 5 [30080/40479 (93%)]\tLoss: 0.217147\n", 635 | "Train Epoch: 5 [30720/40479 (95%)]\tLoss: 0.222204\n", 636 | "Train Epoch: 5 [31360/40479 (97%)]\tLoss: 0.244053\n", 637 | "Train Epoch: 5 [32000/40479 (99%)]\tLoss: 0.200348\n", 638 | "Train Epoch: 6 [0/40479 (0%)]\tLoss: 0.221157\n", 639 | "Train Epoch: 6 [640/40479 (2%)]\tLoss: 0.252395\n", 640 | "Train Epoch: 6 [1280/40479 (4%)]\tLoss: 0.241545\n", 641 | "Train Epoch: 6 [1920/40479 (6%)]\tLoss: 0.228716\n", 642 | "Train Epoch: 6 [2560/40479 (8%)]\tLoss: 0.191572\n", 643 | "Train Epoch: 6 [3200/40479 (10%)]\tLoss: 0.194431\n", 644 | "Train Epoch: 6 [3840/40479 (12%)]\tLoss: 0.239263\n", 645 | "Train Epoch: 6 [4480/40479 (14%)]\tLoss: 0.207937\n", 646 | "Train Epoch: 6 [5120/40479 (16%)]\tLoss: 0.201746\n", 647 | "Train Epoch: 6 [5760/40479 (18%)]\tLoss: 0.223089\n", 648 | "Train Epoch: 6 [6400/40479 (20%)]\tLoss: 0.215151\n", 649 | "Train Epoch: 6 [7040/40479 (22%)]\tLoss: 0.210253\n", 650 | "Train Epoch: 6 [7680/40479 (24%)]\tLoss: 0.230108\n", 651 | "Train Epoch: 6 [8320/40479 (26%)]\tLoss: 0.209366\n", 652 | "Train Epoch: 6 [8960/40479 (28%)]\tLoss: 0.195442\n", 653 | "Train Epoch: 6 [9600/40479 (30%)]\tLoss: 0.225066\n", 654 | "Train Epoch: 6 [10240/40479 (32%)]\tLoss: 0.206362\n", 655 | "Train Epoch: 6 [10880/40479 (34%)]\tLoss: 0.231513\n", 656 | "Train Epoch: 6 [11520/40479 (36%)]\tLoss: 0.224703\n", 657 | "Train Epoch: 6 [12160/40479 (38%)]\tLoss: 0.223797\n", 658 | "Train Epoch: 6 [12800/40479 (40%)]\tLoss: 0.206543\n", 659 | "Train Epoch: 6 [13440/40479 (42%)]\tLoss: 0.249504\n", 660 | "Train Epoch: 6 [14080/40479 (43%)]\tLoss: 0.215576\n", 661 | "Train Epoch: 6 [14720/40479 (45%)]\tLoss: 0.198119\n", 662 | "Train Epoch: 6 [15360/40479 (47%)]\tLoss: 0.208393\n", 663 | "Train Epoch: 6 [16000/40479 (49%)]\tLoss: 0.214696\n", 664 | 
"Train Epoch: 6 [16640/40479 (51%)]\tLoss: 0.203025\n", 665 | "Train Epoch: 6 [17280/40479 (53%)]\tLoss: 0.192496\n", 666 | "Train Epoch: 6 [17920/40479 (55%)]\tLoss: 0.239695\n", 667 | "Train Epoch: 6 [18560/40479 (57%)]\tLoss: 0.230435\n", 668 | "Train Epoch: 6 [19200/40479 (59%)]\tLoss: 0.204453\n", 669 | "Train Epoch: 6 [19840/40479 (61%)]\tLoss: 0.234823\n", 670 | "Train Epoch: 6 [20480/40479 (63%)]\tLoss: 0.226867\n", 671 | "Train Epoch: 6 [21120/40479 (65%)]\tLoss: 0.200829\n", 672 | "Train Epoch: 6 [21760/40479 (67%)]\tLoss: 0.227352\n", 673 | "Train Epoch: 6 [22400/40479 (69%)]\tLoss: 0.254058\n", 674 | "Train Epoch: 6 [23040/40479 (71%)]\tLoss: 0.200713\n", 675 | "Train Epoch: 6 [23680/40479 (73%)]\tLoss: 0.219419\n", 676 | "Train Epoch: 6 [24320/40479 (75%)]\tLoss: 0.211055\n", 677 | "Train Epoch: 6 [24960/40479 (77%)]\tLoss: 0.224809\n", 678 | "Train Epoch: 6 [25600/40479 (79%)]\tLoss: 0.229782\n", 679 | "Train Epoch: 6 [26240/40479 (81%)]\tLoss: 0.224255\n", 680 | "Train Epoch: 6 [26880/40479 (83%)]\tLoss: 0.214961\n", 681 | "Train Epoch: 6 [27520/40479 (85%)]\tLoss: 0.235325\n", 682 | "Train Epoch: 6 [28160/40479 (87%)]\tLoss: 0.232684\n", 683 | "Train Epoch: 6 [28800/40479 (89%)]\tLoss: 0.219771\n", 684 | "Train Epoch: 6 [29440/40479 (91%)]\tLoss: 0.206950\n", 685 | "Train Epoch: 6 [30080/40479 (93%)]\tLoss: 0.206973\n", 686 | "Train Epoch: 6 [30720/40479 (95%)]\tLoss: 0.205198\n", 687 | "Train Epoch: 6 [31360/40479 (97%)]\tLoss: 0.202008\n", 688 | "Train Epoch: 6 [32000/40479 (99%)]\tLoss: 0.237647\n", 689 | "Train Epoch: 7 [0/40479 (0%)]\tLoss: 0.238941\n", 690 | "Train Epoch: 7 [640/40479 (2%)]\tLoss: 0.206254\n", 691 | "Train Epoch: 7 [1280/40479 (4%)]\tLoss: 0.202189\n", 692 | "Train Epoch: 7 [1920/40479 (6%)]\tLoss: 0.222286\n", 693 | "Train Epoch: 7 [2560/40479 (8%)]\tLoss: 0.212710\n", 694 | "Train Epoch: 7 [3200/40479 (10%)]\tLoss: 0.261827\n", 695 | "Train Epoch: 7 [3840/40479 (12%)]\tLoss: 0.233636\n", 696 | "Train Epoch: 7 [4480/40479 (14%)]\tLoss: 0.217446\n", 697 | "Train Epoch: 7 [5120/40479 (16%)]\tLoss: 0.197451\n", 698 | "Train Epoch: 7 [5760/40479 (18%)]\tLoss: 0.218938\n", 699 | "Train Epoch: 7 [6400/40479 (20%)]\tLoss: 0.206823\n", 700 | "Train Epoch: 7 [7040/40479 (22%)]\tLoss: 0.215967\n", 701 | "Train Epoch: 7 [7680/40479 (24%)]\tLoss: 0.234034\n", 702 | "Train Epoch: 7 [8320/40479 (26%)]\tLoss: 0.222782\n", 703 | "Train Epoch: 7 [8960/40479 (28%)]\tLoss: 0.221467\n", 704 | "Train Epoch: 7 [9600/40479 (30%)]\tLoss: 0.215337\n", 705 | "Train Epoch: 7 [10240/40479 (32%)]\tLoss: 0.225604\n", 706 | "Train Epoch: 7 [10880/40479 (34%)]\tLoss: 0.243185\n", 707 | "Train Epoch: 7 [11520/40479 (36%)]\tLoss: 0.216148\n", 708 | "Train Epoch: 7 [12160/40479 (38%)]\tLoss: 0.229720\n", 709 | "Train Epoch: 7 [12800/40479 (40%)]\tLoss: 0.205371\n", 710 | "Train Epoch: 7 [13440/40479 (42%)]\tLoss: 0.222294\n", 711 | "Train Epoch: 7 [14080/40479 (43%)]\tLoss: 0.223919\n", 712 | "Train Epoch: 7 [14720/40479 (45%)]\tLoss: 0.215905\n", 713 | "Train Epoch: 7 [15360/40479 (47%)]\tLoss: 0.219890\n", 714 | "Train Epoch: 7 [16000/40479 (49%)]\tLoss: 0.232056\n" 715 | ] 716 | }, 717 | { 718 | "name": "stdout", 719 | "output_type": "stream", 720 | "text": [ 721 | "Train Epoch: 7 [16640/40479 (51%)]\tLoss: 0.211867\n", 722 | "Train Epoch: 7 [17280/40479 (53%)]\tLoss: 0.213061\n", 723 | "Train Epoch: 7 [17920/40479 (55%)]\tLoss: 0.182352\n", 724 | "Train Epoch: 7 [18560/40479 (57%)]\tLoss: 0.204680\n", 725 | "Train Epoch: 7 [19200/40479 (59%)]\tLoss: 0.204601\n", 726 | "Train 
Epoch: 7 [19840/40479 (61%)]\tLoss: 0.204036\n", 727 | "Train Epoch: 7 [20480/40479 (63%)]\tLoss: 0.214126\n", 728 | "Train Epoch: 7 [21120/40479 (65%)]\tLoss: 0.235875\n", 729 | "Train Epoch: 7 [21760/40479 (67%)]\tLoss: 0.211310\n", 730 | "Train Epoch: 7 [22400/40479 (69%)]\tLoss: 0.219646\n", 731 | "Train Epoch: 7 [23040/40479 (71%)]\tLoss: 0.223003\n", 732 | "Train Epoch: 7 [23680/40479 (73%)]\tLoss: 0.220125\n", 733 | "Train Epoch: 7 [24320/40479 (75%)]\tLoss: 0.221888\n", 734 | "Train Epoch: 7 [24960/40479 (77%)]\tLoss: 0.193348\n", 735 | "Train Epoch: 7 [25600/40479 (79%)]\tLoss: 0.231392\n", 736 | "Train Epoch: 7 [26240/40479 (81%)]\tLoss: 0.215880\n", 737 | "Train Epoch: 7 [26880/40479 (83%)]\tLoss: 0.220085\n", 738 | "Train Epoch: 7 [27520/40479 (85%)]\tLoss: 0.259755\n", 739 | "Train Epoch: 7 [28160/40479 (87%)]\tLoss: 0.229210\n", 740 | "Train Epoch: 7 [28800/40479 (89%)]\tLoss: 0.228965\n", 741 | "Train Epoch: 7 [29440/40479 (91%)]\tLoss: 0.238347\n", 742 | "Train Epoch: 7 [30080/40479 (93%)]\tLoss: 0.237505\n", 743 | "Train Epoch: 7 [30720/40479 (95%)]\tLoss: 0.242703\n", 744 | "Train Epoch: 7 [31360/40479 (97%)]\tLoss: 0.201824\n", 745 | "Train Epoch: 7 [32000/40479 (99%)]\tLoss: 0.222159\n", 746 | "Train Epoch: 8 [0/40479 (0%)]\tLoss: 0.210300\n", 747 | "Train Epoch: 8 [640/40479 (2%)]\tLoss: 0.213195\n", 748 | "Train Epoch: 8 [1280/40479 (4%)]\tLoss: 0.236923\n", 749 | "Train Epoch: 8 [1920/40479 (6%)]\tLoss: 0.214789\n", 750 | "Train Epoch: 8 [2560/40479 (8%)]\tLoss: 0.189022\n", 751 | "Train Epoch: 8 [3200/40479 (10%)]\tLoss: 0.202073\n", 752 | "Train Epoch: 8 [3840/40479 (12%)]\tLoss: 0.223525\n", 753 | "Train Epoch: 8 [4480/40479 (14%)]\tLoss: 0.212316\n", 754 | "Train Epoch: 8 [5120/40479 (16%)]\tLoss: 0.197626\n", 755 | "Train Epoch: 8 [5760/40479 (18%)]\tLoss: 0.229944\n", 756 | "Train Epoch: 8 [6400/40479 (20%)]\tLoss: 0.208881\n", 757 | "Train Epoch: 8 [7040/40479 (22%)]\tLoss: 0.222905\n", 758 | "Train Epoch: 8 [7680/40479 (24%)]\tLoss: 0.215646\n", 759 | "Train Epoch: 8 [8320/40479 (26%)]\tLoss: 0.208961\n", 760 | "Train Epoch: 8 [8960/40479 (28%)]\tLoss: 0.201373\n", 761 | "Train Epoch: 8 [9600/40479 (30%)]\tLoss: 0.187059\n", 762 | "Train Epoch: 8 [10240/40479 (32%)]\tLoss: 0.205768\n", 763 | "Train Epoch: 8 [10880/40479 (34%)]\tLoss: 0.195439\n", 764 | "Train Epoch: 8 [11520/40479 (36%)]\tLoss: 0.229307\n", 765 | "Train Epoch: 8 [12160/40479 (38%)]\tLoss: 0.233949\n", 766 | "Train Epoch: 8 [12800/40479 (40%)]\tLoss: 0.208750\n", 767 | "Train Epoch: 8 [13440/40479 (42%)]\tLoss: 0.210449\n", 768 | "Train Epoch: 8 [14080/40479 (43%)]\tLoss: 0.223516\n", 769 | "Train Epoch: 8 [14720/40479 (45%)]\tLoss: 0.224399\n", 770 | "Train Epoch: 8 [15360/40479 (47%)]\tLoss: 0.222601\n", 771 | "Train Epoch: 8 [16000/40479 (49%)]\tLoss: 0.215521\n", 772 | "Train Epoch: 8 [16640/40479 (51%)]\tLoss: 0.217097\n", 773 | "Train Epoch: 8 [17280/40479 (53%)]\tLoss: 0.210878\n", 774 | "Train Epoch: 8 [17920/40479 (55%)]\tLoss: 0.240644\n", 775 | "Train Epoch: 8 [18560/40479 (57%)]\tLoss: 0.234446\n", 776 | "Train Epoch: 8 [19200/40479 (59%)]\tLoss: 0.220509\n", 777 | "Train Epoch: 8 [19840/40479 (61%)]\tLoss: 0.214906\n", 778 | "Train Epoch: 8 [20480/40479 (63%)]\tLoss: 0.231343\n", 779 | "Train Epoch: 8 [21120/40479 (65%)]\tLoss: 0.221846\n", 780 | "Train Epoch: 8 [21760/40479 (67%)]\tLoss: 0.231584\n", 781 | "Train Epoch: 8 [22400/40479 (69%)]\tLoss: 0.201782\n", 782 | "Train Epoch: 8 [23040/40479 (71%)]\tLoss: 0.234168\n", 783 | "Train Epoch: 8 [23680/40479 (73%)]\tLoss: 
0.225944\n", 784 | "Train Epoch: 8 [24320/40479 (75%)]\tLoss: 0.219733\n", 785 | "Train Epoch: 8 [24960/40479 (77%)]\tLoss: 0.200213\n", 786 | "Train Epoch: 8 [25600/40479 (79%)]\tLoss: 0.222768\n", 787 | "Train Epoch: 8 [26240/40479 (81%)]\tLoss: 0.224571\n", 788 | "Train Epoch: 8 [26880/40479 (83%)]\tLoss: 0.227366\n", 789 | "Train Epoch: 8 [27520/40479 (85%)]\tLoss: 0.192402\n", 790 | "Train Epoch: 8 [28160/40479 (87%)]\tLoss: 0.206933\n", 791 | "Train Epoch: 8 [28800/40479 (89%)]\tLoss: 0.222230\n", 792 | "Train Epoch: 8 [29440/40479 (91%)]\tLoss: 0.241823\n", 793 | "Train Epoch: 8 [30080/40479 (93%)]\tLoss: 0.219849\n", 794 | "Train Epoch: 8 [30720/40479 (95%)]\tLoss: 0.223774\n", 795 | "Train Epoch: 8 [31360/40479 (97%)]\tLoss: 0.221740\n", 796 | "Train Epoch: 8 [32000/40479 (99%)]\tLoss: 0.226302\n", 797 | "Train Epoch: 9 [0/40479 (0%)]\tLoss: 0.246171\n", 798 | "Train Epoch: 9 [640/40479 (2%)]\tLoss: 0.194035\n", 799 | "Train Epoch: 9 [1280/40479 (4%)]\tLoss: 0.226316\n", 800 | "Train Epoch: 9 [1920/40479 (6%)]\tLoss: 0.219594\n", 801 | "Train Epoch: 9 [2560/40479 (8%)]\tLoss: 0.192174\n", 802 | "Train Epoch: 9 [3200/40479 (10%)]\tLoss: 0.208604\n", 803 | "Train Epoch: 9 [3840/40479 (12%)]\tLoss: 0.238914\n", 804 | "Train Epoch: 9 [4480/40479 (14%)]\tLoss: 0.227050\n", 805 | "Train Epoch: 9 [5120/40479 (16%)]\tLoss: 0.218689\n", 806 | "Train Epoch: 9 [5760/40479 (18%)]\tLoss: 0.241844\n", 807 | "Train Epoch: 9 [6400/40479 (20%)]\tLoss: 0.230105\n", 808 | "Train Epoch: 9 [7040/40479 (22%)]\tLoss: 0.221337\n", 809 | "Train Epoch: 9 [7680/40479 (24%)]\tLoss: 0.226021\n", 810 | "Train Epoch: 9 [8320/40479 (26%)]\tLoss: 0.206170\n", 811 | "Train Epoch: 9 [8960/40479 (28%)]\tLoss: 0.189948\n", 812 | "Train Epoch: 9 [9600/40479 (30%)]\tLoss: 0.218153\n", 813 | "Train Epoch: 9 [10240/40479 (32%)]\tLoss: 0.221108\n", 814 | "Train Epoch: 9 [10880/40479 (34%)]\tLoss: 0.200809\n", 815 | "Train Epoch: 9 [11520/40479 (36%)]\tLoss: 0.224982\n", 816 | "Train Epoch: 9 [12160/40479 (38%)]\tLoss: 0.232980\n", 817 | "Train Epoch: 9 [12800/40479 (40%)]\tLoss: 0.199956\n", 818 | "Train Epoch: 9 [13440/40479 (42%)]\tLoss: 0.225886\n", 819 | "Train Epoch: 9 [14080/40479 (43%)]\tLoss: 0.234917\n", 820 | "Train Epoch: 9 [14720/40479 (45%)]\tLoss: 0.215907\n", 821 | "Train Epoch: 9 [15360/40479 (47%)]\tLoss: 0.201263\n", 822 | "Train Epoch: 9 [16000/40479 (49%)]\tLoss: 0.200151\n", 823 | "Train Epoch: 9 [16640/40479 (51%)]\tLoss: 0.200983\n", 824 | "Train Epoch: 9 [17280/40479 (53%)]\tLoss: 0.199013\n", 825 | "Train Epoch: 9 [17920/40479 (55%)]\tLoss: 0.238366\n", 826 | "Train Epoch: 9 [18560/40479 (57%)]\tLoss: 0.212182\n", 827 | "Train Epoch: 9 [19200/40479 (59%)]\tLoss: 0.216866\n", 828 | "Train Epoch: 9 [19840/40479 (61%)]\tLoss: 0.223518\n", 829 | "Train Epoch: 9 [20480/40479 (63%)]\tLoss: 0.206608\n", 830 | "Train Epoch: 9 [21120/40479 (65%)]\tLoss: 0.221796\n", 831 | "Train Epoch: 9 [21760/40479 (67%)]\tLoss: 0.243650\n", 832 | "Train Epoch: 9 [22400/40479 (69%)]\tLoss: 0.215382\n", 833 | "Train Epoch: 9 [23040/40479 (71%)]\tLoss: 0.212058\n", 834 | "Train Epoch: 9 [23680/40479 (73%)]\tLoss: 0.225021\n", 835 | "Train Epoch: 9 [24320/40479 (75%)]\tLoss: 0.227688\n", 836 | "Train Epoch: 9 [24960/40479 (77%)]\tLoss: 0.235619\n", 837 | "Train Epoch: 9 [25600/40479 (79%)]\tLoss: 0.238336\n", 838 | "Train Epoch: 9 [26240/40479 (81%)]\tLoss: 0.207296\n", 839 | "Train Epoch: 9 [26880/40479 (83%)]\tLoss: 0.208340\n", 840 | "Train Epoch: 9 [27520/40479 (85%)]\tLoss: 0.220505\n", 841 | "Train Epoch: 9 
[28160/40479 (87%)]\tLoss: 0.222068\n", 842 | "Train Epoch: 9 [28800/40479 (89%)]\tLoss: 0.221910\n", 843 | "Train Epoch: 9 [29440/40479 (91%)]\tLoss: 0.185835\n", 844 | "Train Epoch: 9 [30080/40479 (93%)]\tLoss: 0.206984\n", 845 | "Train Epoch: 9 [30720/40479 (95%)]\tLoss: 0.236037\n", 846 | "Train Epoch: 9 [31360/40479 (97%)]\tLoss: 0.230958\n", 847 | "Train Epoch: 9 [32000/40479 (99%)]\tLoss: 0.197022\n" 848 | ] 849 | } 850 | ], 851 | "source": [ 852 | "for epoch in range(1, 10):\n", 853 | " train(epoch, train_loader, encoderCNN, decoderRNN, criterion, optimizer)" 854 | ] 855 | }, 856 | { 857 | "cell_type": "code", 858 | "execution_count": null, 859 | "metadata": { 860 | "collapsed": true 861 | }, 862 | "outputs": [], 863 | "source": [] 864 | }, 865 | { 866 | "cell_type": "code", 867 | "execution_count": null, 868 | "metadata": { 869 | "collapsed": true 870 | }, 871 | "outputs": [], 872 | "source": [] 873 | }, 874 | { 875 | "cell_type": "code", 876 | "execution_count": null, 877 | "metadata": { 878 | "collapsed": true 879 | }, 880 | "outputs": [], 881 | "source": [] 882 | }, 883 | { 884 | "cell_type": "code", 885 | "execution_count": null, 886 | "metadata": { 887 | "collapsed": true 888 | }, 889 | "outputs": [], 890 | "source": [] 891 | }, 892 | { 893 | "cell_type": "code", 894 | "execution_count": null, 895 | "metadata": { 896 | "collapsed": true 897 | }, 898 | "outputs": [], 899 | "source": [] 900 | } 901 | ], 902 | "metadata": { 903 | "kernelspec": { 904 | "display_name": "Python 3", 905 | "language": "python", 906 | "name": "python3" 907 | }, 908 | "language_info": { 909 | "codemirror_mode": { 910 | "name": "ipython", 911 | "version": 3 912 | }, 913 | "file_extension": ".py", 914 | "mimetype": "text/x-python", 915 | "name": "python", 916 | "nbconvert_exporter": "python", 917 | "pygments_lexer": "ipython3", 918 | "version": "3.6.0" 919 | } 920 | }, 921 | "nbformat": 4, 922 | "nbformat_minor": 2 923 | } 924 | --------------------------------------------------------------------------------
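For reference, the CNN-encoder / GRU-decoder wiring that RNN_experiment_1.ipynb builds on top of the repository's fine-tuned snapshot can be reduced to the standalone sketch below. It is a minimal sketch, not the repository's code: it swaps torchvision's ImageNet-pretrained ResNet-50 in for the fine-tuned src.p_neuro.ResNet50 checkpoint, keeps the 17 Planet labels and the frozen-backbone / single-step-GRU design from the notebook, and uses current PyTorch idioms; class names such as FrozenEncoder and GRUDecoder are illustrative only.

# Minimal sketch of the notebook's encoder/decoder pipeline.
# Assumptions: torchvision ImageNet ResNet-50 instead of the repo's
# fine-tuned checkpoint; 17 output labels; illustrative class names.
import torch
import torch.nn as nn
from torchvision import models

class FrozenEncoder(nn.Module):
    """Pretrained ResNet-50 with the final fully-connected layer removed,
    used as a fixed 2048-d feature extractor."""
    def __init__(self):
        super(FrozenEncoder, self).__init__()
        # pre-0.13 torchvision flag; newer versions use
        # models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        backbone = models.resnet50(pretrained=True)
        # everything except the last linear layer (keeps the avgpool)
        self.features = nn.Sequential(*list(backbone.children())[:-1])
        for p in self.features.parameters():
            p.requires_grad = False  # freeze the backbone

    def forward(self, x):
        f = self.features(x)          # (N, 2048, 1, 1)
        return f.view(f.size(0), -1)  # (N, 2048)

class GRUDecoder(nn.Module):
    """Single-step GRU over the image embedding, followed by a linear
    classifier emitting one logit per label."""
    def __init__(self, num_feats=2048, num_classes=17,
                 hidden_size=64, num_layers=10):
        super(GRUDecoder, self).__init__()
        self.rnn = nn.GRU(num_feats, hidden_size, num_layers,
                          batch_first=True)
        self.classifier = nn.Linear(hidden_size, num_classes)
        self.hidden_size = hidden_size

    def forward(self, feats, hidden=None):
        # treat the embedding as a length-1 sequence: (N, 1, num_feats)
        x, hidden = self.rnn(feats.unsqueeze(1), hidden)
        return self.classifier(x.view(-1, self.hidden_size))

if __name__ == '__main__':
    encoder, decoder = FrozenEncoder().eval(), GRUDecoder()
    criterion = nn.MultiLabelSoftMarginLoss()
    images = torch.randn(4, 3, 224, 224)             # dummy batch
    targets = torch.randint(0, 2, (4, 17)).float()   # dummy multi-hot labels
    with torch.no_grad():
        embeddings = encoder(images)                 # frozen features
    logits = decoder(embeddings)                     # (4, 17) logits
    print(logits.shape, criterion(logits, targets).item())

As in the notebook, only the decoder's parameters would be passed to the optimizer, since the encoder's weights are frozen and the encoder is kept in eval() mode during training.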