├── source
│   ├── models
│   │   ├── __init__.py
│   │   └── se_resnet.py
│   ├── utils
│   │   ├── __init__.py
│   │   └── average_precision_calculator.py
│   ├── datasets.py
│   ├── config.yaml
│   ├── prepare_dataset.py
│   ├── generate_submission.py
│   ├── train_se_resnet101.py
│   └── tune_se_resnet101.py
├── .gitignore
├── Dockerfile
├── README.md
└── LICENSE
/source/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /source/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /source/datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | from torch.utils.data import Dataset 5 | from torchvision.datasets.folder import pil_loader 6 | 7 | 8 | class LabeledImages(Dataset): 9 | def __init__(self, list_path, images_root, transform=None): 10 | self._transform = transform 11 | self._images_root = images_root 12 | 13 | self._content = [] 14 | self._labels_list = [] 15 | 16 | with open(list_path, 'r') as f: 17 | for line in f: 18 | parts = line.strip().split(' ') 19 | 20 | self._content.append(parts[0]) 21 | labels = [int(label) for label in parts[1:]] 22 | self._labels_list.append(labels) 23 | 24 | def __len__(self): 25 | return len(self._content) 26 | 27 | def __getitem__(self, idx): 28 | img_path = os.path.join(self._images_root, self._content[idx]) 29 | labels = np.array(self._labels_list[idx], dtype=np.int64) 30 | 31 | img = pil_loader(img_path) 32 | if self._transform is not None: 33 | img = self._transform(img) 34 | 35 | return {'image': img, 'labels': labels} -------------------------------------------------------------------------------- /source/config.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | ORIGINAL_IMAGES_ROOT: "/original_images" 3 | RESIZED_IMAGES_ROOT: "/resized_images" 4 | SHORTEST_SIZE: 256 5 | HUMAN_LABELS_PATH: "/inclusive/train_human_labels.csv" 6 | TRAIN_LIST_PATH: "/artifacts/train.txt" 7 | VAL_LIST_PATH: "/artifacts/val.txt" 8 | VAL_RATIO: 0.01 9 | TUNING_LABELS_PATH: "/inclusive/tuning_labels.csv" 10 | TUNING_LIST_PATH: "/artifacts/tuning.txt" 11 | TUNING_TRAIN_RATIO: 0.99 12 | NAME_TO_LABEL_PATH: "/artifacts/name_to_label.pkl" 13 | STAGE1_TEST_IMAGES_ROOT: "/inclusive/stage_1_test_images" 14 | STAGE1_SAMPLE_SUBMISSION_PATH: "/inclusive/stage_1_sample_submission.csv" 15 | STAGE2_TEST_IMAGES_ROOT: "/inclusive/stage_2_test_images" 16 | STAGE2_SAMPLE_SUBMISSION_PATH: "/inclusive/stage_2_sample_submission.csv" 17 | TRAINING: 18 | SEED: 0xDEADFACE 19 | ARTIFACTS_ROOT: "/artifacts/training" 20 | BATCH_SIZE: 32 21 | INITIAL_LR: 0.01 22 | LR_DECAY: 0.1 23 | LR_STEP: 10 24 | MOMENTUM: 0.9 25 | WEIGHT_DECAY: 0.0001 26 | N_EPOCH: 25 27 | LOG_FREQUENCY: 100 28 | VALIDATION: 29 | TOP_K: 150 30 | TUNING: 31 | SEED: 0xDEADFACE 32 | ARTIFACTS_ROOT: "/artifacts/tuning" 33 | BATCH_SIZE: 32 34 | INITIAL_LR: 0.0001 35 | LR_DECAY: 0.1 36 | LR_STEP: 100 37 | MOMENTUM: 0.9 38 | WEIGHT_DECAY: 0.0001 39 | N_EPOCH: 80 40 | LOG_FREQUENCY: 30 41 | SUBMISSION: 42 | OUTPUT_ROOT: "/output" 43 | BATCH_SIZE: 32 44 | TOP_K: 150 45 | MIN_PREDS: 2 46 | MAX_PREDS: 5 47 | THRESHOLD: 0.55 48 | 49 | --------------------------------------------------------------------------------
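Editor's note: every script in this repo reads the config above with the same small pattern. A minimal sketch of it (assumes PyYAML 3.12, the version pinned in the Dockerfile; on PyYAML >= 5 you would pass an explicit `Loader` or use `yaml.safe_load`):

```python
import os
import yaml

CONFIG_PATH = '/source/config.yaml'  # same constant the scripts below define

with open(CONFIG_PATH, 'r') as f:
    # PyYAML 3.x API, as used throughout this repo.
    config = yaml.load(f)

# Nested keys mirror the YAML sections above.
print(config['DATASET']['SHORTEST_SIZE'])   # 256
print(config['SUBMISSION']['THRESHOLD'])    # 0.55
print(os.path.exists(config['DATASET']['TRAIN_LIST_PATH']))
```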
/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:8.0-cudnn7-devel-ubuntu16.04 2 | 3 | SHELL ["/bin/bash", "-c"] 4 | 5 | RUN rm -rf /var/lib/apt/lists/* \ 6 | /etc/apt/sources.list.d/cuda.list \ 7 | /etc/apt/sources.list.d/nvidia-ml.list && \ 8 | apt-get update && \ 9 | DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 10 | build-essential \ 11 | ca-certificates \ 12 | cmake \ 13 | wget \ 14 | git \ 15 | vim \ 16 | nano \ 17 | less \ 18 | tmux \ 19 | htop \ 20 | screen \ 21 | curl \ 22 | mc \ 23 | openssh-server \ 24 | openssh-client && \ 25 | DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 26 | python3 \ 27 | python3-dev && \ 28 | wget -O ~/get-pip.py \ 29 | https://bootstrap.pypa.io/get-pip.py && \ 30 | python3 ~/get-pip.py && \ 31 | pip3 --no-cache-dir install \ 32 | setuptools \ 33 | numpy==1.14.1 \ 34 | scipy==1.0.0 \ 35 | matplotlib==2.1.2 \ 36 | pandas==0.22.0 \ 37 | scikit-learn==0.19.1 \ 38 | opencv-python==3.2.0.8 \ 39 | Cython==0.27.3 \ 40 | jupyterlab==0.32.1 \ 41 | pyyaml==3.12 \ 42 | "scikit-image>=0.9.3" \ 43 | "h5py>=2.2.0" \ 44 | "networkx>=1.8.1" \ 45 | "nose>=1.3.0" \ 46 | pytest && \ 47 | cd /tmp && \ 48 | git clone https://github.com/pytorch/pytorch.git && \ 49 | cd pytorch && \ 50 | git checkout v0.4.1 && \ 51 | git submodule update --init --recursive && \ 52 | python3 setup.py install && \ 53 | pip --no-cache-dir install \ 54 | torchvision==0.2.1 \ 55 | tensorboardX==1.2 && \ 56 | printf "export LC_ALL=C.UTF-8\n" >> /etc/environment && \ 57 | apt-get clean && \ 58 | apt-get autoremove && \ 59 | rm -rf /var/lib/apt/lists/* /tmp/*
60 | 61 | COPY ./source /source 62 | -------------------------------------------------------------------------------- /source/prepare_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import yaml 4 | import random 5 | from collections import defaultdict 6 | import cv2 7 | 8 | 9 | CONFIG_PATH = '/source/config.yaml' 10 | LOG_FREQUENCY = 10000 11 | 12 | 13 | def main(): 14 | with open(CONFIG_PATH, 'r') as f: 15 | config = yaml.load(f) 16 | 17 | name_to_label = {} 18 | img_path_to_labels = defaultdict(list) 19 | 20 | with open(config['DATASET']['HUMAN_LABELS_PATH'], 'r') as f: 21 | f.readline() 22 | for line in f: 23 | parts = line.strip().split(',') 24 | img_path = os.path.join('train', parts[0] + '.jpg') 25 | name = parts[2] 26 | if name not in name_to_label: 27 | name_to_label[name] = len(name_to_label) 28 | label = name_to_label[name] 29 | img_path_to_labels[img_path].append(label) 30 | 31 | print('Total number of images: {}. Total number of labels: {}.'.format(len(img_path_to_labels), len(name_to_label))) 32 | 33 | content = sorted(img_path_to_labels) 34 | print('Resizing images...') 35 | for i, rel_path in enumerate(content): 36 | src_path = os.path.join(config['DATASET']['ORIGINAL_IMAGES_ROOT'], rel_path) 37 | dst_path = os.path.join(config['DATASET']['RESIZED_IMAGES_ROOT'], rel_path) 38 | 39 | os.makedirs(os.path.dirname(dst_path), exist_ok=True) 40 | 41 | img = cv2.imread(src_path) 42 | height, width, channels = img.shape 43 | if height < width: 44 | dst_height = config['DATASET']['SHORTEST_SIZE'] 45 | dst_width = round((config['DATASET']['SHORTEST_SIZE'] / height) * width) 46 | else: 47 | dst_width = config['DATASET']['SHORTEST_SIZE'] 48 | dst_height = round((config['DATASET']['SHORTEST_SIZE'] / width) * height) 49 | img = cv2.resize(img, (dst_width, dst_height)) 50 | 51 | cv2.imwrite(dst_path, img) 52 | if i % LOG_FREQUENCY == 0: 53 | print('{} / {} processed'.format(i + 1, len(content))) 54 | 55 | random.seed(0xDEADFACE) 56 | random.shuffle(content) 57 | 58 | n_val = round(config['DATASET']['VAL_RATIO'] * len(content)) 59 | n_train = len(content) - n_val 60 | 61 | train_content = content[:n_train] 62 | val_content = content[n_train:] 63 | 64 | print('Train size: {}. Val size: {}.'.format(len(train_content), len(val_content))) 65 | 66 | with open(config['DATASET']['TRAIN_LIST_PATH'], 'w') as f: 67 | for img_path in train_content: 68 | labels = img_path_to_labels[img_path] 69 | line = ' '.join([img_path] + [str(label) for label in labels]) 70 | f.write('{}\n'.format(line)) 71 | 72 | with open(config['DATASET']['VAL_LIST_PATH'], 'w') as f: 73 | for img_path in val_content: 74 | labels = img_path_to_labels[img_path] 75 | line = ' '.join([img_path] + [str(label) for label in labels]) 76 | f.write('{}\n'.format(line)) 77 | 78 | with open(config['DATASET']['NAME_TO_LABEL_PATH'], 'wb') as f: 79 | pickle.dump(name_to_label, f) 80 | 81 | 82 | if __name__ == '__main__': 83 | main() --------------------------------------------------------------------------------
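Editor's note: the resize step above keeps the aspect ratio and maps the shorter image side to `SHORTEST_SIZE` (256). A quick sketch of that arithmetic with a few worked cases (the helper name is mine, not part of the repo; note that `cv2.resize` takes sizes as `(width, height)`):

```python
def resized_shape(height, width, shortest_size=256):
    # Scale so the shorter edge becomes `shortest_size`; the longer edge
    # is rounded to the nearest pixel, exactly as in prepare_dataset.py.
    if height < width:
        return shortest_size, round((shortest_size / height) * width)
    return round((shortest_size / width) * height), shortest_size

assert resized_shape(768, 1024) == (256, 341)   # landscape 768x1024 -> 256x341
assert resized_shape(1024, 768) == (341, 256)   # portrait 1024x768 -> 341x256
assert resized_shape(256, 256) == (256, 256)    # already at the target size
```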
/README.md: -------------------------------------------------------------------------------- 1 | # inclusive-images-challenge 2 | 4th place solution for the [Inclusive Images Challenge on Kaggle](https://www.kaggle.com/c/inclusive-images-challenge). 3 | Private LB score: **0.33184** 4 | ## The hardware I used 5 | - CPU: Intel Core i7 5930k 6 | - GPU: 1x NVIDIA GTX 1080 7 | - RAM: 64 GB 8 | - SSD: 2x 512GB 9 | - HDD: 1x 3TB 10 | ## Prerequisites 11 | ### Environment 12 | The model was trained in a Docker container. It is highly recommended to use [nvidia-docker2](https://github.com/NVIDIA/nvidia-docker) if you want to reproduce the result. 13 | The code assumes that you have at least one NVIDIA GPU and a CUDA 8-compatible driver. Run the following command to build the Docker image: 14 | ```bash 15 | cd path/to/solution 16 | sudo docker build -t inclusive . 17 | ``` 18 | ### Free space 19 | - HDD: ~600 GB (525 GB for the Open Images Training dataset + 71 GB for checkpoints, logs, etc.) 20 | - SSD: ~100 GB (77 GB for the resized Open Images Training dataset + 13 GB for the competition data) 21 | ### Data 22 | Download the [open-images-dataset](https://www.kaggle.com/c/inclusive-images-challenge#Data-Download-&-Getting-Started) to `/path/to/hdd/open-images-dataset/train` 23 | ```bash 24 | mkdir -p /path/to/hdd/open-images-dataset 25 | cd /path/to/hdd/open-images-dataset 26 | aws s3 --no-sign-request sync s3://open-images-dataset/train train/ 27 | ``` 28 | Download the [inclusive-images-challenge-data](https://www.kaggle.com/c/inclusive-images-challenge/data) to `/path/to/ssd/inclusive-images-challenge/data` 29 | ```bash 30 | mkdir -p /path/to/ssd/inclusive-images-challenge/data 31 | cd /path/to/ssd/inclusive-images-challenge/data 32 | kaggle competitions download -c inclusive-images-challenge 33 | unzip train_human_labels.csv.zip 34 | unzip stage_1_sample_submission.csv.zip 35 | unzip stage_2_sample_submission.csv.zip 36 | unzip stage_1_test_images.zip -d stage_1_test_images 37 | unzip stage_2_images.zip -d stage_2_test_images 38 | ``` 39 | _Note: there are some missing files in the Inclusive Images Challenge Stage 1 data. You have to delete the corresponding rows from `tuning_labels.csv` manually; one way to do that is sketched below._
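A possible cleanup script for the note above — my own sketch, not part of the original repo. It rewrites `tuning_labels.csv` in place, keeping only the rows whose image file actually exists (paths assume the container mounts described below; adjust them if you run it on the host):

```python
import os

TUNING_LABELS_PATH = '/inclusive/tuning_labels.csv'
STAGE1_IMAGES_ROOT = '/inclusive/stage_1_test_images'

with open(TUNING_LABELS_PATH, 'r') as f:
    rows = f.readlines()

# tuning_labels.csv has no header; each row is "<image_id>,<label names>".
kept = [row for row in rows
        if os.path.exists(os.path.join(STAGE1_IMAGES_ROOT,
                                       row.split(',')[0] + '.jpg'))]

print('Dropping {} rows whose images are missing'.format(len(rows) - len(kept)))
with open(TUNING_LABELS_PATH, 'w') as f:
    f.writelines(kept)
```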
40 | ### Output directories 41 | Create a directory for the resized Open Images Training dataset 42 | ```bash 43 | mkdir -p /path/to/ssd/open-images-dataset/train-resized 44 | ``` 45 | Create a directory for training artifacts (checkpoints, logs, etc.) 46 | ```bash 47 | mkdir -p /path/to/hdd/inclusive-images-challenge/artifacts 48 | ``` 49 | Create a directory for the model output (submissions) 50 | ```bash 51 | mkdir -p /path/to/hdd/inclusive-images-challenge/output 52 | ``` 53 | ## How to train the model 54 | Run the Docker container with the paths mounted correctly 55 | ```bash 56 | sudo docker run --runtime=nvidia -i -t -d --rm --ipc=host -v /path/to/hdd/open-images-dataset:/original_images -v /path/to/ssd/open-images-dataset/train-resized:/resized_images -v /path/to/ssd/inclusive-images-challenge/data:/inclusive -v /path/to/hdd/inclusive-images-challenge/artifacts:/artifacts -v /path/to/hdd/inclusive-images-challenge/output:/output --name inclusive inclusive 57 | sudo docker exec -it inclusive /bin/bash 58 | ``` 59 | Prepare the training dataset (inside the container) 60 | ```bash 61 | python3 /source/prepare_dataset.py 62 | ``` 63 | Train the model on the Open Images Training dataset (inside the container) 64 | ```bash 65 | python3 /source/train_se_resnet101.py 66 | ``` 67 | Finetune the model on the Inclusive Images Challenge Stage 1 tuning set (inside the container) 68 | ```bash 69 | python3 /source/tune_se_resnet101.py 70 | ``` 71 | ## How to generate submissions 72 | Run the following command (inside the container), where `<stage_id>` is `1` or `2` 73 | ```bash 74 | python3 /source/generate_submission.py <stage_id> 75 | ``` 76 | Submissions will appear in the output directory: `/path/to/hdd/inclusive-images-challenge/output` 77 | The fastest way to get predictions for a new test dataset is to replace the stage 2 dataset with the new one.
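For reference, `generate_submission.py` turns the per-image scores into a label set with a simple rule: keep labels scoring above `THRESHOLD` (0.55), cap them at `MAX_PREDS` (5), and fall back to the top `MIN_PREDS` (2) labels when too few pass. A condensed sketch of that selection (a standalone rewrite for illustration, not the script's exact code):

```python
import numpy as np

def select_labels(pred_scores, pred_labels,
                  threshold=0.55, min_preds=2, max_preds=5):
    # Sort ascending by score, mirroring generate_submission.py.
    order = np.argsort(pred_scores)
    labels, scores = pred_labels[order], pred_scores[order]
    chosen = labels[scores > threshold]
    if len(chosen) > max_preds:
        chosen = chosen[-max_preds:]      # keep only the highest-scoring labels
    if len(chosen) < min_preds:
        chosen = labels[-min_preds:]      # too few passed: take the top-2 overall
    return chosen.tolist()

scores = np.array([0.9, 0.1, 0.7, 0.6, 0.2])
labels = np.array([10, 11, 12, 13, 14])
print(select_labels(scores, labels))      # [13, 12, 10]
```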
78 | ## `source/config.yaml` 79 | This file specifies the path to the train, test, model, and output directories. 80 | - This is the only place where these paths are specified. 81 | - Any code that performs I/O takes its base paths from `config.yaml`. 82 | _Note: If you are using the Docker container, you do not need to change the paths in this file._ 83 | ## Serialized copy of the trained model 84 | You can download the artifacts folder that I used to generate my final submissions: [GoogleDrive](https://drive.google.com/file/d/1rg5m7xKXGdc3jnaI-QKLKtpwUPAmieeP/view?usp=sharing) 85 | 86 | -------------------------------------------------------------------------------- /source/models/se_resnet.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import math 3 | 4 | 5 | class SELayer(nn.Module): 6 | 7 | def __init__(self, channel, reduction=16): 8 | super(SELayer, self).__init__() 9 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 10 | self.fc = nn.Sequential( 11 | nn.Linear(channel, channel // reduction), 12 | nn.ReLU(inplace=True), 13 | nn.Linear(channel // reduction, channel), 14 | nn.Sigmoid()) 15 | 16 | def forward(self, x): 17 | b, c, _, _ = x.size() 18 | y = self.avg_pool(x).view(b, c)  # squeeze: global average pooling 19 | y = self.fc(y).view(b, c, 1, 1)  # excitation: per-channel gates in [0, 1] 20 | return x * y 21 | 22 | 23 | class SEBottleneck(nn.Module): 24 | expansion = 4 25 | 26 | def __init__(self, inplanes, planes, stride=1, downsample=None, reduction=16): 27 | super(SEBottleneck, self).__init__() 28 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) 29 | self.bn1 = nn.BatchNorm2d(planes) 30 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1, bias=False) 31 | self.bn2 = nn.BatchNorm2d(planes) 32 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False) 33 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 34 | self.relu = nn.ReLU(inplace=True) 35 | self.se = SELayer(planes * self.expansion, reduction) 36 | self.downsample = downsample 37 | self.stride = stride 38 | 39 | def forward(self, x): 40 | residual = x 41 | 42 | out = self.conv1(x) 43 | out = self.bn1(out) 44 | out = self.relu(out) 45 | 46 | out = self.conv2(out) 47 | out = self.bn2(out) 48 | out = self.relu(out) 49 | 50 | out = self.conv3(out) 51 | out = self.bn3(out) 52 | out = self.se(out) 53 | 54 | if self.downsample is not None: 55 | residual = self.downsample(x) 56 | 57 | out += residual 58 | out = self.relu(out) 59 | 60 | return out 61 | 62 | 63 | class SEResNet(nn.Module): 64 | 65 | def __init__(self, block, layers): 66 | self.inplanes = 64 67 | super(SEResNet, self).__init__() 68 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 69 | bias=False) 70 | self.bn1 = nn.BatchNorm2d(64) 71 | self.relu = nn.ReLU(inplace=True) 72 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2) 73 | 74 | self.layer1 = self._make_layer(block, 64, layers[0]) 75 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 76 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 77 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 78 | self.avgpool = nn.AvgPool2d(5) 79 | 80 | for m in self.modules(): 81 | if isinstance(m, nn.Conv2d): 82 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 83 | m.weight.data.normal_(0, math.sqrt(2.
/ n)) 84 | elif isinstance(m, nn.BatchNorm2d): 85 | m.weight.data.fill_(1) 86 | m.bias.data.zero_() 87 | 88 | def _make_layer(self, block, planes, blocks, stride=1): 89 | downsample = None 90 | if stride != 1 or self.inplanes != planes * block.expansion: 91 | downsample = nn.Sequential( 92 | nn.Conv2d(self.inplanes, planes * block.expansion, 93 | kernel_size=1, stride=stride, padding=0, bias=False), 94 | nn.BatchNorm2d(planes * block.expansion)) 95 | 96 | layers = [] 97 | layers.append(block(self.inplanes, planes, stride, downsample)) 98 | self.inplanes = planes * block.expansion 99 | for i in range(1, blocks): 100 | layers.append(block(self.inplanes, planes)) 101 | 102 | return nn.Sequential(*layers) 103 | 104 | def forward(self, x): 105 | x = self.conv1(x) 106 | x = self.bn1(x) 107 | x = self.relu(x) 108 | 109 | x = self.maxpool(x) 110 | 111 | x = self.layer1(x) 112 | x = self.layer2(x) 113 | x = self.layer3(x) 114 | x = self.layer4(x) 115 | 116 | x = self.avgpool(x) 117 | x = x.view(x.size(0), -1) 118 | 119 | return x 120 | 121 | 122 | def se_resnet101(**kwargs): 123 | """Constructs a SE-ResNet-101 model. 124 | """ 125 | model = SEResNet(SEBottleneck, [3, 4, 23, 3], **kwargs) 126 | return model -------------------------------------------------------------------------------- /source/generate_submission.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import pickle 4 | import argparse 5 | 6 | import numpy as np 7 | import torch 8 | torch.backends.cudnn.benchmark=True 9 | from torch import nn 10 | from torch.nn import functional as F 11 | from torch.utils.data import DataLoader 12 | 13 | from datasets import LabeledImages 14 | from torchvision import transforms 15 | from models.se_resnet import se_resnet101 16 | 17 | 18 | CONFIG_PATH = '/source/config.yaml' 19 | 20 | 21 | class Classifier(nn.Module): 22 | def __init__(self, n_classes): 23 | super(Classifier, self).__init__() 24 | self.n_classes = n_classes 25 | self.features = se_resnet101() 26 | self.classifier = nn.Linear(2048, n_classes) 27 | 28 | def forward(self, x): 29 | x = self.features(x) 30 | x = self.classifier(x) 31 | 32 | return x 33 | 34 | 35 | class ModelWithLoss(nn.Module): 36 | def __init__(self, classifier): 37 | super(ModelWithLoss, self).__init__() 38 | self.classifier = classifier 39 | self.criterion = nn.CrossEntropyLoss(size_average=False) 40 | 41 | def forward(self, x, labels): 42 | assert len(x) == len(labels) 43 | 44 | predictions = self.classifier(x) 45 | 46 | all_classes = np.arange(self.classifier.n_classes, dtype=np.int64) 47 | zero_label = torch.tensor([0]).to(x.device) 48 | 49 | loss = 0 50 | denominator = 0 51 | for prediction, positives in zip(predictions, labels): 52 | negatives = np.setdiff1d(all_classes, positives, assume_unique=True) 53 | negatives_tensor = torch.tensor(negatives).to(x.device) 54 | positives_tensor = torch.tensor(positives).to(x.device).unsqueeze(dim=1) 55 | 56 | for positive in positives_tensor: 57 | indices = torch.cat((positive, negatives_tensor)) 58 | loss = loss + self.criterion(prediction[indices].unsqueeze(dim=0), zero_label) 59 | denominator += 1 60 | 61 | loss /= denominator 62 | 63 | return loss 64 | 65 | def predict(self, x, top_k): 66 | input_shape = x.shape 67 | if len(input_shape) == 5: 68 | x = x.view(-1, input_shape[2], input_shape[3], input_shape[4]) 69 | predictions = self.classifier(x) 70 | predictions = predictions.view(input_shape[0], input_shape[1], -1).mean(dim=1) 71 | else: 72 | predictions = 
self.classifier(x) 73 | 74 | scores, labels = predictions.sort(dim=1, descending=True) 75 | 76 | pred_scores = np.zeros(shape=(len(scores), top_k), dtype=np.float32) 77 | pred_labels = labels[:, :top_k].cpu().numpy() 78 | 79 | for i in range(top_k): 80 | i_scores = torch.cat((scores[:, i:i + 1], scores[:, top_k:]), dim=1) 81 | pred_scores[:, i] = F.softmax(i_scores, dim=1)[:, 0].cpu().numpy() 82 | 83 | return pred_scores, pred_labels 84 | 85 | 86 | def main(): 87 | parser = argparse.ArgumentParser(description='Generates submission (stage 1 or stage 2)') 88 | parser.add_argument('stage', type=int, choices=[1, 2]) 89 | args = parser.parse_args() 90 | 91 | with open(CONFIG_PATH, 'r') as f: 92 | config = yaml.load(f) 93 | 94 | dataset_root = config['DATASET']['STAGE{}_TEST_IMAGES_ROOT'.format(args.stage)] 95 | assert os.path.exists(dataset_root), dataset_root 96 | sample_submission_path = config['DATASET']['STAGE{}_SAMPLE_SUBMISSION_PATH'.format(args.stage)] 97 | assert os.path.exists(sample_submission_path), sample_submission_path 98 | 99 | output_root = config['SUBMISSION']['OUTPUT_ROOT'] 100 | assert os.path.exists(output_root), output_root 101 | 102 | test_list_path = os.path.join(output_root, 'test_stage{}.txt'.format(args.stage)) 103 | 104 | with open(sample_submission_path, 'r') as f_in, open(test_list_path, 'w') as f_out: 105 | f_in.readline() 106 | for line in f_in: 107 | img_id, _ = line.split(',') 108 | img_name = img_id + '.jpg' 109 | if os.path.exists(os.path.join(dataset_root, img_name)): 110 | f_out.write('{}\n'.format(img_name)) 111 | else: 112 | print('Warning: file {} does not exist'.format(os.path.join(dataset_root, img_name))) 113 | 114 | with open(config['DATASET']['NAME_TO_LABEL_PATH'], 'rb') as f: 115 | name_to_label = pickle.load(f) 116 | label_to_name = {label: name for name, label in name_to_label.items()} 117 | n_classes = len(name_to_label) 118 | 119 | classifier = Classifier(n_classes) 120 | model = ModelWithLoss(classifier).cuda().eval() 121 | 122 | snapshot_path = os.path.join(config['TUNING']['ARTIFACTS_ROOT'], 'snapshots', 123 | 'snapshot_epoch_{}.pth.tar'.format(config['TUNING']['N_EPOCH'])) 124 | state = torch.load(snapshot_path, map_location=lambda storage, loc: storage) 125 | model.load_state_dict(state['model']) 126 | 127 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 128 | to_tensor = transforms.Compose([transforms.ToTensor(), normalize]) 129 | tta_transform = transforms.Compose([transforms.Resize(256), 130 | transforms.TenCrop(224), 131 | transforms.Lambda( 132 | lambda crops: torch.stack([to_tensor(crop) for crop in crops]))]) 133 | dataset = LabeledImages(test_list_path, dataset_root, tta_transform) 134 | loader = DataLoader(dataset, config['SUBMISSION']['BATCH_SIZE'], num_workers=4, collate_fn=lambda X: X) 135 | 136 | pred_scores_all = [] 137 | pred_labels_all = [] 138 | 139 | for samples in loader: 140 | input_tensor = torch.stack([sample['image'] for sample in samples]).cuda() 141 | 142 | with torch.no_grad(): 143 | pred_scores, pred_labels = model.predict(input_tensor, config['SUBMISSION']['TOP_K']) 144 | pred_scores_all.extend(pred_scores) 145 | pred_labels_all.extend(pred_labels) 146 | 147 | image_id_to_names = {} 148 | 149 | threshold = config['SUBMISSION']['THRESHOLD'] 150 | min_preds = config['SUBMISSION']['MIN_PREDS'] 151 | max_preds = config['SUBMISSION']['MAX_PREDS'] 152 | 153 | for pred_scores, pred_labels, img_name in zip(pred_scores_all, pred_labels_all, dataset._content): 154 | best_indices = 
np.argsort(pred_scores) 155 | best_labels = pred_labels[best_indices] 156 | best_scores = pred_scores[best_indices] 157 | pred_labels = best_labels[best_scores > threshold] 158 | if len(pred_labels) > max_preds: 159 | pred_labels = pred_labels[-max_preds:] 160 | if len(pred_labels) >= min_preds: 161 | pred_names = [label_to_name[label] for label in pred_labels.tolist()] 162 | else: 163 | pred_names = [label_to_name[label] for label in best_labels[-min_preds:].tolist()] 164 | image_id = img_name.split('.')[0] 165 | image_id_to_names[image_id] = pred_names 166 | 167 | submission_path = os.path.join(output_root, 'submission_stage{}.csv'.format(args.stage)) 168 | 169 | with open(sample_submission_path, 'r') as f_in, open(submission_path, 'w') as f_out: 170 | f_out.write(f_in.readline()) 171 | for line in f_in: 172 | img_id, _ = line.split(',') 173 | if img_id in image_id_to_names: 174 | names = image_id_to_names[img_id] 175 | f_out.write('{},'.format(img_id)) 176 | f_out.write('{}\n'.format(' '.join(names))) 177 | else: 178 | f_out.write('{},\n'.format(img_id)) 179 | 180 | 181 | if __name__ == '__main__': 182 | main() 183 | -------------------------------------------------------------------------------- /source/train_se_resnet101.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import pickle 4 | import logging 5 | import random 6 | 7 | import numpy as np 8 | import torch 9 | torch.backends.cudnn.benchmark=True 10 | from torch import nn 11 | from torch.nn import functional as F 12 | from torch.utils.data import DataLoader 13 | 14 | from tensorboardX import SummaryWriter 15 | 16 | from datasets import LabeledImages 17 | from torchvision import transforms 18 | from models.se_resnet import se_resnet101 19 | 20 | from utils.average_precision_calculator import AveragePrecisionCalculator 21 | 22 | 23 | CONFIG_PATH = '/source/config.yaml' 24 | 25 | 26 | def get_path(rel_path, config): 27 | return os.path.join(config['TRAINING']['ARTIFACTS_ROOT'], rel_path) 28 | 29 | 30 | class Classifier(nn.Module): 31 | def __init__(self, n_classes): 32 | super(Classifier, self).__init__() 33 | self.n_classes = n_classes 34 | self.features = se_resnet101() 35 | self.classifier = nn.Linear(2048, n_classes) 36 | 37 | def forward(self, x): 38 | x = self.features(x) 39 | x = self.classifier(x) 40 | 41 | return x 42 | 43 | 44 | class ModelWithLoss(nn.Module): 45 | def __init__(self, classifier): 46 | super(ModelWithLoss, self).__init__() 47 | self.classifier = classifier 48 | self.criterion = nn.CrossEntropyLoss(size_average=False) 49 | 50 | def forward(self, x, labels): 51 | assert len(x) == len(labels) 52 | 53 | predictions = self.classifier(x) 54 | 55 | all_classes = np.arange(self.classifier.n_classes, dtype=np.int64) 56 | zero_label = torch.tensor([0]).to(x.device) 57 | 58 | loss = 0 59 | denominator = 0 60 | for prediction, positives in zip(predictions, labels): 61 | negatives = np.setdiff1d(all_classes, positives, assume_unique=True) 62 | negatives_tensor = torch.tensor(negatives).to(x.device) 63 | positives_tensor = torch.tensor(positives).to(x.device).unsqueeze(dim=1) 64 | 65 | for positive in positives_tensor: 66 | indices = torch.cat((positive, negatives_tensor)) 67 | loss = loss + self.criterion(prediction[indices].unsqueeze(dim=0), zero_label) 68 | denominator += 1 69 | 70 | loss /= denominator 71 | 72 | return loss 73 | 74 | def predict(self, x, top_k): 75 | predictions = self.classifier(x) 76 | scores, labels = predictions.sort(dim=1, 
descending=True) 77 | 78 | pred_scores = np.zeros(shape=(len(scores), top_k), dtype=np.float32) 79 | pred_labels = labels[:, :top_k].cpu().numpy() 80 | 81 | for i in range(top_k): 82 | i_scores = torch.cat((scores[:, i:i + 1], scores[:, top_k:]), dim=1) 83 | pred_scores[:, i] = F.softmax(i_scores, dim=1)[:, 0].cpu().numpy() 84 | 85 | return pred_scores, pred_labels 86 | 87 | 88 | class AverageMeter(object): 89 | def __init__(self): 90 | self.reset() 91 | 92 | def reset(self): 93 | self.val = 0 94 | self.avg = 0 95 | self.sum = 0 96 | self.count = 0 97 | 98 | def update(self, val, n=1): 99 | self.val = val 100 | self.sum += val * n 101 | self.count += n 102 | self.avg = self.sum / self.count 103 | 104 | 105 | def validate(model, val_dataset, batch_size, top_k): 106 | model.eval() 107 | 108 | predictions = [] 109 | actuals = [] 110 | total_num_positives = 0 111 | 112 | loss_meter = AverageMeter() 113 | 114 | val_loader = DataLoader(val_dataset, batch_size, num_workers=4, collate_fn=lambda X: X) 115 | for samples in val_loader: 116 | input_tensor = torch.stack([sample['image'] for sample in samples]).cuda() 117 | labels = [sample['labels'] for sample in samples] 118 | 119 | with torch.no_grad(): 120 | pred_scores, pred_labels = model.predict(input_tensor, top_k) 121 | loss = model(input_tensor, labels) 122 | 123 | loss_meter.update(loss.item(), len(samples)) 124 | 125 | cur_actuals = np.zeros_like(pred_labels, dtype=np.bool) 126 | for i in range(len(pred_labels)): 127 | assert len(labels[i]) <= top_k 128 | total_num_positives += len(labels[i]) 129 | for label in labels[i]: 130 | cur_actuals[i] = np.logical_or(cur_actuals[i], pred_labels[i] == label) 131 | 132 | predictions.extend(pred_scores.flatten().tolist()) 133 | actuals.extend(cur_actuals.astype(np.uint8).flatten().tolist()) 134 | 135 | gap = AveragePrecisionCalculator.ap_at_n(predictions, actuals, n=None, total_num_positives=total_num_positives) 136 | return loss_meter.avg, gap 137 | 138 | 139 | def adjust_learning_rate(optimizer, epoch, initial_lr, lr_decay, lr_step): 140 | lr = initial_lr * (lr_decay ** (epoch // lr_step)) 141 | for param_group in optimizer.param_groups: 142 | param_group['lr'] = lr 143 | 144 | 145 | def main(): 146 | with open(CONFIG_PATH, 'r') as f: 147 | config = yaml.load(f) 148 | os.makedirs(get_path('snapshots', config), exist_ok=True) 149 | 150 | logger = logging.getLogger('train') 151 | logger.setLevel(logging.DEBUG) 152 | fh = logging.FileHandler(get_path('train.log', config)) 153 | fh.setLevel(logging.DEBUG) 154 | ch = logging.StreamHandler() 155 | ch.setLevel(logging.DEBUG) 156 | formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') 157 | fh.setFormatter(formatter) 158 | ch.setFormatter(formatter) 159 | logger.addHandler(fh) 160 | logger.addHandler(ch) 161 | 162 | random.seed(config['TRAINING']['SEED']) 163 | np.random.seed(config['TRAINING']['SEED']) 164 | torch.manual_seed(config['TRAINING']['SEED']) 165 | 166 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 167 | train_transform = transforms.Compose([transforms.Resize(256), 168 | transforms.RandomCrop(224), 169 | transforms.RandomHorizontalFlip(), 170 | transforms.ToTensor(), 171 | normalize]) 172 | val_transform = transforms.Compose([transforms.Resize(256), 173 | transforms.CenterCrop(224), 174 | transforms.ToTensor(), 175 | normalize]) 176 | train_dataset = LabeledImages(config['DATASET']['TRAIN_LIST_PATH'], config['DATASET']['RESIZED_IMAGES_ROOT'], 177 | train_transform) 178 | val_dataset = 
LabeledImages(config['DATASET']['VAL_LIST_PATH'], config['DATASET']['RESIZED_IMAGES_ROOT'], 179 | val_transform) 180 | logger.info('Train size: {}. Val size: {}.'.format(len(train_dataset), len(val_dataset))) 181 | 182 | with open(config['DATASET']['NAME_TO_LABEL_PATH'], 'rb') as f: 183 | name_to_label = pickle.load(f) 184 | n_classes = len(name_to_label) 185 | logger.info('Total number of classes: {}.'.format(n_classes)) 186 | 187 | classifier = Classifier(n_classes) 188 | model_with_loss = ModelWithLoss(classifier).cuda() 189 | 190 | optimizer = torch.optim.SGD(model_with_loss.parameters(), 191 | lr=config['TRAINING']['INITIAL_LR'], 192 | momentum=config['TRAINING']['MOMENTUM'], 193 | weight_decay=config['TRAINING']['WEIGHT_DECAY'], 194 | nesterov=True) 195 | 196 | loss_meter = AverageMeter() 197 | 198 | iteration = 0 199 | snapshots = [(int(path.split('_epoch_')[-1].split('.')[0]), path) for path in os.listdir(get_path('snapshots', config)) 200 | if path.startswith('snapshot') and path.endswith('.pth.tar')] 201 | if len(snapshots) > 0: 202 | snapshots.sort(key=lambda t: t[0]) 203 | logger.info('Finetuning from {}'.format(snapshots[-1][1])) 204 | state = torch.load(os.path.join(get_path('snapshots', config), snapshots[-1][1]), map_location=lambda storage, loc: storage) 205 | model_with_loss.load_state_dict(state['model']) 206 | optimizer.load_state_dict(state['optimizer']) 207 | start_epoch = state['epoch'] + 1 208 | else: 209 | start_epoch = 1 210 | writer = SummaryWriter(get_path(os.path.join('tensorboard', 'run_epoch_{}'.format(start_epoch)), config)) 211 | 212 | for epoch in range(start_epoch, config['TRAINING']['N_EPOCH'] + 1): 213 | adjust_learning_rate(optimizer, epoch, config['TRAINING']['INITIAL_LR'], config['TRAINING']['LR_DECAY'], 214 | config['TRAINING']['LR_STEP']) 215 | logger.info('Start epoch {} / {}.'.format(epoch, config['TRAINING']['N_EPOCH'])) 216 | val_loss, val_gap = validate(model_with_loss, val_dataset, config['TRAINING']['BATCH_SIZE'], 217 | config['VALIDATION']['TOP_K']) 218 | logger.info('Val loss: {}. Val GAP: {}.'.format(val_loss, val_gap)) 219 | writer.add_scalar('val_loss', val_loss, iteration) 220 | writer.add_scalar('val_gap', val_gap, iteration) 221 | 222 | model_with_loss.train() 223 | train_loader = DataLoader(train_dataset, 224 | batch_size=config['TRAINING']['BATCH_SIZE'], 225 | shuffle=True, 226 | num_workers=4, 227 | collate_fn=lambda X: X, 228 | drop_last=True) 229 | for samples in train_loader: 230 | input_tensor = torch.stack([sample['image'] for sample in samples]).cuda() 231 | labels = [sample['labels'] for sample in samples] 232 | 233 | optimizer.zero_grad() 234 | loss = model_with_loss(input_tensor, labels) 235 | loss.backward() 236 | optimizer.step() 237 | 238 | loss_meter.update(loss.item(), len(input_tensor)) 239 | 240 | if iteration % config['TRAINING']['LOG_FREQUENCY'] == 0: 241 | logger.info('Iteration {}. 
Loss {}.'.format(iteration, loss_meter.avg)) 242 | writer.add_scalar('train_loss', loss_meter.avg, iteration) 243 | loss_meter.reset() 244 | 245 | for i, param_group in enumerate(optimizer.param_groups): 246 | writer.add_scalar('lr/group_{}'.format(i), param_group['lr'], iteration) 247 | 248 | iteration += 1 249 | 250 | state = {'model': model_with_loss.state_dict(), 'optimizer': optimizer.state_dict(), 'epoch': epoch} 251 | torch.save(state, get_path(os.path.join('snapshots', 'snapshot_epoch_{}.pth.tar'.format(epoch)), config)) 252 | 253 | 254 | if __name__ == '__main__': 255 | main() -------------------------------------------------------------------------------- /source/utils/average_precision_calculator.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | #     http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS-IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Calculate or keep track of the interpolated average precision. 16 | 17 | It provides an interface for calculating interpolated average precision for an 18 | entire list or the top-n ranked items. For the definition of the 19 | (non-)interpolated average precision: 20 | http://trec.nist.gov/pubs/trec15/appendices/CE.MEASURES06.pdf 21 | 22 | Example usages: 23 | 1) Use it as a static function call to directly calculate average precision for 24 | a short ranked list in memory. 25 | 26 | ``` 27 | import random 28 | import numpy as np 29 | p = np.array([random.random() for _ in range(10)]) 30 | a = np.array([random.choice([0, 1]) for _ in range(10)]) 31 | 32 | ap = average_precision_calculator.AveragePrecisionCalculator.ap(p, a) 33 | ``` 34 | 35 | 2) Use it as an object for a long ranked list that cannot be stored in memory, or 36 | for the case where partial predictions are observed at a time (TensorFlow 37 | predictions). In this case, we first call the function accumulate many times 38 | to process parts of the ranked list. After processing all the parts, we call 39 | peek_ap_at_n. 40 | ``` 41 | p1 = np.array([random.random() for _ in range(5)]) 42 | a1 = np.array([random.choice([0, 1]) for _ in range(5)]) 43 | p2 = np.array([random.random() for _ in range(5)]) 44 | a2 = np.array([random.choice([0, 1]) for _ in range(5)]) 45 | 46 | # non-interpolated average precision at the top 10 47 | calculator = average_precision_calculator.AveragePrecisionCalculator(10) 48 | calculator.accumulate(p1, a1) 49 | calculator.accumulate(p2, a2) 50 | ap3 = calculator.peek_ap_at_n() 51 | ``` 52 | """ 53 | 54 | import heapq 55 | import random 56 | import numbers 57 | 58 | import numpy 59 | 60 | 61 | class AveragePrecisionCalculator(object): 62 | """Calculate the average precision and average precision at n.""" 63 | 64 | def __init__(self, top_n=None): 65 | """Construct an AveragePrecisionCalculator to calculate average precision. 66 | 67 | This class is used to calculate the average precision for a single label.
68 | 69 | Args: 70 | top_n: A positive integer specifying the average precision at n, or 71 | None to use all provided data points. 72 | 73 | Raises: 74 | ValueError: An error occurred when the top_n is not a positive integer. 75 | """ 76 | if not ((isinstance(top_n, int) and top_n > 0) or top_n is None): 77 | raise ValueError("top_n must be a positive integer or None.") 78 | 79 | self._top_n = top_n # average precision at n 80 | self._total_positives = 0 # total number of positives seen so far 81 | self._heap = [] # min heap of (prediction, actual) keeping the top-n pairs 82 | 83 | @property 84 | def heap_size(self): 85 | """Gets the heap size maintained in the class.""" 86 | return len(self._heap) 87 | 88 | @property 89 | def num_accumulated_positives(self): 90 | """Gets the number of positive samples that have been accumulated.""" 91 | return self._total_positives 92 | 93 | def accumulate(self, predictions, actuals, num_positives=None): 94 | """Accumulate the predictions and their ground truth labels. 95 | 96 | After the function call, we may call peek_ap_at_n to actually calculate 97 | the average precision. 98 | Note predictions and actuals must have the same shape. 99 | 100 | Args: 101 | predictions: a list storing the prediction scores. 102 | actuals: a list storing the ground truth labels. Any value 103 | larger than 0 will be treated as positives, otherwise as negatives. 104 | num_positives: If the 'predictions' and 'actuals' inputs aren't complete, 105 | then it's possible some true positives were missed in them. In that case, 106 | you can provide 'num_positives' in order to accurately track recall. 107 | 108 | Raises: 109 | ValueError: An error occurred when the format of the input is not the 110 | numpy 1-D array or the shape of predictions and actuals does not match. 111 | """ 112 | if len(predictions) != len(actuals): 113 | raise ValueError("the shape of predictions and actuals does not match.") 114 | 115 | if num_positives is not None: 116 | if not isinstance(num_positives, numbers.Number) or num_positives < 0: 117 | raise ValueError("'num_positives' was provided but it wasn't a nonnegative number.") 118 | 119 | if num_positives is not None: 120 | self._total_positives += num_positives 121 | else: 122 | self._total_positives += numpy.size(numpy.where(actuals > 0)) 123 | topk = self._top_n 124 | heap = self._heap 125 | 126 | for i in range(numpy.size(predictions)): 127 | if topk is None or len(heap) < topk: 128 | heapq.heappush(heap, (predictions[i], actuals[i])) 129 | else: 130 | if predictions[i] > heap[0][0]: # heap[0] is the smallest 131 | heapq.heappop(heap) 132 | heapq.heappush(heap, (predictions[i], actuals[i])) 133 | 134 | def clear(self): 135 | """Clear the accumulated predictions.""" 136 | self._heap = [] 137 | self._total_positives = 0 138 | 139 | def peek_ap_at_n(self): 140 | """Peek the non-interpolated average precision at n. 141 | 142 | Returns: 143 | The non-interpolated average precision at n (default 0). 144 | If n is larger than the length of the ranked list, 145 | the average precision will be returned. 146 | """ 147 | if self.heap_size <= 0: 148 | return 0 149 | predlists = numpy.array(list(zip(*self._heap))) 150 | 151 | ap = self.ap_at_n(predlists[0], 152 | predlists[1], 153 | n=self._top_n, 154 | total_num_positives=self._total_positives) 155 | return ap 156 | 157 | @staticmethod 158 | def ap(predictions, actuals): 159 | """Calculate the non-interpolated average precision. 160 | 161 | Args: 162 | predictions: a numpy 1-D array storing the sparse prediction scores.
163 | actuals: a numpy 1-D array storing the ground truth labels. Any value 164 | larger than 0 will be treated as positives, otherwise as negatives. 165 | 166 | Returns: 167 | The non-interpolated average precision at n. 168 | If n is larger than the length of the ranked list, 169 | the average precision will be returned. 170 | 171 | Raises: 172 | ValueError: An error occurred when the format of the input is not the 173 | numpy 1-D array or the shape of predictions and actuals does not match. 174 | """ 175 | return AveragePrecisionCalculator.ap_at_n(predictions, 176 | actuals, 177 | n=None) 178 | 179 | @staticmethod 180 | def ap_at_n(predictions, actuals, n=20, total_num_positives=None): 181 | """Calculate the non-interpolated average precision. 182 | 183 | Args: 184 | predictions: a numpy 1-D array storing the sparse prediction scores. 185 | actuals: a numpy 1-D array storing the ground truth labels. Any value 186 | larger than 0 will be treated as positives, otherwise as negatives. 187 | n: the top n items to be considered in ap@n. 188 | total_num_positives : (optionally) you can specify the number of total 189 | positive 190 | in the list. If specified, it will be used in calculation. 191 | 192 | Returns: 193 | The non-interpolated average precision at n. 194 | If n is larger than the length of the ranked list, 195 | the average precision will be returned. 196 | 197 | Raises: 198 | ValueError: An error occurred when 199 | 1) the format of the input is not the numpy 1-D array; 200 | 2) the shape of predictions and actuals does not match; 201 | 3) the input n is not a positive integer. 202 | """ 203 | if len(predictions) != len(actuals): 204 | raise ValueError("the shape of predictions and actuals does not match.") 205 | 206 | if n is not None: 207 | if not isinstance(n, int) or n <= 0: 208 | raise ValueError("n must be 'None' or a positive integer." 209 | " It was '%s'." % n) 210 | 211 | ap = 0.0 212 | 213 | predictions = numpy.array(predictions) 214 | actuals = numpy.array(actuals) 215 | 216 | # add a shuffler to avoid overestimating the ap 217 | predictions, actuals = AveragePrecisionCalculator._shuffle(predictions, 218 | actuals) 219 | sortidx = sorted( 220 | range(len(predictions)), 221 | key=lambda k: predictions[k], 222 | reverse=True) 223 | 224 | if total_num_positives is None: 225 | numpos = numpy.size(numpy.where(actuals > 0)) 226 | else: 227 | numpos = total_num_positives 228 | 229 | if numpos == 0: 230 | return 0 231 | 232 | if n is not None: 233 | numpos = min(numpos, n) 234 | delta_recall = 1.0 / numpos 235 | poscount = 0.0 236 | 237 | # calculate the ap 238 | r = len(sortidx) 239 | if n is not None: 240 | r = min(r, n) 241 | for i in range(r): 242 | if actuals[sortidx[i]] > 0: 243 | poscount += 1 244 | ap += poscount / (i + 1) * delta_recall 245 | return ap 246 | 247 | @staticmethod 248 | def _shuffle(predictions, actuals): 249 | random.seed(0) 250 | suffidx = random.sample(range(len(predictions)), len(predictions)) 251 | predictions = predictions[suffidx] 252 | actuals = actuals[suffidx] 253 | return predictions, actuals 254 | 255 | @staticmethod 256 | def _zero_one_normalize(predictions, epsilon=1e-7): 257 | """Normalize the predictions to the range between 0.0 and 1.0. 258 | 259 | For some predictions like SVM predictions, we need to normalize them before 260 | calculate the interpolated average precision. The normalization will not 261 | change the rank in the original list and thus won't change the average 262 | precision. 
263 | 264 | Args: 265 | predictions: a numpy 1-D array storing the sparse prediction scores. 266 | epsilon: a small constant to avoid denominator being zero. 267 | 268 | Returns: 269 | The normalized prediction. 270 | """ 271 | denominator = numpy.max(predictions) - numpy.min(predictions) 272 | ret = (predictions - numpy.min(predictions)) / numpy.maximum(denominator, 273 | epsilon) 274 | return ret 275 | --------------------------------------------------------------------------------
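Editor's note: the training script's `validate` function computes the competition's GAP metric by flattening all top-K scores and their hit/miss indicators into two long lists and calling the static `ap_at_n` with `n=None`. A tiny self-contained usage sketch with synthetic numbers (assumes it is run from `/source`, like the scripts themselves, so the `utils` package is importable):

```python
from utils.average_precision_calculator import AveragePrecisionCalculator

# Three ranked candidate scores; two are true positives, and one positive
# never made it into the candidate list, hence total_num_positives=3.
predictions = [0.9, 0.8, 0.3]
actuals = [1, 0, 1]

gap = AveragePrecisionCalculator.ap_at_n(
    predictions, actuals, n=None, total_num_positives=3)

# Hits at ranks 1 and 3 with delta_recall = 1/3:
# gap = (1/1) * 1/3 + (2/3) * 1/3 = 0.5556
print(round(gap, 4))
```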
/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution.
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 
--------------------------------------------------------------------------------
/source/tune_se_resnet101.py:
--------------------------------------------------------------------------------
1 | import os
2 | import yaml
3 | import pickle
4 | import logging
5 | import random
6 | 
7 | import numpy as np
8 | import torch
9 | torch.backends.cudnn.benchmark = True  # let cudnn pick the fastest conv algorithms for fixed-size inputs
10 | from torch import nn
11 | from torch.nn import functional as F
12 | from torch.utils.data import DataLoader
13 | from torch.utils.data.sampler import SubsetRandomSampler
14 | 
15 | from tensorboardX import SummaryWriter
16 | 
17 | from datasets import LabeledImages
18 | from torchvision import transforms
19 | from models.se_resnet import se_resnet101
20 | 
21 | 
22 | CONFIG_PATH = '/source/config.yaml'
23 | 
24 | 
25 | def get_path(rel_path, config):
26 |     return os.path.join(config['TUNING']['ARTIFACTS_ROOT'], rel_path)
27 | 
28 | 
29 | class Classifier(nn.Module):
30 |     def __init__(self, n_classes):
31 |         super(Classifier, self).__init__()
32 |         self.n_classes = n_classes
33 |         self.features = se_resnet101()
34 |         self.classifier = nn.Linear(2048, n_classes)
35 | 
36 |     def forward(self, x):
37 |         x = self.features(x)
38 |         x = self.classifier(x)
39 | 
40 |         return x
41 | 
42 | 
43 | class ModelWithLoss(nn.Module):
44 |     def __init__(self, classifier):
45 |         super(ModelWithLoss, self).__init__()
46 |         self.classifier = classifier
47 |         self.criterion = nn.CrossEntropyLoss(reduction='sum')  # summed here, averaged over positives in forward
48 | 
49 |     def forward(self, x, labels):
50 |         assert len(x) == len(labels)
51 | 
52 |         predictions = self.classifier(x)
53 | 
54 |         all_classes = np.arange(self.classifier.n_classes, dtype=np.int64)
55 |         zero_label = torch.tensor([0]).to(x.device)  # the positive class always sits at index 0 below
56 | 
57 |         loss = 0
58 |         denominator = 0
59 |         for prediction, positives in zip(predictions, labels):
60 |             negatives = np.setdiff1d(all_classes, positives, assume_unique=True)  # classes absent from this image
61 |             negatives_tensor = torch.tensor(negatives).to(x.device)
62 |             positives_tensor = torch.tensor(positives).to(x.device).unsqueeze(dim=1)
63 | 
64 |             for positive in positives_tensor:  # one cross-entropy term per positive: the positive vs. all negatives
65 |                 indices = torch.cat((positive, negatives_tensor))
66 |                 loss = loss + self.criterion(prediction[indices].unsqueeze(dim=0), zero_label)
67 |                 denominator += 1
68 | 
69 |         loss /= denominator
70 | 
71 |         return loss
72 | 
73 |     def predict(self, x, top_k):
74 |         predictions = self.classifier(x)
75 |         scores, labels = predictions.sort(dim=1, descending=True)
76 | 
77 |         pred_scores = np.zeros(shape=(len(scores), top_k), dtype=np.float32)
78 |         pred_labels = labels[:, :top_k].cpu().numpy()
79 | 
80 |         for i in range(top_k):
81 |             i_scores = torch.cat((scores[:, i:i + 1], scores[:, top_k:]), dim=1)  # candidate score vs. the non-top-k tail
82 |             pred_scores[:, i] = F.softmax(i_scores, dim=1)[:, 0].cpu().numpy()
83 | 
84 |         return pred_scores, pred_labels
85 | 
86 | 
87 | class AverageMeter(object):
88 |     def __init__(self):
89 |         self.reset()
90 | 
91 |     def reset(self):
92 |         self.val = 0
93 |         self.avg = 0
94 |         self.sum = 0
95 |         self.count = 0
96 | 
97 |     def update(self, val, n=1):
98 |         self.val = val
99 |         self.sum += val * n
100 |         self.count += n
101 |         self.avg = self.sum / self.count
102 | 
103 | 
104 | def calculate_f2_measure(pred_labels, pred_scores, gt_labels, thresh):  # F2 = 5 * P * R / (4 * P + R)
105 |     pred_labels = pred_labels[pred_scores > thresh]
106 |     pred_labels = set(pred_labels.tolist())
107 |     gt_labels = set(gt_labels.tolist())
108 |     tp = pred_labels & gt_labels
109 |     prec = (len(tp) / len(pred_labels)) if len(pred_labels) > 0 else 1
110 |     rec = (len(tp) / len(gt_labels)) if len(gt_labels) > 0 else 1
111 |     f2_measure = ((5 * prec * rec) / (4 * prec + rec)) if (4 * prec + rec) > 0 else 0
112 | 
113 |     return f2_measure
114 | 
115 | 
116 | def validate(model, dataset, indices, batch_size, top_k):
117 |     model.eval()
118 |     sampler = SubsetRandomSampler(indices)
119 |     loader = DataLoader(dataset, batch_size, sampler=sampler, num_workers=0, collate_fn=lambda X: X)  # identity collate: label counts vary per image
120 | 
121 |     pred_scores_all = []
122 |     pred_labels_all = []
123 |     gt_labels_all = []
124 | 
125 |     for samples in loader:
126 |         input_tensor = torch.stack([sample['image'] for sample in samples]).cuda()
127 |         labels = [sample['labels'] for sample in samples]
128 | 
129 |         with torch.no_grad():
130 |             pred_scores, pred_labels = model.predict(input_tensor, top_k)
131 |             pred_scores_all.extend(pred_scores)
132 |             pred_labels_all.extend(pred_labels)
133 |             gt_labels_all.extend(labels)
134 | 
135 |     thresholds = np.arange(0.0, 1.0, 0.05, dtype=np.float64)  # sweep candidate decision thresholds
136 |     results = np.zeros_like(thresholds)
137 | 
138 |     for i, thresh in enumerate(thresholds):
139 |         f2_measure = 0
140 |         for pred_scores, pred_labels, gt_labels in zip(pred_scores_all, pred_labels_all, gt_labels_all):
141 |             f2_measure += calculate_f2_measure(pred_labels, pred_scores, gt_labels, thresh)
142 |         results[i] = f2_measure / len(gt_labels_all)
143 | 
144 |     best_idx = np.argmax(results)
145 | 
146 |     return results[best_idx], thresholds[best_idx]
147 | 
148 | 
149 | def adjust_learning_rate(optimizer, epoch, initial_lr, lr_decay, lr_step):
150 |     lr = initial_lr * (lr_decay ** (epoch // lr_step))
151 |     for param_group in optimizer.param_groups:
152 |         param_group['lr'] = lr
153 | 
154 | 
155 | def main():
156 |     with open(CONFIG_PATH, 'r') as f:
157 |         config = yaml.safe_load(f)
158 |     os.makedirs(get_path('snapshots', config), exist_ok=True)
159 | 
160 |     logger = logging.getLogger('tuning')
161 |     logger.setLevel(logging.DEBUG)
162 |     fh = logging.FileHandler(get_path('tuning.log', config))
163 |     fh.setLevel(logging.DEBUG)
164 |     ch = logging.StreamHandler()
165 |     ch.setLevel(logging.DEBUG)
166 |     formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
167 |     fh.setFormatter(formatter)
168 |     ch.setFormatter(formatter)
169 |     logger.addHandler(fh)
170 |     logger.addHandler(ch)
171 | 
172 |     random.seed(config['TUNING']['SEED'])
173 |     np.random.seed(config['TUNING']['SEED'])
174 |     torch.manual_seed(config['TUNING']['SEED'])
175 | 
176 |     with open(config['DATASET']['NAME_TO_LABEL_PATH'], 'rb') as f:
177 |         name_to_label = pickle.load(f)
178 |     n_classes = len(name_to_label)
179 |     logger.info('Total number of classes: {}.'.format(n_classes))
180 | 
181 |     img_path_to_labels = {}
182 |     with open(config['DATASET']['TUNING_LABELS_PATH'], 'r') as f:
183 |         for line in f:
184 |             img_id, names = line.strip().split(',')
185 |             img_path = img_id + '.jpg'
186 |             labels = [name_to_label[name] for name in names.split(' ')]
187 |             img_path_to_labels[img_path] = labels
188 | 
189 |     content = sorted(img_path_to_labels)
190 |     with open(config['DATASET']['TUNING_LIST_PATH'], 'w') as f:
191 |         for img_path in content:
192 |             labels = img_path_to_labels[img_path]
193 |             line = ' '.join([img_path] + [str(label) for label in labels])
194 |             f.write('{}\n'.format(line))
195 | 
196 |     normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
197 |     train_transform = transforms.Compose([transforms.Resize(256),
198 |                                           transforms.RandomCrop(224),
199 |                                           transforms.RandomHorizontalFlip(),
200 |                                           transforms.ToTensor(),
201 |                                           normalize])
202 |     val_transform = transforms.Compose([transforms.Resize(256),
203 |                                         transforms.CenterCrop(224),
204 |                                         transforms.ToTensor(),
205 |                                         normalize])
206 |     train_dataset = LabeledImages(config['DATASET']['TUNING_LIST_PATH'], config['DATASET']['STAGE1_TEST_IMAGES_ROOT'],
207 |                                   train_transform)
208 |     val_dataset = LabeledImages(config['DATASET']['TUNING_LIST_PATH'], config['DATASET']['STAGE1_TEST_IMAGES_ROOT'],
209 |                                 val_transform)
210 | 
211 |     indices = np.arange(len(train_dataset), dtype=np.int64)
212 |     np.random.shuffle(indices)
213 |     train_size = round(config['DATASET']['TUNING_TRAIN_RATIO'] * len(indices))  # hold out a fraction of the tuning set for validation
214 |     train_indices = indices[:train_size]
215 |     val_indices = indices[train_size:]
216 | 
217 |     logger.info('Train size: {}. Val size: {}.'.format(len(train_indices), len(val_indices)))
218 | 
219 |     classifier = Classifier(n_classes)
220 |     model_with_loss = ModelWithLoss(classifier).cuda()
221 | 
222 |     initial_weights_path = os.path.join(config['TRAINING']['ARTIFACTS_ROOT'], 'snapshots',
223 |                                         'snapshot_epoch_{}.pth.tar'.format(config['TRAINING']['N_EPOCH']))
224 |     logger.info('Finetuning from {}'.format(initial_weights_path))
225 |     state = torch.load(initial_weights_path, map_location=lambda storage, loc: storage)
226 |     model_with_loss.load_state_dict(state['model'])
227 | 
228 |     optimizer = torch.optim.SGD(model_with_loss.parameters(),
229 |                                 lr=config['TUNING']['INITIAL_LR'],
230 |                                 momentum=config['TUNING']['MOMENTUM'],
231 |                                 weight_decay=config['TUNING']['WEIGHT_DECAY'],
232 |                                 nesterov=True)
233 | 
234 |     loss_meter = AverageMeter()
235 | 
236 |     iteration = 0
237 |     snapshots = [(int(path.split('_epoch_')[-1].split('.')[0]), path) for path in os.listdir(get_path('snapshots', config))
238 |                  if path.startswith('snapshot') and path.endswith('.pth.tar')]
239 |     if len(snapshots) > 0:  # resume interrupted tuning from the latest snapshot, overriding the weights loaded above
240 |         snapshots.sort(key=lambda t: t[0])
241 |         logger.info('Finetuning from {}'.format(snapshots[-1][1]))
242 |         state = torch.load(os.path.join(get_path('snapshots', config), snapshots[-1][1]), map_location=lambda storage, loc: storage)
243 |         model_with_loss.load_state_dict(state['model'])
244 |         optimizer.load_state_dict(state['optimizer'])
245 |         start_epoch = state['epoch'] + 1
246 |     else:
247 |         start_epoch = 1
248 |     writer = SummaryWriter(get_path(os.path.join('tensorboard', 'run_epoch_{}'.format(start_epoch)), config))
249 | 
250 |     for epoch in range(start_epoch, config['TUNING']['N_EPOCH'] + 1):
251 |         adjust_learning_rate(optimizer, epoch, config['TUNING']['INITIAL_LR'], config['TUNING']['LR_DECAY'],
252 |                              config['TUNING']['LR_STEP'])
253 |         logger.info('Start epoch {} / {}.'.format(epoch, config['TUNING']['N_EPOCH']))
254 |         val_score, val_thresh = validate(model_with_loss, val_dataset, val_indices, config['TUNING']['BATCH_SIZE'],
255 |                                          config['VALIDATION']['TOP_K'])
256 |         logger.info('Val score: {}. Val thresh: {}.'.format(val_score, val_thresh))
257 |         writer.add_scalar('val_score', val_score, iteration)
258 |         writer.add_scalar('val_thresh', val_thresh, iteration)
259 | 
260 |         model_with_loss.train()
261 |         sampler = SubsetRandomSampler(train_indices)
262 |         train_loader = DataLoader(train_dataset,
263 |                                   batch_size=config['TUNING']['BATCH_SIZE'],
264 |                                   sampler=sampler,
265 |                                   num_workers=4,
266 |                                   collate_fn=lambda X: X,
267 |                                   drop_last=True)
268 |         for samples in train_loader:
269 |             input_tensor = torch.stack([sample['image'] for sample in samples]).cuda()
270 |             labels = [sample['labels'] for sample in samples]
271 | 
272 |             optimizer.zero_grad()
273 |             loss = model_with_loss(input_tensor, labels)
274 |             loss.backward()
275 |             optimizer.step()
276 | 
277 |             loss_meter.update(loss.item(), len(input_tensor))
278 | 
279 |             if iteration % config['TUNING']['LOG_FREQUENCY'] == 0:
280 |                 logger.info('Iteration {}. Loss {}.'.format(iteration, loss_meter.avg))
281 |                 writer.add_scalar('train_loss', loss_meter.avg, iteration)
282 |                 loss_meter.reset()
283 | 
284 |                 for i, param_group in enumerate(optimizer.param_groups):
285 |                     writer.add_scalar('lr/group_{}'.format(i), param_group['lr'], iteration)
286 | 
287 |             iteration += 1
288 | 
289 |         state = {'model': model_with_loss.state_dict(), 'optimizer': optimizer.state_dict(), 'epoch': epoch}
290 |         torch.save(state, get_path(os.path.join('snapshots', 'snapshot_epoch_{}.pth.tar'.format(epoch)), config))
291 | 
292 | 
293 | if __name__ == '__main__':
294 |     main()
--------------------------------------------------------------------------------
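The loss in ModelWithLoss.forward above reduces the multi-label problem to one softmax cross-entropy per positive label: for each ground-truth class, the positive logit is placed at index 0 and normalized against the logits of all negative classes, and the per-positive terms are averaged. Below is a minimal standalone sketch of that computation on toy values; it is not part of the repository, and the shapes and names (logits, positives, n_classes) are hypothetical.

import numpy as np
import torch
from torch import nn

criterion = nn.CrossEntropyLoss(reduction='sum')

n_classes = 6                                     # toy number of classes
logits = torch.randn(1, n_classes)                # one image's raw class scores
positives = np.array([1, 4], dtype=np.int64)      # its ground-truth labels

all_classes = np.arange(n_classes, dtype=np.int64)
negatives = torch.tensor(np.setdiff1d(all_classes, positives, assume_unique=True))
zero_label = torch.tensor([0])                    # the positive always lands at index 0

loss = 0
denominator = 0
for positive in torch.tensor(positives).unsqueeze(dim=1):
    indices = torch.cat((positive, negatives))    # [positive, negative_1, ..., negative_k]
    loss = loss + criterion(logits[0, indices].unsqueeze(dim=0), zero_label)
    denominator += 1

print('per-positive softmax loss:', (loss / denominator).item())

ModelWithLoss.predict applies the same framing in reverse: each top-k candidate score is softmax-normalized against the non-top-k tail, yielding per-label confidences that the threshold sweep in validate then binarizes at the best-F2 threshold.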