├── source
│   ├── models
│   │   ├── __init__.py
│   │   └── se_resnet.py
│   ├── utils
│   │   ├── __init__.py
│   │   └── average_precision_calculator.py
│   ├── datasets.py
│   ├── config.yaml
│   ├── prepare_dataset.py
│   ├── generate_submission.py
│   ├── train_se_resnet101.py
│   └── tune_se_resnet101.py
├── .gitignore
├── Dockerfile
├── README.md
└── LICENSE
/source/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /source/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /source/datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | from torch.utils.data import Dataset 5 | from torchvision.datasets.folder import pil_loader 6 | 7 | 8 | class LabeledImages(Dataset): 9 | def __init__(self, list_path, images_root, transform=None): 10 | self._transform = transform 11 | self._images_root = images_root 12 | 13 | self._content = [] 14 | self._labels_list = [] 15 | 16 | with open(list_path, 'r') as f: 17 | for line in f: 18 | parts = line.strip().split(' ') 19 | 20 | self._content.append(parts[0]) 21 | labels = [int(label) for label in parts[1:]] 22 | self._labels_list.append(labels) 23 | 24 | def __len__(self): 25 | return len(self._content) 26 | 27 | def __getitem__(self, idx): 28 | img_path = os.path.join(self._images_root, self._content[idx]) 29 | labels = np.array(self._labels_list[idx], dtype=np.int64) 30 | 31 | img = pil_loader(img_path) 32 | if self._transform is not None: 33 | img = self._transform(img) 34 | 35 | return {'image': img, 'labels': labels} -------------------------------------------------------------------------------- /source/config.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | ORIGINAL_IMAGES_ROOT: "/original_images" 3 | RESIZED_IMAGES_ROOT: "/resized_images" 4 | SHORTEST_SIZE: 256 5 | HUMAN_LABELS_PATH: "/inclusive/train_human_labels.csv" 6 | TRAIN_LIST_PATH: "/artifacts/train.txt" 7 | VAL_LIST_PATH: "/artifacts/val.txt" 8 | VAL_RATIO: 0.01 9 | TUNING_LABELS_PATH: "/inclusive/tuning_labels.csv" 10 | TUNING_LIST_PATH: "/artifacts/tuning.txt" 11 | TUNING_TRAIN_RATIO: 0.99 12 | NAME_TO_LABEL_PATH: "/artifacts/name_to_label.pkl" 13 | STAGE1_TEST_IMAGES_ROOT: "/inclusive/stage_1_test_images" 14 | STAGE1_SAMPLE_SUBMISSION_PATH: "/inclusive/stage_1_sample_submission.csv" 15 | STAGE2_TEST_IMAGES_ROOT: "/inclusive/stage_2_test_images" 16 | STAGE2_SAMPLE_SUBMISSION_PATH: "/inclusive/stage_2_sample_submission.csv" 17 | TRAINING: 18 | SEED: 0xDEADFACE 19 | ARTIFACTS_ROOT: "/artifacts/training" 20 | BATCH_SIZE: 32 21 | INITIAL_LR: 0.01 22 | LR_DECAY: 0.1 23 | LR_STEP: 10 24 | MOMENTUM: 0.9 25 | WEIGHT_DECAY: 0.0001 26 | N_EPOCH: 25 27 | LOG_FREQUENCY: 100 28 | VALIDATION: 29 | TOP_K: 150 30 | TUNING: 31 | SEED: 0xDEADFACE 32 | ARTIFACTS_ROOT: "/artifacts/tuning" 33 | BATCH_SIZE: 32 34 | INITIAL_LR: 0.0001 35 | LR_DECAY: 0.1 36 | LR_STEP: 100 37 | MOMENTUM: 0.9 38 | WEIGHT_DECAY: 0.0001 39 | N_EPOCH: 80 40 | LOG_FREQUENCY: 30 41 | SUBMISSION: 42 | OUTPUT_ROOT: "/output" 43 | BATCH_SIZE: 32 44 | TOP_K: 150 45 | MIN_PREDS: 2 46 | MAX_PREDS: 5 47 | THRESHOLD: 0.55 48 | 49 | --------------------------------------------------------------------------------
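Editor's note: every script in this repo reads the config above with the same small pattern. A minimal sketch of it (assumes PyYAML 3.12, the version pinned in the Dockerfile; on PyYAML >= 5 you would pass an explicit `Loader` or use `yaml.safe_load`):

```python
import os
import yaml

CONFIG_PATH = '/source/config.yaml'  # same constant the scripts below define

with open(CONFIG_PATH, 'r') as f:
    # PyYAML 3.x API, as used throughout this repo.
    config = yaml.load(f)

# Nested keys mirror the YAML sections above.
print(config['DATASET']['SHORTEST_SIZE'])   # 256
print(config['SUBMISSION']['THRESHOLD'])    # 0.55
print(os.path.exists(config['DATASET']['TRAIN_LIST_PATH']))
```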
/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:8.0-cudnn7-devel-ubuntu16.04 2 | 3 | SHELL ["/bin/bash", "-c"] 4 | 5 | RUN rm -rf /var/lib/apt/lists/* \ 6 | /etc/apt/sources.list.d/cuda.list \ 7 | /etc/apt/sources.list.d/nvidia-ml.list && \ 8 | apt-get update && \ 9 | DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 10 | build-essential \ 11 | ca-certificates \ 12 | cmake \ 13 | wget \ 14 | git \ 15 | vim \ 16 | nano \ 17 | less \ 18 | tmux \ 19 | htop \ 20 | screen \ 21 | curl \ 22 | mc \ 23 | openssh-server \ 24 | openssh-client && \ 25 | DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 26 | python3 \ 27 | python3-dev && \ 28 | wget -O ~/get-pip.py \ 29 | https://bootstrap.pypa.io/get-pip.py && \ 30 | python3 ~/get-pip.py && \ 31 | pip3 --no-cache-dir install \ 32 | setuptools \ 33 | numpy==1.14.1 \ 34 | scipy==1.0.0 \ 35 | matplotlib==2.1.2 \ 36 | pandas==0.22.0 \ 37 | scikit-learn==0.19.1 \ 38 | opencv-python==3.2.0.8 \ 39 | Cython==0.27.3 \ 40 | jupyterlab==0.32.1 \ 41 | pyyaml==3.12 \ 42 | "scikit-image>=0.9.3" \ 43 | "h5py>=2.2.0" \ 44 | "networkx>=1.8.1" \ 45 | "nose>=1.3.0" \ 46 | pytest && \ 47 | cd /tmp && \ 48 | git clone https://github.com/pytorch/pytorch.git && \ 49 | cd pytorch && \ 50 | git checkout v0.4.1 && \ 51 | git submodule update --init --recursive && \ 52 | python3 setup.py install && \ 53 | pip --no-cache-dir install \ 54 | torchvision==0.2.1 \ 55 | tensorboardX==1.2 && \ 56 | printf "export LC_ALL=C.UTF-8\n" >> /etc/environment && \ 57 | apt-get clean && \ 58 | apt-get autoremove && \ 59 | rm -rf /var/lib/apt/lists/* /tmp/*
60 | 61 | COPY ./source /source 62 | -------------------------------------------------------------------------------- /source/prepare_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import yaml 4 | import random 5 | from collections import defaultdict 6 | import cv2 7 | 8 | 9 | CONFIG_PATH = '/source/config.yaml' 10 | LOG_FREQUENCY = 10000 11 | 12 | 13 | def main(): 14 | with open(CONFIG_PATH, 'r') as f: 15 | config = yaml.load(f) 16 | 17 | name_to_label = {} 18 | img_path_to_labels = defaultdict(list) 19 | 20 | with open(config['DATASET']['HUMAN_LABELS_PATH'], 'r') as f: 21 | f.readline() 22 | for line in f: 23 | parts = line.strip().split(',') 24 | img_path = os.path.join('train', parts[0] + '.jpg') 25 | name = parts[2] 26 | if name not in name_to_label: 27 | name_to_label[name] = len(name_to_label) 28 | label = name_to_label[name] 29 | img_path_to_labels[img_path].append(label) 30 | 31 | print('Total number of images: {}. Total number of labels: {}.'.format(len(img_path_to_labels), len(name_to_label))) 32 | 33 | content = sorted(img_path_to_labels) 34 | print('Resizing images...') 35 | for i, rel_path in enumerate(content): 36 | src_path = os.path.join(config['DATASET']['ORIGINAL_IMAGES_ROOT'], rel_path) 37 | dst_path = os.path.join(config['DATASET']['RESIZED_IMAGES_ROOT'], rel_path) 38 | 39 | os.makedirs(os.path.dirname(dst_path), exist_ok=True) 40 | 41 | img = cv2.imread(src_path) 42 | height, width, channels = img.shape 43 | if height < width: 44 | dst_height = config['DATASET']['SHORTEST_SIZE'] 45 | dst_width = round((config['DATASET']['SHORTEST_SIZE'] / height) * width) 46 | else: 47 | dst_width = config['DATASET']['SHORTEST_SIZE'] 48 | dst_height = round((config['DATASET']['SHORTEST_SIZE'] / width) * height) 49 | img = cv2.resize(img, (dst_width, dst_height)) 50 | 51 | cv2.imwrite(dst_path, img) 52 | if i % LOG_FREQUENCY == 0: 53 | print('{} / {} processed'.format(i + 1, len(content))) 54 | 55 | random.seed(0xDEADFACE) 56 | random.shuffle(content) 57 | 58 | n_val = round(config['DATASET']['VAL_RATIO'] * len(content)) 59 | n_train = len(content) - n_val 60 | 61 | train_content = content[:n_train] 62 | val_content = content[n_train:] 63 | 64 | print('Train size: {}. Val size: {}.'.format(len(train_content), len(val_content))) 65 | 66 | with open(config['DATASET']['TRAIN_LIST_PATH'], 'w') as f: 67 | for img_path in train_content: 68 | labels = img_path_to_labels[img_path] 69 | line = ' '.join([img_path] + [str(label) for label in labels]) 70 | f.write('{}\n'.format(line)) 71 | 72 | with open(config['DATASET']['VAL_LIST_PATH'], 'w') as f: 73 | for img_path in val_content: 74 | labels = img_path_to_labels[img_path] 75 | line = ' '.join([img_path] + [str(label) for label in labels]) 76 | f.write('{}\n'.format(line)) 77 | 78 | with open(config['DATASET']['NAME_TO_LABEL_PATH'], 'wb') as f: 79 | pickle.dump(name_to_label, f) 80 | 81 | 82 | if __name__ == '__main__': 83 | main() --------------------------------------------------------------------------------
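Editor's note: the resize step above keeps the aspect ratio and maps the shorter image side to `SHORTEST_SIZE` (256). A quick sketch of that arithmetic with a few worked cases (the helper name is mine, not part of the repo; note that `cv2.resize` takes sizes as `(width, height)`):

```python
def resized_shape(height, width, shortest_size=256):
    # Scale so the shorter edge becomes `shortest_size`; the longer edge
    # is rounded to the nearest pixel, exactly as in prepare_dataset.py.
    if height < width:
        return shortest_size, round((shortest_size / height) * width)
    return round((shortest_size / width) * height), shortest_size

assert resized_shape(768, 1024) == (256, 341)   # landscape 768x1024 -> 256x341
assert resized_shape(1024, 768) == (341, 256)   # portrait 1024x768 -> 341x256
assert resized_shape(256, 256) == (256, 256)    # already at the target size
```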
/README.md: -------------------------------------------------------------------------------- 1 | # inclusive-images-challenge 2 | 4th place solution for the [Inclusive Images Challenge on Kaggle](https://www.kaggle.com/c/inclusive-images-challenge). 3 | Private LB score: **0.33184** 4 | ## The hardware I used 5 | - CPU: Intel Core i7 5930k 6 | - GPU: 1x NVIDIA GTX 1080 7 | - RAM: 64 GB 8 | - SSD: 2x 512GB 9 | - HDD: 1x 3TB 10 | ## Prerequisites 11 | ### Environment 12 | The model was trained in a Docker container. It is highly recommended to use [nvidia-docker2](https://github.com/NVIDIA/nvidia-docker) if you want to reproduce the result. 13 | The code assumes that you have at least one NVIDIA GPU and a CUDA 8-compatible driver. Run the following command to build the Docker image: 14 | ```bash 15 | cd path/to/solution 16 | sudo docker build -t inclusive . 17 | ``` 18 | ### Free space 19 | - HDD: ~600 GB (525 GB for the Open Images Training dataset + 71 GB for checkpoints, logs, etc.) 20 | - SSD: ~100 GB (77 GB for the resized Open Images Training dataset + 13 GB for the competition data) 21 | ### Data 22 | Download the [open-images-dataset](https://www.kaggle.com/c/inclusive-images-challenge#Data-Download-&-Getting-Started) to `/path/to/hdd/open-images-dataset/train` 23 | ```bash 24 | mkdir -p /path/to/hdd/open-images-dataset 25 | cd /path/to/hdd/open-images-dataset 26 | aws s3 --no-sign-request sync s3://open-images-dataset/train train/ 27 | ``` 28 | Download the [inclusive-images-challenge-data](https://www.kaggle.com/c/inclusive-images-challenge/data) to `/path/to/ssd/inclusive-images-challenge/data` 29 | ```bash 30 | mkdir -p /path/to/ssd/inclusive-images-challenge/data 31 | cd /path/to/ssd/inclusive-images-challenge/data 32 | kaggle competitions download -c inclusive-images-challenge 33 | unzip train_human_labels.csv.zip 34 | unzip stage_1_sample_submission.csv.zip 35 | unzip stage_2_sample_submission.csv.zip 36 | unzip stage_1_test_images.zip -d stage_1_test_images 37 | unzip stage_2_images.zip -d stage_2_test_images 38 | ``` 39 | _Note: there are some missing files in the Inclusive Images Challenge Stage 1 data. You have to delete the corresponding rows from `tuning_labels.csv` manually; one way to do that is sketched below._
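A possible cleanup script for the note above — my own sketch, not part of the original repo. It rewrites `tuning_labels.csv` in place, keeping only the rows whose image file actually exists (paths assume the container mounts described below; adjust them if you run it on the host):

```python
import os

TUNING_LABELS_PATH = '/inclusive/tuning_labels.csv'
STAGE1_IMAGES_ROOT = '/inclusive/stage_1_test_images'

with open(TUNING_LABELS_PATH, 'r') as f:
    rows = f.readlines()

# tuning_labels.csv has no header; each row is "<image_id>,<label names>".
kept = [row for row in rows
        if os.path.exists(os.path.join(STAGE1_IMAGES_ROOT,
                                       row.split(',')[0] + '.jpg'))]

print('Dropping {} rows whose images are missing'.format(len(rows) - len(kept)))
with open(TUNING_LABELS_PATH, 'w') as f:
    f.writelines(kept)
```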
40 | ### Output directories 41 | Create a directory for the resized Open Images Training dataset 42 | ```bash 43 | mkdir -p /path/to/ssd/open-images-dataset/train-resized 44 | ``` 45 | Create a directory for training artifacts (checkpoints, logs, etc.) 46 | ```bash 47 | mkdir -p /path/to/hdd/inclusive-images-challenge/artifacts 48 | ``` 49 | Create a directory for the model output (submissions) 50 | ```bash 51 | mkdir -p /path/to/hdd/inclusive-images-challenge/output 52 | ``` 53 | ## How to train the model 54 | Run the Docker container with the paths mounted correctly 55 | ```bash 56 | sudo docker run --runtime=nvidia -i -t -d --rm --ipc=host -v /path/to/hdd/open-images-dataset:/original_images -v /path/to/ssd/open-images-dataset/train-resized:/resized_images -v /path/to/ssd/inclusive-images-challenge/data:/inclusive -v /path/to/hdd/inclusive-images-challenge/artifacts:/artifacts -v /path/to/hdd/inclusive-images-challenge/output:/output --name inclusive inclusive 57 | sudo docker exec -it inclusive /bin/bash 58 | ``` 59 | Prepare the training dataset (inside the container) 60 | ```bash 61 | python3 /source/prepare_dataset.py 62 | ``` 63 | Train the model on the Open Images Training dataset (inside the container) 64 | ```bash 65 | python3 /source/train_se_resnet101.py 66 | ``` 67 | Finetune the model on the Inclusive Images Challenge Stage 1 tuning set (inside the container) 68 | ```bash 69 | python3 /source/tune_se_resnet101.py 70 | ``` 71 | ## How to generate submissions 72 | Run the following command (inside the container), where `<stage_id>` is `1` or `2` 73 | ```bash 74 | python3 /source/generate_submission.py <stage_id> 75 | ``` 76 | Submissions will appear in the output directory: `/path/to/hdd/inclusive-images-challenge/output` 77 | The fastest way to get predictions for a new test dataset is to replace the stage 2 dataset with the new one.
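For reference, `generate_submission.py` turns the per-image scores into a label set with a simple rule: keep labels scoring above `THRESHOLD` (0.55), cap them at `MAX_PREDS` (5), and fall back to the top `MIN_PREDS` (2) labels when too few pass. A condensed sketch of that selection (a standalone rewrite for illustration, not the script's exact code):

```python
import numpy as np

def select_labels(pred_scores, pred_labels,
                  threshold=0.55, min_preds=2, max_preds=5):
    # Sort ascending by score, mirroring generate_submission.py.
    order = np.argsort(pred_scores)
    labels, scores = pred_labels[order], pred_scores[order]
    chosen = labels[scores > threshold]
    if len(chosen) > max_preds:
        chosen = chosen[-max_preds:]      # keep only the highest-scoring labels
    if len(chosen) < min_preds:
        chosen = labels[-min_preds:]      # too few passed: take the top-2 overall
    return chosen.tolist()

scores = np.array([0.9, 0.1, 0.7, 0.6, 0.2])
labels = np.array([10, 11, 12, 13, 14])
print(select_labels(scores, labels))      # [13, 12, 10]
```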
78 | ## `source/config.yaml` 79 | This file specifies the path to the train, test, model, and output directories. 80 | - This is the only place where these paths are specified. 81 | - Any code that performs I/O takes its base paths from `config.yaml`. 82 | _Note: If you are using the Docker container, you do not need to change the paths in this file._ 83 | ## Serialized copy of the trained model 84 | You can download the artifacts folder that I used to generate my final submissions: [GoogleDrive](https://drive.google.com/file/d/1rg5m7xKXGdc3jnaI-QKLKtpwUPAmieeP/view?usp=sharing) 85 | 86 | -------------------------------------------------------------------------------- /source/models/se_resnet.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import math 3 | 4 | 5 | class SELayer(nn.Module): 6 | 7 | def __init__(self, channel, reduction=16): 8 | super(SELayer, self).__init__() 9 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 10 | self.fc = nn.Sequential( 11 | nn.Linear(channel, channel // reduction), 12 | nn.ReLU(inplace=True), 13 | nn.Linear(channel // reduction, channel), 14 | nn.Sigmoid()) 15 | 16 | def forward(self, x): 17 | b, c, _, _ = x.size() 18 | y = self.avg_pool(x).view(b, c)  # squeeze: global average pooling 19 | y = self.fc(y).view(b, c, 1, 1)  # excitation: per-channel gates in [0, 1] 20 | return x * y 21 | 22 | 23 | class SEBottleneck(nn.Module): 24 | expansion = 4 25 | 26 | def __init__(self, inplanes, planes, stride=1, downsample=None, reduction=16): 27 | super(SEBottleneck, self).__init__() 28 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) 29 | self.bn1 = nn.BatchNorm2d(planes) 30 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1, bias=False) 31 | self.bn2 = nn.BatchNorm2d(planes) 32 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False) 33 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 34 | self.relu = nn.ReLU(inplace=True) 35 | self.se = SELayer(planes * self.expansion, reduction) 36 | self.downsample = downsample 37 | self.stride = stride 38 | 39 | def forward(self, x): 40 | residual = x 41 | 42 | out = self.conv1(x) 43 | out = self.bn1(out) 44 | out = self.relu(out) 45 | 46 | out = self.conv2(out) 47 | out = self.bn2(out) 48 | out = self.relu(out) 49 | 50 | out = self.conv3(out) 51 | out = self.bn3(out) 52 | out = self.se(out) 53 | 54 | if self.downsample is not None: 55 | residual = self.downsample(x) 56 | 57 | out += residual 58 | out = self.relu(out) 59 | 60 | return out 61 | 62 | 63 | class SEResNet(nn.Module): 64 | 65 | def __init__(self, block, layers): 66 | self.inplanes = 64 67 | super(SEResNet, self).__init__() 68 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 69 | bias=False) 70 | self.bn1 = nn.BatchNorm2d(64) 71 | self.relu = nn.ReLU(inplace=True) 72 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2) 73 | 74 | self.layer1 = self._make_layer(block, 64, layers[0]) 75 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 76 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 77 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 78 | self.avgpool = nn.AvgPool2d(5) 79 | 80 | for m in self.modules(): 81 | if isinstance(m, nn.Conv2d): 82 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 83 | m.weight.data.normal_(0, math.sqrt(2.
/ n)) 84 | elif isinstance(m, nn.BatchNorm2d): 85 | m.weight.data.fill_(1) 86 | m.bias.data.zero_() 87 | 88 | def _make_layer(self, block, planes, blocks, stride=1): 89 | downsample = None 90 | if stride != 1 or self.inplanes != planes * block.expansion: 91 | downsample = nn.Sequential( 92 | nn.Conv2d(self.inplanes, planes * block.expansion, 93 | kernel_size=1, stride=stride, padding=0, bias=False), 94 | nn.BatchNorm2d(planes * block.expansion)) 95 | 96 | layers = [] 97 | layers.append(block(self.inplanes, planes, stride, downsample)) 98 | self.inplanes = planes * block.expansion 99 | for i in range(1, blocks): 100 | layers.append(block(self.inplanes, planes)) 101 | 102 | return nn.Sequential(*layers) 103 | 104 | def forward(self, x): 105 | x = self.conv1(x) 106 | x = self.bn1(x) 107 | x = self.relu(x) 108 | 109 | x = self.maxpool(x) 110 | 111 | x = self.layer1(x) 112 | x = self.layer2(x) 113 | x = self.layer3(x) 114 | x = self.layer4(x) 115 | 116 | x = self.avgpool(x) 117 | x = x.view(x.size(0), -1) 118 | 119 | return x 120 | 121 | 122 | def se_resnet101(**kwargs): 123 | """Constructs a SE-ResNet-101 model. 124 | """ 125 | model = SEResNet(SEBottleneck, [3, 4, 23, 3], **kwargs) 126 | return model -------------------------------------------------------------------------------- /source/generate_submission.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import pickle 4 | import argparse 5 | 6 | import numpy as np 7 | import torch 8 | torch.backends.cudnn.benchmark=True 9 | from torch import nn 10 | from torch.nn import functional as F 11 | from torch.utils.data import DataLoader 12 | 13 | from datasets import LabeledImages 14 | from torchvision import transforms 15 | from models.se_resnet import se_resnet101 16 | 17 | 18 | CONFIG_PATH = '/source/config.yaml' 19 | 20 | 21 | class Classifier(nn.Module): 22 | def __init__(self, n_classes): 23 | super(Classifier, self).__init__() 24 | self.n_classes = n_classes 25 | self.features = se_resnet101() 26 | self.classifier = nn.Linear(2048, n_classes) 27 | 28 | def forward(self, x): 29 | x = self.features(x) 30 | x = self.classifier(x) 31 | 32 | return x 33 | 34 | 35 | class ModelWithLoss(nn.Module): 36 | def __init__(self, classifier): 37 | super(ModelWithLoss, self).__init__() 38 | self.classifier = classifier 39 | self.criterion = nn.CrossEntropyLoss(size_average=False) 40 | 41 | def forward(self, x, labels): 42 | assert len(x) == len(labels) 43 | 44 | predictions = self.classifier(x) 45 | 46 | all_classes = np.arange(self.classifier.n_classes, dtype=np.int64) 47 | zero_label = torch.tensor([0]).to(x.device) 48 | 49 | loss = 0 50 | denominator = 0 51 | for prediction, positives in zip(predictions, labels): 52 | negatives = np.setdiff1d(all_classes, positives, assume_unique=True) 53 | negatives_tensor = torch.tensor(negatives).to(x.device) 54 | positives_tensor = torch.tensor(positives).to(x.device).unsqueeze(dim=1) 55 | 56 | for positive in positives_tensor: 57 | indices = torch.cat((positive, negatives_tensor)) 58 | loss = loss + self.criterion(prediction[indices].unsqueeze(dim=0), zero_label) 59 | denominator += 1 60 | 61 | loss /= denominator 62 | 63 | return loss 64 | 65 | def predict(self, x, top_k): 66 | input_shape = x.shape 67 | if len(input_shape) == 5: 68 | x = x.view(-1, input_shape[2], input_shape[3], input_shape[4]) 69 | predictions = self.classifier(x) 70 | predictions = predictions.view(input_shape[0], input_shape[1], -1).mean(dim=1) 71 | else: 72 | predictions = 
self.classifier(x) 73 | 74 | scores, labels = predictions.sort(dim=1, descending=True) 75 | 76 | pred_scores = np.zeros(shape=(len(scores), top_k), dtype=np.float32) 77 | pred_labels = labels[:, :top_k].cpu().numpy() 78 | 79 | for i in range(top_k): 80 | i_scores = torch.cat((scores[:, i:i + 1], scores[:, top_k:]), dim=1) 81 | pred_scores[:, i] = F.softmax(i_scores, dim=1)[:, 0].cpu().numpy() 82 | 83 | return pred_scores, pred_labels 84 | 85 | 86 | def main(): 87 | parser = argparse.ArgumentParser(description='Generates submission (stage 1 or stage 2)') 88 | parser.add_argument('stage', type=int, choices=[1, 2]) 89 | args = parser.parse_args() 90 | 91 | with open(CONFIG_PATH, 'r') as f: 92 | config = yaml.load(f) 93 | 94 | dataset_root = config['DATASET']['STAGE{}_TEST_IMAGES_ROOT'.format(args.stage)] 95 | assert os.path.exists(dataset_root), dataset_root 96 | sample_submission_path = config['DATASET']['STAGE{}_SAMPLE_SUBMISSION_PATH'.format(args.stage)] 97 | assert os.path.exists(sample_submission_path), sample_submission_path 98 | 99 | output_root = config['SUBMISSION']['OUTPUT_ROOT'] 100 | assert os.path.exists(output_root), output_root 101 | 102 | test_list_path = os.path.join(output_root, 'test_stage{}.txt'.format(args.stage)) 103 | 104 | with open(sample_submission_path, 'r') as f_in, open(test_list_path, 'w') as f_out: 105 | f_in.readline() 106 | for line in f_in: 107 | img_id, _ = line.split(',') 108 | img_name = img_id + '.jpg' 109 | if os.path.exists(os.path.join(dataset_root, img_name)): 110 | f_out.write('{}\n'.format(img_name)) 111 | else: 112 | print('Warning: file {} does not exist'.format(os.path.join(dataset_root, img_name))) 113 | 114 | with open(config['DATASET']['NAME_TO_LABEL_PATH'], 'rb') as f: 115 | name_to_label = pickle.load(f) 116 | label_to_name = {label: name for name, label in name_to_label.items()} 117 | n_classes = len(name_to_label) 118 | 119 | classifier = Classifier(n_classes) 120 | model = ModelWithLoss(classifier).cuda().eval() 121 | 122 | snapshot_path = os.path.join(config['TUNING']['ARTIFACTS_ROOT'], 'snapshots', 123 | 'snapshot_epoch_{}.pth.tar'.format(config['TUNING']['N_EPOCH'])) 124 | state = torch.load(snapshot_path, map_location=lambda storage, loc: storage) 125 | model.load_state_dict(state['model']) 126 | 127 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 128 | to_tensor = transforms.Compose([transforms.ToTensor(), normalize]) 129 | tta_transform = transforms.Compose([transforms.Resize(256), 130 | transforms.TenCrop(224), 131 | transforms.Lambda( 132 | lambda crops: torch.stack([to_tensor(crop) for crop in crops]))]) 133 | dataset = LabeledImages(test_list_path, dataset_root, tta_transform) 134 | loader = DataLoader(dataset, config['SUBMISSION']['BATCH_SIZE'], num_workers=4, collate_fn=lambda X: X) 135 | 136 | pred_scores_all = [] 137 | pred_labels_all = [] 138 | 139 | for samples in loader: 140 | input_tensor = torch.stack([sample['image'] for sample in samples]).cuda() 141 | 142 | with torch.no_grad(): 143 | pred_scores, pred_labels = model.predict(input_tensor, config['SUBMISSION']['TOP_K']) 144 | pred_scores_all.extend(pred_scores) 145 | pred_labels_all.extend(pred_labels) 146 | 147 | image_id_to_names = {} 148 | 149 | threshold = config['SUBMISSION']['THRESHOLD'] 150 | min_preds = config['SUBMISSION']['MIN_PREDS'] 151 | max_preds = config['SUBMISSION']['MAX_PREDS'] 152 | 153 | for pred_scores, pred_labels, img_name in zip(pred_scores_all, pred_labels_all, dataset._content): 154 | best_indices = 
np.argsort(pred_scores) 155 | best_labels = pred_labels[best_indices] 156 | best_scores = pred_scores[best_indices] 157 | pred_labels = best_labels[best_scores > threshold] 158 | if len(pred_labels) > max_preds: 159 | pred_labels = pred_labels[-max_preds:] 160 | if len(pred_labels) >= min_preds: 161 | pred_names = [label_to_name[label] for label in pred_labels.tolist()] 162 | else: 163 | pred_names = [label_to_name[label] for label in best_labels[-min_preds:].tolist()] 164 | image_id = img_name.split('.')[0] 165 | image_id_to_names[image_id] = pred_names 166 | 167 | submission_path = os.path.join(output_root, 'submission_stage{}.csv'.format(args.stage)) 168 | 169 | with open(sample_submission_path, 'r') as f_in, open(submission_path, 'w') as f_out: 170 | f_out.write(f_in.readline()) 171 | for line in f_in: 172 | img_id, _ = line.split(',') 173 | if img_id in image_id_to_names: 174 | names = image_id_to_names[img_id] 175 | f_out.write('{},'.format(img_id)) 176 | f_out.write('{}\n'.format(' '.join(names))) 177 | else: 178 | f_out.write('{},\n'.format(img_id)) 179 | 180 | 181 | if __name__ == '__main__': 182 | main() 183 | -------------------------------------------------------------------------------- /source/train_se_resnet101.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import pickle 4 | import logging 5 | import random 6 | 7 | import numpy as np 8 | import torch 9 | torch.backends.cudnn.benchmark=True 10 | from torch import nn 11 | from torch.nn import functional as F 12 | from torch.utils.data import DataLoader 13 | 14 | from tensorboardX import SummaryWriter 15 | 16 | from datasets import LabeledImages 17 | from torchvision import transforms 18 | from models.se_resnet import se_resnet101 19 | 20 | from utils.average_precision_calculator import AveragePrecisionCalculator 21 | 22 | 23 | CONFIG_PATH = '/source/config.yaml' 24 | 25 | 26 | def get_path(rel_path, config): 27 | return os.path.join(config['TRAINING']['ARTIFACTS_ROOT'], rel_path) 28 | 29 | 30 | class Classifier(nn.Module): 31 | def __init__(self, n_classes): 32 | super(Classifier, self).__init__() 33 | self.n_classes = n_classes 34 | self.features = se_resnet101() 35 | self.classifier = nn.Linear(2048, n_classes) 36 | 37 | def forward(self, x): 38 | x = self.features(x) 39 | x = self.classifier(x) 40 | 41 | return x 42 | 43 | 44 | class ModelWithLoss(nn.Module): 45 | def __init__(self, classifier): 46 | super(ModelWithLoss, self).__init__() 47 | self.classifier = classifier 48 | self.criterion = nn.CrossEntropyLoss(size_average=False) 49 | 50 | def forward(self, x, labels): 51 | assert len(x) == len(labels) 52 | 53 | predictions = self.classifier(x) 54 | 55 | all_classes = np.arange(self.classifier.n_classes, dtype=np.int64) 56 | zero_label = torch.tensor([0]).to(x.device) 57 | 58 | loss = 0 59 | denominator = 0 60 | for prediction, positives in zip(predictions, labels): 61 | negatives = np.setdiff1d(all_classes, positives, assume_unique=True) 62 | negatives_tensor = torch.tensor(negatives).to(x.device) 63 | positives_tensor = torch.tensor(positives).to(x.device).unsqueeze(dim=1) 64 | 65 | for positive in positives_tensor: 66 | indices = torch.cat((positive, negatives_tensor)) 67 | loss = loss + self.criterion(prediction[indices].unsqueeze(dim=0), zero_label) 68 | denominator += 1 69 | 70 | loss /= denominator 71 | 72 | return loss 73 | 74 | def predict(self, x, top_k): 75 | predictions = self.classifier(x) 76 | scores, labels = predictions.sort(dim=1, 
descending=True) 77 | 78 | pred_scores = np.zeros(shape=(len(scores), top_k), dtype=np.float32) 79 | pred_labels = labels[:, :top_k].cpu().numpy() 80 | 81 | for i in range(top_k): 82 | i_scores = torch.cat((scores[:, i:i + 1], scores[:, top_k:]), dim=1) 83 | pred_scores[:, i] = F.softmax(i_scores, dim=1)[:, 0].cpu().numpy() 84 | 85 | return pred_scores, pred_labels 86 | 87 | 88 | class AverageMeter(object): 89 | def __init__(self): 90 | self.reset() 91 | 92 | def reset(self): 93 | self.val = 0 94 | self.avg = 0 95 | self.sum = 0 96 | self.count = 0 97 | 98 | def update(self, val, n=1): 99 | self.val = val 100 | self.sum += val * n 101 | self.count += n 102 | self.avg = self.sum / self.count 103 | 104 | 105 | def validate(model, val_dataset, batch_size, top_k): 106 | model.eval() 107 | 108 | predictions = [] 109 | actuals = [] 110 | total_num_positives = 0 111 | 112 | loss_meter = AverageMeter() 113 | 114 | val_loader = DataLoader(val_dataset, batch_size, num_workers=4, collate_fn=lambda X: X) 115 | for samples in val_loader: 116 | input_tensor = torch.stack([sample['image'] for sample in samples]).cuda() 117 | labels = [sample['labels'] for sample in samples] 118 | 119 | with torch.no_grad(): 120 | pred_scores, pred_labels = model.predict(input_tensor, top_k) 121 | loss = model(input_tensor, labels) 122 | 123 | loss_meter.update(loss.item(), len(samples)) 124 | 125 | cur_actuals = np.zeros_like(pred_labels, dtype=np.bool) 126 | for i in range(len(pred_labels)): 127 | assert len(labels[i]) <= top_k 128 | total_num_positives += len(labels[i]) 129 | for label in labels[i]: 130 | cur_actuals[i] = np.logical_or(cur_actuals[i], pred_labels[i] == label) 131 | 132 | predictions.extend(pred_scores.flatten().tolist()) 133 | actuals.extend(cur_actuals.astype(np.uint8).flatten().tolist()) 134 | 135 | gap = AveragePrecisionCalculator.ap_at_n(predictions, actuals, n=None, total_num_positives=total_num_positives) 136 | return loss_meter.avg, gap 137 | 138 | 139 | def adjust_learning_rate(optimizer, epoch, initial_lr, lr_decay, lr_step): 140 | lr = initial_lr * (lr_decay ** (epoch // lr_step)) 141 | for param_group in optimizer.param_groups: 142 | param_group['lr'] = lr 143 | 144 | 145 | def main(): 146 | with open(CONFIG_PATH, 'r') as f: 147 | config = yaml.load(f) 148 | os.makedirs(get_path('snapshots', config), exist_ok=True) 149 | 150 | logger = logging.getLogger('train') 151 | logger.setLevel(logging.DEBUG) 152 | fh = logging.FileHandler(get_path('train.log', config)) 153 | fh.setLevel(logging.DEBUG) 154 | ch = logging.StreamHandler() 155 | ch.setLevel(logging.DEBUG) 156 | formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') 157 | fh.setFormatter(formatter) 158 | ch.setFormatter(formatter) 159 | logger.addHandler(fh) 160 | logger.addHandler(ch) 161 | 162 | random.seed(config['TRAINING']['SEED']) 163 | np.random.seed(config['TRAINING']['SEED']) 164 | torch.manual_seed(config['TRAINING']['SEED']) 165 | 166 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 167 | train_transform = transforms.Compose([transforms.Resize(256), 168 | transforms.RandomCrop(224), 169 | transforms.RandomHorizontalFlip(), 170 | transforms.ToTensor(), 171 | normalize]) 172 | val_transform = transforms.Compose([transforms.Resize(256), 173 | transforms.CenterCrop(224), 174 | transforms.ToTensor(), 175 | normalize]) 176 | train_dataset = LabeledImages(config['DATASET']['TRAIN_LIST_PATH'], config['DATASET']['RESIZED_IMAGES_ROOT'], 177 | train_transform) 178 | val_dataset = 
LabeledImages(config['DATASET']['VAL_LIST_PATH'], config['DATASET']['RESIZED_IMAGES_ROOT'], 179 | val_transform) 180 | logger.info('Train size: {}. Val size: {}.'.format(len(train_dataset), len(val_dataset))) 181 | 182 | with open(config['DATASET']['NAME_TO_LABEL_PATH'], 'rb') as f: 183 | name_to_label = pickle.load(f) 184 | n_classes = len(name_to_label) 185 | logger.info('Total number of classes: {}.'.format(n_classes)) 186 | 187 | classifier = Classifier(n_classes) 188 | model_with_loss = ModelWithLoss(classifier).cuda() 189 | 190 | optimizer = torch.optim.SGD(model_with_loss.parameters(), 191 | lr=config['TRAINING']['INITIAL_LR'], 192 | momentum=config['TRAINING']['MOMENTUM'], 193 | weight_decay=config['TRAINING']['WEIGHT_DECAY'], 194 | nesterov=True) 195 | 196 | loss_meter = AverageMeter() 197 | 198 | iteration = 0 199 | snapshots = [(int(path.split('_epoch_')[-1].split('.')[0]), path) for path in os.listdir(get_path('snapshots', config)) 200 | if path.startswith('snapshot') and path.endswith('.pth.tar')] 201 | if len(snapshots) > 0: 202 | snapshots.sort(key=lambda t: t[0]) 203 | logger.info('Finetuning from {}'.format(snapshots[-1][1])) 204 | state = torch.load(os.path.join(get_path('snapshots', config), snapshots[-1][1]), map_location=lambda storage, loc: storage) 205 | model_with_loss.load_state_dict(state['model']) 206 | optimizer.load_state_dict(state['optimizer']) 207 | start_epoch = state['epoch'] + 1 208 | else: 209 | start_epoch = 1 210 | writer = SummaryWriter(get_path(os.path.join('tensorboard', 'run_epoch_{}'.format(start_epoch)), config)) 211 | 212 | for epoch in range(start_epoch, config['TRAINING']['N_EPOCH'] + 1): 213 | adjust_learning_rate(optimizer, epoch, config['TRAINING']['INITIAL_LR'], config['TRAINING']['LR_DECAY'], 214 | config['TRAINING']['LR_STEP']) 215 | logger.info('Start epoch {} / {}.'.format(epoch, config['TRAINING']['N_EPOCH'])) 216 | val_loss, val_gap = validate(model_with_loss, val_dataset, config['TRAINING']['BATCH_SIZE'], 217 | config['VALIDATION']['TOP_K']) 218 | logger.info('Val loss: {}. Val GAP: {}.'.format(val_loss, val_gap)) 219 | writer.add_scalar('val_loss', val_loss, iteration) 220 | writer.add_scalar('val_gap', val_gap, iteration) 221 | 222 | model_with_loss.train() 223 | train_loader = DataLoader(train_dataset, 224 | batch_size=config['TRAINING']['BATCH_SIZE'], 225 | shuffle=True, 226 | num_workers=4, 227 | collate_fn=lambda X: X, 228 | drop_last=True) 229 | for samples in train_loader: 230 | input_tensor = torch.stack([sample['image'] for sample in samples]).cuda() 231 | labels = [sample['labels'] for sample in samples] 232 | 233 | optimizer.zero_grad() 234 | loss = model_with_loss(input_tensor, labels) 235 | loss.backward() 236 | optimizer.step() 237 | 238 | loss_meter.update(loss.item(), len(input_tensor)) 239 | 240 | if iteration % config['TRAINING']['LOG_FREQUENCY'] == 0: 241 | logger.info('Iteration {}. 
Loss {}.'.format(iteration, loss_meter.avg)) 242 | writer.add_scalar('train_loss', loss_meter.avg, iteration) 243 | loss_meter.reset() 244 | 245 | for i, param_group in enumerate(optimizer.param_groups): 246 | writer.add_scalar('lr/group_{}'.format(i), param_group['lr'], iteration) 247 | 248 | iteration += 1 249 | 250 | state = {'model': model_with_loss.state_dict(), 'optimizer': optimizer.state_dict(), 'epoch': epoch} 251 | torch.save(state, get_path(os.path.join('snapshots', 'snapshot_epoch_{}.pth.tar'.format(epoch)), config)) 252 | 253 | 254 | if __name__ == '__main__': 255 | main() -------------------------------------------------------------------------------- /source/utils/average_precision_calculator.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | #     http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS-IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Calculate or keep track of the interpolated average precision. 16 | 17 | It provides an interface for calculating interpolated average precision for an 18 | entire list or the top-n ranked items. For the definition of the 19 | (non-)interpolated average precision: 20 | http://trec.nist.gov/pubs/trec15/appendices/CE.MEASURES06.pdf 21 | 22 | Example usages: 23 | 1) Use it as a static function call to directly calculate average precision for 24 | a short ranked list in memory. 25 | 26 | ``` 27 | import random 28 | import numpy as np 29 | p = np.array([random.random() for _ in range(10)]) 30 | a = np.array([random.choice([0, 1]) for _ in range(10)]) 31 | 32 | ap = average_precision_calculator.AveragePrecisionCalculator.ap(p, a) 33 | ``` 34 | 35 | 2) Use it as an object for a long ranked list that cannot be stored in memory, or 36 | for the case where partial predictions are observed at a time (TensorFlow 37 | predictions). In this case, we first call the function accumulate many times 38 | to process parts of the ranked list. After processing all the parts, we call 39 | peek_ap_at_n. 40 | ``` 41 | p1 = np.array([random.random() for _ in range(5)]) 42 | a1 = np.array([random.choice([0, 1]) for _ in range(5)]) 43 | p2 = np.array([random.random() for _ in range(5)]) 44 | a2 = np.array([random.choice([0, 1]) for _ in range(5)]) 45 | 46 | # non-interpolated average precision at the top 10 47 | calculator = average_precision_calculator.AveragePrecisionCalculator(10) 48 | calculator.accumulate(p1, a1) 49 | calculator.accumulate(p2, a2) 50 | ap3 = calculator.peek_ap_at_n() 51 | ``` 52 | """ 53 | 54 | import heapq 55 | import random 56 | import numbers 57 | 58 | import numpy 59 | 60 | 61 | class AveragePrecisionCalculator(object): 62 | """Calculate the average precision and average precision at n.""" 63 | 64 | def __init__(self, top_n=None): 65 | """Construct an AveragePrecisionCalculator to calculate average precision. 66 | 67 | This class is used to calculate the average precision for a single label.
68 | 69 | Args: 70 | top_n: A positive integer specifying the average precision at n, or 71 | None to use all provided data points. 72 | 73 | Raises: 74 | ValueError: An error occurred when the top_n is not a positive integer. 75 | """ 76 | if not ((isinstance(top_n, int) and top_n > 0) or top_n is None): 77 | raise ValueError("top_n must be a positive integer or None.") 78 | 79 | self._top_n = top_n # average precision at n 80 | self._total_positives = 0 # total number of positives seen so far 81 | self._heap = [] # min heap of (prediction, actual) keeping the top-n pairs 82 | 83 | @property 84 | def heap_size(self): 85 | """Gets the heap size maintained in the class.""" 86 | return len(self._heap) 87 | 88 | @property 89 | def num_accumulated_positives(self): 90 | """Gets the number of positive samples that have been accumulated.""" 91 | return self._total_positives 92 | 93 | def accumulate(self, predictions, actuals, num_positives=None): 94 | """Accumulate the predictions and their ground truth labels. 95 | 96 | After the function call, we may call peek_ap_at_n to actually calculate 97 | the average precision. 98 | Note predictions and actuals must have the same shape. 99 | 100 | Args: 101 | predictions: a list storing the prediction scores. 102 | actuals: a list storing the ground truth labels. Any value 103 | larger than 0 will be treated as positives, otherwise as negatives. 104 | num_positives: If the 'predictions' and 'actuals' inputs aren't complete, 105 | then it's possible some true positives were missed in them. In that case, 106 | you can provide 'num_positives' in order to accurately track recall. 107 | 108 | Raises: 109 | ValueError: An error occurred when the format of the input is not the 110 | numpy 1-D array or the shape of predictions and actuals does not match. 111 | """ 112 | if len(predictions) != len(actuals): 113 | raise ValueError("the shape of predictions and actuals does not match.") 114 | 115 | if num_positives is not None: 116 | if not isinstance(num_positives, numbers.Number) or num_positives < 0: 117 | raise ValueError("'num_positives' was provided but it wasn't a nonnegative number.") 118 | 119 | if num_positives is not None: 120 | self._total_positives += num_positives 121 | else: 122 | self._total_positives += numpy.size(numpy.where(actuals > 0)) 123 | topk = self._top_n 124 | heap = self._heap 125 | 126 | for i in range(numpy.size(predictions)): 127 | if topk is None or len(heap) < topk: 128 | heapq.heappush(heap, (predictions[i], actuals[i])) 129 | else: 130 | if predictions[i] > heap[0][0]: # heap[0] is the smallest 131 | heapq.heappop(heap) 132 | heapq.heappush(heap, (predictions[i], actuals[i])) 133 | 134 | def clear(self): 135 | """Clear the accumulated predictions.""" 136 | self._heap = [] 137 | self._total_positives = 0 138 | 139 | def peek_ap_at_n(self): 140 | """Peek the non-interpolated average precision at n. 141 | 142 | Returns: 143 | The non-interpolated average precision at n (default 0). 144 | If n is larger than the length of the ranked list, 145 | the average precision will be returned. 146 | """ 147 | if self.heap_size <= 0: 148 | return 0 149 | predlists = numpy.array(list(zip(*self._heap))) 150 | 151 | ap = self.ap_at_n(predlists[0], 152 | predlists[1], 153 | n=self._top_n, 154 | total_num_positives=self._total_positives) 155 | return ap 156 | 157 | @staticmethod 158 | def ap(predictions, actuals): 159 | """Calculate the non-interpolated average precision. 160 | 161 | Args: 162 | predictions: a numpy 1-D array storing the sparse prediction scores.
163 | actuals: a numpy 1-D array storing the ground truth labels. Any value 164 | larger than 0 will be treated as positives, otherwise as negatives. 165 | 166 | Returns: 167 | The non-interpolated average precision at n. 168 | If n is larger than the length of the ranked list, 169 | the average precision will be returned. 170 | 171 | Raises: 172 | ValueError: An error occurred when the format of the input is not the 173 | numpy 1-D array or the shape of predictions and actuals does not match. 174 | """ 175 | return AveragePrecisionCalculator.ap_at_n(predictions, 176 | actuals, 177 | n=None) 178 | 179 | @staticmethod 180 | def ap_at_n(predictions, actuals, n=20, total_num_positives=None): 181 | """Calculate the non-interpolated average precision. 182 | 183 | Args: 184 | predictions: a numpy 1-D array storing the sparse prediction scores. 185 | actuals: a numpy 1-D array storing the ground truth labels. Any value 186 | larger than 0 will be treated as positives, otherwise as negatives. 187 | n: the top n items to be considered in ap@n. 188 | total_num_positives : (optionally) you can specify the number of total 189 | positive 190 | in the list. If specified, it will be used in calculation. 191 | 192 | Returns: 193 | The non-interpolated average precision at n. 194 | If n is larger than the length of the ranked list, 195 | the average precision will be returned. 196 | 197 | Raises: 198 | ValueError: An error occurred when 199 | 1) the format of the input is not the numpy 1-D array; 200 | 2) the shape of predictions and actuals does not match; 201 | 3) the input n is not a positive integer. 202 | """ 203 | if len(predictions) != len(actuals): 204 | raise ValueError("the shape of predictions and actuals does not match.") 205 | 206 | if n is not None: 207 | if not isinstance(n, int) or n <= 0: 208 | raise ValueError("n must be 'None' or a positive integer." 209 | " It was '%s'." % n) 210 | 211 | ap = 0.0 212 | 213 | predictions = numpy.array(predictions) 214 | actuals = numpy.array(actuals) 215 | 216 | # add a shuffler to avoid overestimating the ap 217 | predictions, actuals = AveragePrecisionCalculator._shuffle(predictions, 218 | actuals) 219 | sortidx = sorted( 220 | range(len(predictions)), 221 | key=lambda k: predictions[k], 222 | reverse=True) 223 | 224 | if total_num_positives is None: 225 | numpos = numpy.size(numpy.where(actuals > 0)) 226 | else: 227 | numpos = total_num_positives 228 | 229 | if numpos == 0: 230 | return 0 231 | 232 | if n is not None: 233 | numpos = min(numpos, n) 234 | delta_recall = 1.0 / numpos 235 | poscount = 0.0 236 | 237 | # calculate the ap 238 | r = len(sortidx) 239 | if n is not None: 240 | r = min(r, n) 241 | for i in range(r): 242 | if actuals[sortidx[i]] > 0: 243 | poscount += 1 244 | ap += poscount / (i + 1) * delta_recall 245 | return ap 246 | 247 | @staticmethod 248 | def _shuffle(predictions, actuals): 249 | random.seed(0) 250 | suffidx = random.sample(range(len(predictions)), len(predictions)) 251 | predictions = predictions[suffidx] 252 | actuals = actuals[suffidx] 253 | return predictions, actuals 254 | 255 | @staticmethod 256 | def _zero_one_normalize(predictions, epsilon=1e-7): 257 | """Normalize the predictions to the range between 0.0 and 1.0. 258 | 259 | For some predictions like SVM predictions, we need to normalize them before 260 | calculate the interpolated average precision. The normalization will not 261 | change the rank in the original list and thus won't change the average 262 | precision. 
263 | 264 | Args: 265 | predictions: a numpy 1-D array storing the sparse prediction scores. 266 | epsilon: a small constant to avoid denominator being zero. 267 | 268 | Returns: 269 | The normalized prediction. 270 | """ 271 | denominator = numpy.max(predictions) - numpy.min(predictions) 272 | ret = (predictions - numpy.min(predictions)) / numpy.maximum(denominator, 273 | epsilon) 274 | return ret 275 | --------------------------------------------------------------------------------
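Editor's note: the training script's `validate` function computes the competition's GAP metric by flattening all top-K scores and their hit/miss indicators into two long lists and calling the static `ap_at_n` with `n=None`. A tiny self-contained usage sketch with synthetic numbers (assumes it is run from `/source`, like the scripts themselves, so the `utils` package is importable):

```python
from utils.average_precision_calculator import AveragePrecisionCalculator

# Three ranked candidate scores; two are true positives, and one positive
# never made it into the candidate list, hence total_num_positives=3.
predictions = [0.9, 0.8, 0.3]
actuals = [1, 0, 1]

gap = AveragePrecisionCalculator.ap_at_n(
    predictions, actuals, n=None, total_num_positives=3)

# Hits at ranks 1 and 3 with delta_recall = 1/3:
# gap = (1/1) * 1/3 + (2/3) * 1/3 = 0.5556
print(round(gap, 4))
```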
/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution.
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 
--------------------------------------------------------------------------------
/source/tune_se_resnet101.py:
--------------------------------------------------------------------------------
1 | import os
2 | import yaml
3 | import pickle
4 | import logging
5 | import random
6 | 
7 | import numpy as np
8 | import torch
9 | torch.backends.cudnn.benchmark = True  # let cudnn pick the fastest conv algorithms for fixed-size inputs
10 | from torch import nn
11 | from torch.nn import functional as F
12 | from torch.utils.data import DataLoader
13 | from torch.utils.data.sampler import SubsetRandomSampler
14 | 
15 | from tensorboardX import SummaryWriter
16 | 
17 | from datasets import LabeledImages
18 | from torchvision import transforms
19 | from models.se_resnet import se_resnet101
20 | 
21 | 
22 | CONFIG_PATH = '/source/config.yaml'
23 | 
24 | 
25 | def get_path(rel_path, config):
26 |     return os.path.join(config['TUNING']['ARTIFACTS_ROOT'], rel_path)
27 | 
28 | 
29 | class Classifier(nn.Module):
30 |     def __init__(self, n_classes):
31 |         super(Classifier, self).__init__()
32 |         self.n_classes = n_classes
33 |         self.features = se_resnet101()
34 |         self.classifier = nn.Linear(2048, n_classes)
35 | 
36 |     def forward(self, x):
37 |         x = self.features(x)
38 |         x = self.classifier(x)
39 | 
40 |         return x
41 | 
42 | 
43 | class ModelWithLoss(nn.Module):
44 |     def __init__(self, classifier):
45 |         super(ModelWithLoss, self).__init__()
46 |         self.classifier = classifier
47 |         self.criterion = nn.CrossEntropyLoss(reduction='sum')  # summed here, averaged over positives in forward
48 | 
49 |     def forward(self, x, labels):
50 |         assert len(x) == len(labels)
51 | 
52 |         predictions = self.classifier(x)
53 | 
54 |         all_classes = np.arange(self.classifier.n_classes, dtype=np.int64)
55 |         zero_label = torch.tensor([0]).to(x.device)  # the positive class always sits at index 0 below
56 | 
57 |         loss = 0
58 |         denominator = 0
59 |         for prediction, positives in zip(predictions, labels):
60 |             negatives = np.setdiff1d(all_classes, positives, assume_unique=True)  # classes absent from this image
61 |             negatives_tensor = torch.tensor(negatives).to(x.device)
62 |             positives_tensor = torch.tensor(positives).to(x.device).unsqueeze(dim=1)
63 | 
64 |             for positive in positives_tensor:  # one cross-entropy term per positive: the positive vs. all negatives
65 |                 indices = torch.cat((positive, negatives_tensor))
66 |                 loss = loss + self.criterion(prediction[indices].unsqueeze(dim=0), zero_label)
67 |                 denominator += 1
68 | 
69 |         loss /= denominator
70 | 
71 |         return loss
72 | 
73 |     def predict(self, x, top_k):
74 |         predictions = self.classifier(x)
75 |         scores, labels = predictions.sort(dim=1, descending=True)
76 | 
77 |         pred_scores = np.zeros(shape=(len(scores), top_k), dtype=np.float32)
78 |         pred_labels = labels[:, :top_k].cpu().numpy()
79 | 
80 |         for i in range(top_k):
81 |             i_scores = torch.cat((scores[:, i:i + 1], scores[:, top_k:]), dim=1)  # candidate score vs. the non-top-k tail
82 |             pred_scores[:, i] = F.softmax(i_scores, dim=1)[:, 0].cpu().numpy()
83 | 
84 |         return pred_scores, pred_labels
85 | 
86 | 
87 | class AverageMeter(object):
88 |     def __init__(self):
89 |         self.reset()
90 | 
91 |     def reset(self):
92 |         self.val = 0
93 |         self.avg = 0
94 |         self.sum = 0
95 |         self.count = 0
96 | 
97 |     def update(self, val, n=1):
98 |         self.val = val
99 |         self.sum += val * n
100 |         self.count += n
101 |         self.avg = self.sum / self.count
102 | 
103 | 
104 | def calculate_f2_measure(pred_labels, pred_scores, gt_labels, thresh):  # F2 = 5 * P * R / (4 * P + R)
105 |     pred_labels = pred_labels[pred_scores > thresh]
106 |     pred_labels = set(pred_labels.tolist())
107 |     gt_labels = set(gt_labels.tolist())
108 |     tp = pred_labels & gt_labels
109 |     prec = (len(tp) / len(pred_labels)) if len(pred_labels) > 0 else 1
110 |     rec = (len(tp) / len(gt_labels)) if len(gt_labels) > 0 else 1
111 |     f2_measure = ((5 * prec * rec) / (4 * prec + rec)) if (4 * prec + rec) > 0 else 0
112 | 
113 |     return f2_measure
114 | 
115 | 
116 | def validate(model, dataset, indices, batch_size, top_k):
117 |     model.eval()
118 |     sampler = SubsetRandomSampler(indices)
119 |     loader = DataLoader(dataset, batch_size, sampler=sampler, num_workers=0, collate_fn=lambda X: X)  # identity collate: label counts vary per image
120 | 
121 |     pred_scores_all = []
122 |     pred_labels_all = []
123 |     gt_labels_all = []
124 | 
125 |     for samples in loader:
126 |         input_tensor = torch.stack([sample['image'] for sample in samples]).cuda()
127 |         labels = [sample['labels'] for sample in samples]
128 | 
129 |         with torch.no_grad():
130 |             pred_scores, pred_labels = model.predict(input_tensor, top_k)
131 |             pred_scores_all.extend(pred_scores)
132 |             pred_labels_all.extend(pred_labels)
133 |             gt_labels_all.extend(labels)
134 | 
135 |     thresholds = np.arange(0.0, 1.0, 0.05, dtype=np.float64)  # sweep candidate decision thresholds
136 |     results = np.zeros_like(thresholds)
137 | 
138 |     for i, thresh in enumerate(thresholds):
139 |         f2_measure = 0
140 |         for pred_scores, pred_labels, gt_labels in zip(pred_scores_all, pred_labels_all, gt_labels_all):
141 |             f2_measure += calculate_f2_measure(pred_labels, pred_scores, gt_labels, thresh)
142 |         results[i] = f2_measure / len(gt_labels_all)
143 | 
144 |     best_idx = np.argmax(results)
145 | 
146 |     return results[best_idx], thresholds[best_idx]
147 | 
148 | 
149 | def adjust_learning_rate(optimizer, epoch, initial_lr, lr_decay, lr_step):
150 |     lr = initial_lr * (lr_decay ** (epoch // lr_step))
151 |     for param_group in optimizer.param_groups:
152 |         param_group['lr'] = lr
153 | 
154 | 
155 | def main():
156 |     with open(CONFIG_PATH, 'r') as f:
157 |         config = yaml.safe_load(f)
158 |     os.makedirs(get_path('snapshots', config), exist_ok=True)
159 | 
160 |     logger = logging.getLogger('tuning')
161 |     logger.setLevel(logging.DEBUG)
162 |     fh = logging.FileHandler(get_path('tuning.log', config))
163 |     fh.setLevel(logging.DEBUG)
164 |     ch = logging.StreamHandler()
165 |     ch.setLevel(logging.DEBUG)
166 |     formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
167 |     fh.setFormatter(formatter)
168 |     ch.setFormatter(formatter)
169 |     logger.addHandler(fh)
170 |     logger.addHandler(ch)
171 | 
172 |     random.seed(config['TUNING']['SEED'])
173 |     np.random.seed(config['TUNING']['SEED'])
174 |     torch.manual_seed(config['TUNING']['SEED'])
175 | 
176 |     with open(config['DATASET']['NAME_TO_LABEL_PATH'], 'rb') as f:
177 |         name_to_label = pickle.load(f)
178 |     n_classes = len(name_to_label)
179 |     logger.info('Total number of classes: {}.'.format(n_classes))
180 | 
181 |     img_path_to_labels = {}
182 |     with open(config['DATASET']['TUNING_LABELS_PATH'], 'r') as f:
183 |         for line in f:
184 |             img_id, names = line.strip().split(',')
185 |             img_path = img_id + '.jpg'
186 |             labels = [name_to_label[name] for name in names.split(' ')]
187 |             img_path_to_labels[img_path] = labels
188 | 
189 |     content = sorted(img_path_to_labels)
190 |     with open(config['DATASET']['TUNING_LIST_PATH'], 'w') as f:
191 |         for img_path in content:
192 |             labels = img_path_to_labels[img_path]
193 |             line = ' '.join([img_path] + [str(label) for label in labels])
194 |             f.write('{}\n'.format(line))
195 | 
196 |     normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
197 |     train_transform = transforms.Compose([transforms.Resize(256),
198 |                                           transforms.RandomCrop(224),
199 |                                           transforms.RandomHorizontalFlip(),
200 |                                           transforms.ToTensor(),
201 |                                           normalize])
202 |     val_transform = transforms.Compose([transforms.Resize(256),
203 |                                         transforms.CenterCrop(224),
204 |                                         transforms.ToTensor(),
205 |                                         normalize])
206 |     train_dataset = LabeledImages(config['DATASET']['TUNING_LIST_PATH'], config['DATASET']['STAGE1_TEST_IMAGES_ROOT'],
207 |                                   train_transform)
208 |     val_dataset = LabeledImages(config['DATASET']['TUNING_LIST_PATH'], config['DATASET']['STAGE1_TEST_IMAGES_ROOT'],
209 |                                 val_transform)
210 | 
211 |     indices = np.arange(len(train_dataset), dtype=np.int64)
212 |     np.random.shuffle(indices)
213 |     train_size = round(config['DATASET']['TUNING_TRAIN_RATIO'] * len(indices))  # hold out a fraction of the tuning set for validation
214 |     train_indices = indices[:train_size]
215 |     val_indices = indices[train_size:]
216 | 
217 |     logger.info('Train size: {}. Val size: {}.'.format(len(train_indices), len(val_indices)))
218 | 
219 |     classifier = Classifier(n_classes)
220 |     model_with_loss = ModelWithLoss(classifier).cuda()
221 | 
222 |     initial_weights_path = os.path.join(config['TRAINING']['ARTIFACTS_ROOT'], 'snapshots',
223 |                                         'snapshot_epoch_{}.pth.tar'.format(config['TRAINING']['N_EPOCH']))
224 |     logger.info('Finetuning from {}'.format(initial_weights_path))
225 |     state = torch.load(initial_weights_path, map_location=lambda storage, loc: storage)
226 |     model_with_loss.load_state_dict(state['model'])
227 | 
228 |     optimizer = torch.optim.SGD(model_with_loss.parameters(),
229 |                                 lr=config['TUNING']['INITIAL_LR'],
230 |                                 momentum=config['TUNING']['MOMENTUM'],
231 |                                 weight_decay=config['TUNING']['WEIGHT_DECAY'],
232 |                                 nesterov=True)
233 | 
234 |     loss_meter = AverageMeter()
235 | 
236 |     iteration = 0
237 |     snapshots = [(int(path.split('_epoch_')[-1].split('.')[0]), path) for path in os.listdir(get_path('snapshots', config))
238 |                  if path.startswith('snapshot') and path.endswith('.pth.tar')]
239 |     if len(snapshots) > 0:  # resume interrupted tuning from the latest snapshot, overriding the weights loaded above
240 |         snapshots.sort(key=lambda t: t[0])
241 |         logger.info('Finetuning from {}'.format(snapshots[-1][1]))
242 |         state = torch.load(os.path.join(get_path('snapshots', config), snapshots[-1][1]), map_location=lambda storage, loc: storage)
243 |         model_with_loss.load_state_dict(state['model'])
244 |         optimizer.load_state_dict(state['optimizer'])
245 |         start_epoch = state['epoch'] + 1
246 |     else:
247 |         start_epoch = 1
248 |     writer = SummaryWriter(get_path(os.path.join('tensorboard', 'run_epoch_{}'.format(start_epoch)), config))
249 | 
250 |     for epoch in range(start_epoch, config['TUNING']['N_EPOCH'] + 1):
251 |         adjust_learning_rate(optimizer, epoch, config['TUNING']['INITIAL_LR'], config['TUNING']['LR_DECAY'],
252 |                              config['TUNING']['LR_STEP'])
253 |         logger.info('Start epoch {} / {}.'.format(epoch, config['TUNING']['N_EPOCH']))
254 |         val_score, val_thresh = validate(model_with_loss, val_dataset, val_indices, config['TUNING']['BATCH_SIZE'],
255 |                                          config['VALIDATION']['TOP_K'])
256 |         logger.info('Val score: {}. Val thresh: {}.'.format(val_score, val_thresh))
257 |         writer.add_scalar('val_score', val_score, iteration)
258 |         writer.add_scalar('val_thresh', val_thresh, iteration)
259 | 
260 |         model_with_loss.train()
261 |         sampler = SubsetRandomSampler(train_indices)
262 |         train_loader = DataLoader(train_dataset,
263 |                                   batch_size=config['TUNING']['BATCH_SIZE'],
264 |                                   sampler=sampler,
265 |                                   num_workers=4,
266 |                                   collate_fn=lambda X: X,
267 |                                   drop_last=True)
268 |         for samples in train_loader:
269 |             input_tensor = torch.stack([sample['image'] for sample in samples]).cuda()
270 |             labels = [sample['labels'] for sample in samples]
271 | 
272 |             optimizer.zero_grad()
273 |             loss = model_with_loss(input_tensor, labels)
274 |             loss.backward()
275 |             optimizer.step()
276 | 
277 |             loss_meter.update(loss.item(), len(input_tensor))
278 | 
279 |             if iteration % config['TUNING']['LOG_FREQUENCY'] == 0:
280 |                 logger.info('Iteration {}. Loss {}.'.format(iteration, loss_meter.avg))
281 |                 writer.add_scalar('train_loss', loss_meter.avg, iteration)
282 |                 loss_meter.reset()
283 | 
284 |                 for i, param_group in enumerate(optimizer.param_groups):
285 |                     writer.add_scalar('lr/group_{}'.format(i), param_group['lr'], iteration)
286 | 
287 |             iteration += 1
288 | 
289 |         state = {'model': model_with_loss.state_dict(), 'optimizer': optimizer.state_dict(), 'epoch': epoch}
290 |         torch.save(state, get_path(os.path.join('snapshots', 'snapshot_epoch_{}.pth.tar'.format(epoch)), config))
291 | 
292 | 
293 | if __name__ == '__main__':
294 |     main()
--------------------------------------------------------------------------------
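The loss in ModelWithLoss.forward above reduces the multi-label problem to one softmax cross-entropy per positive label: for each ground-truth class, the positive logit is placed at index 0 and normalized against the logits of all negative classes, and the per-positive terms are averaged. Below is a minimal standalone sketch of that computation on toy values; it is not part of the repository, and the shapes and names (logits, positives, n_classes) are hypothetical.

import numpy as np
import torch
from torch import nn

criterion = nn.CrossEntropyLoss(reduction='sum')

n_classes = 6                                     # toy number of classes
logits = torch.randn(1, n_classes)                # one image's raw class scores
positives = np.array([1, 4], dtype=np.int64)      # its ground-truth labels

all_classes = np.arange(n_classes, dtype=np.int64)
negatives = torch.tensor(np.setdiff1d(all_classes, positives, assume_unique=True))
zero_label = torch.tensor([0])                    # the positive always lands at index 0

loss = 0
denominator = 0
for positive in torch.tensor(positives).unsqueeze(dim=1):
    indices = torch.cat((positive, negatives))    # [positive, negative_1, ..., negative_k]
    loss = loss + criterion(logits[0, indices].unsqueeze(dim=0), zero_label)
    denominator += 1

print('per-positive softmax loss:', (loss / denominator).item())

ModelWithLoss.predict applies the same framing in reverse: each top-k candidate score is softmax-normalized against the non-top-k tail, yielding per-label confidences that the threshold sweep in validate then binarizes at the best-F2 threshold.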