├── requirements.txt
├── src
│   ├── ddp
│   │   ├── run_ddp.sh
│   │   ├── utils.py
│   │   ├── main.py
│   │   ├── config.py
│   │   ├── net.py
│   │   ├── dataset.py
│   │   └── trainer.py
│   ├── dp
│   │   ├── run_dp.sh
│   │   ├── main.py
│   │   ├── utils.py
│   │   ├── config.py
│   │   ├── net.py
│   │   ├── dataset.py
│   │   └── trainer.py
│   └── single
│       ├── run_single.sh
│       ├── main.py
│       ├── utils.py
│       ├── config.py
│       ├── net.py
│       ├── dataset.py
│       └── trainer.py
├── Dockerfile
├── README.md
├── .gitignore
└── LICENSE

/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorboard==2.4.1
2 | tensorboardX==2.1
3 | torch==1.7.1
4 | torchvision==0.8.2
5 | tqdm==4.49.0
6 | PyYAML==5.4.1
--------------------------------------------------------------------------------
/src/ddp/run_ddp.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | EPOCH=50
4 | BATCH_SIZE=256
5 | 
6 | LR=0.1
7 | LR_DECAY_STEP_SIZE=25
8 | LR_DECAY_GAMMA=0.1
9 | WEIGHT_DECAY=0.0001
10 | 
11 | SEED=42
12 | 
13 | python src/ddp/main.py\
14 |     --seed=${SEED}\
15 |     --epoch=${EPOCH}\
16 |     --batch-size=${BATCH_SIZE}\
17 |     --lr=${LR}\
18 |     --weight-decay=${WEIGHT_DECAY}\
19 |     --lr-decay-step-size=${LR_DECAY_STEP_SIZE}\
20 |     --lr-decay-gamma=${LR_DECAY_GAMMA}\
21 |     --amp\
22 |     --contain-test
--------------------------------------------------------------------------------
/src/dp/run_dp.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | EPOCH=50
4 | BATCH_SIZE=128
5 | 
6 | LR=0.1
7 | LR_DECAY_STEP_SIZE=25
8 | LR_DECAY_GAMMA=0.1
9 | WEIGHT_DECAY=0.0001
10 | 
11 | SEED=42
12 | 
13 | python src/dp/main.py\
14 |     --seed=${SEED}\
15 |     --epoch=${EPOCH}\
16 |     --batch-size=${BATCH_SIZE}\
17 |     --lr=${LR}\
18 |     --weight-decay=${WEIGHT_DECAY}\
19 |     --lr-decay-step-size=${LR_DECAY_STEP_SIZE}\
20 |     --lr-decay-gamma=${LR_DECAY_GAMMA}\
21 |     --amp\
22 |     --contain-test
--------------------------------------------------------------------------------
/src/single/run_single.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | EPOCH=50
4 | BATCH_SIZE=128
5 | 
6 | LR=0.1
7 | LR_DECAY_STEP_SIZE=25
8 | LR_DECAY_GAMMA=0.1
9 | WEIGHT_DECAY=0.0001
10 | 
11 | SEED=42
12 | 
13 | python src/single/main.py\
14 |     --seed=${SEED}\
15 |     --epoch=${EPOCH}\
16 |     --batch-size=${BATCH_SIZE}\
17 |     --lr=${LR}\
18 |     --weight-decay=${WEIGHT_DECAY}\
19 |     --lr-decay-step-size=${LR_DECAY_STEP_SIZE}\
20 |     --lr-decay-gamma=${LR_DECAY_GAMMA}\
21 |     --amp\
22 |     --contain-test
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04
2 | ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
3 | ENV LC_ALL=C.UTF-8
4 | 
5 | ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}
6 | RUN apt-get update -y
7 | 
8 | RUN apt-get update \
9 |     && apt-get install -y python3-pip python3-dev \
10 |     && cd /usr/local/bin \
11 |     && ln -s /usr/bin/python3 python \
12 |     && pip3 install --upgrade pip
13 | 
14 | COPY requirements.txt /tmp
15 | WORKDIR /tmp
16 | RUN pip install -r requirements.txt
17 | 
18 | ARG UNAME
19 | ARG UID
20 | ARG GID
21 | RUN groupadd -g $GID -o $UNAME
22 | RUN useradd -m -u $UID -g $GID -o -s /bin/bash $UNAME
23 | USER $UNAME
--------------------------------------------------------------------------------
/src/dp/main.py:
-------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | 4 | import torch 5 | 6 | from config import load_config 7 | from net import ResNet18 8 | from trainer import Trainer 9 | from utils import fix_seed 10 | 11 | 12 | def main(hparams): 13 | fix_seed(hparams.seed) 14 | scaler = torch.cuda.amp.GradScaler() if hparams.amp else None 15 | model = ResNet18() 16 | 17 | # training phase 18 | trainer = Trainer(hparams, model, scaler) 19 | version = trainer.fit() 20 | 21 | # testing phase 22 | if hparams.contain_test: 23 | state_dict = torch.load( 24 | glob.glob( 25 | os.path.join(hparams.ckpt_path, f"version-{version}/best_model_*.pt") 26 | )[0] 27 | ) 28 | trainer.test(state_dict) 29 | 30 | 31 | if __name__ == "__main__": 32 | hparams = load_config() 33 | main(hparams) 34 | -------------------------------------------------------------------------------- /src/single/main.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | 4 | import torch 5 | 6 | from config import load_config 7 | from net import ResNet18 8 | from trainer import Trainer 9 | from utils import fix_seed 10 | 11 | 12 | def main(hparams): 13 | fix_seed(hparams.seed) 14 | scaler = torch.cuda.amp.GradScaler() if hparams.amp else None 15 | model = ResNet18() 16 | 17 | # training phase 18 | trainer = Trainer(hparams, model, scaler) 19 | version = trainer.fit() 20 | 21 | # testing phase 22 | if hparams.contain_test: 23 | state_dict = torch.load( 24 | glob.glob( 25 | os.path.join(hparams.ckpt_path, f"version-{version}/best_model_*.pt") 26 | )[0] 27 | ) 28 | trainer.test(state_dict) 29 | 30 | 31 | if __name__ == "__main__": 32 | hparams = load_config() 33 | main(hparams) 34 | -------------------------------------------------------------------------------- /src/dp/utils.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | import torch 5 | 6 | 7 | def fix_seed(seed: int) -> None: 8 | torch.manual_seed(seed) 9 | torch.cuda.manual_seed(seed) 10 | torch.cuda.manual_seed_all(seed) 11 | torch.backends.cudnn.deterministic = True 12 | torch.backends.cudnn.benchmark = False 13 | np.random.seed(seed) 14 | random.seed(seed) 15 | 16 | 17 | def accuracy(output, target, topk=(1,)): 18 | """Computes the precision@k for the specified values of k""" 19 | maxk = max(topk) 20 | batch_size = target.size(0) 21 | 22 | _, pred = output.topk(maxk, 1, True, True) 23 | pred = pred.t() 24 | correct = pred.eq(target.reshape(1, -1).expand_as(pred)) 25 | 26 | res = [] 27 | for k in topk: 28 | correct_k = correct[:k].reshape(-1).float().sum(0) 29 | res.append(correct_k.mul_(100.0 / batch_size)) 30 | return res 31 | 32 | 33 | class AverageMeter: 34 | def __init__(self): 35 | self.reset() 36 | 37 | def reset(self): 38 | self.val = 0 39 | self.avg = 0 40 | self.sum = 0 41 | self.count = 0 42 | 43 | def update(self, val: float, n: int = 1): 44 | self.val = val 45 | self.sum += val * n 46 | self.count += n 47 | self.avg = self.sum / self.count 48 | -------------------------------------------------------------------------------- /src/single/utils.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | import torch 5 | 6 | 7 | def fix_seed(seed: int) -> None: 8 | torch.manual_seed(seed) 9 | torch.cuda.manual_seed(seed) 10 | torch.cuda.manual_seed_all(seed) 11 | torch.backends.cudnn.deterministic = 
True 12 | torch.backends.cudnn.benchmark = False 13 | np.random.seed(seed) 14 | random.seed(seed) 15 | 16 | 17 | def accuracy(output, target, topk=(1,)): 18 | """Computes the precision@k for the specified values of k""" 19 | maxk = max(topk) 20 | batch_size = target.size(0) 21 | 22 | _, pred = output.topk(maxk, 1, True, True) 23 | pred = pred.t() 24 | correct = pred.eq(target.reshape(1, -1).expand_as(pred)) 25 | 26 | res = [] 27 | for k in topk: 28 | correct_k = correct[:k].reshape(-1).float().sum(0) 29 | res.append(correct_k.mul_(100.0 / batch_size)) 30 | return res 31 | 32 | 33 | class AverageMeter: 34 | def __init__(self): 35 | self.reset() 36 | 37 | def reset(self): 38 | self.val = 0 39 | self.avg = 0 40 | self.sum = 0 41 | self.count = 0 42 | 43 | def update(self, val: float, n: int = 1): 44 | self.val = val 45 | self.sum += val * n 46 | self.count += n 47 | self.avg = self.sum / self.count 48 | -------------------------------------------------------------------------------- /src/ddp/utils.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | import torch 5 | import torch.distributed as dist 6 | 7 | 8 | def fix_seed(seed: int) -> None: 9 | torch.manual_seed(seed) 10 | torch.cuda.manual_seed(seed) 11 | torch.cuda.manual_seed_all(seed) 12 | torch.backends.cudnn.deterministic = True 13 | torch.backends.cudnn.benchmark = False 14 | np.random.seed(seed) 15 | random.seed(seed) 16 | 17 | 18 | def accuracy(output, target, topk=(1,)): 19 | """Computes the precision@k for the specified values of k""" 20 | maxk = max(topk) 21 | batch_size = target.size(0) 22 | 23 | _, pred = output.topk(maxk, 1, True, True) 24 | pred = pred.t() 25 | correct = pred.eq(target.reshape(1, -1).expand_as(pred)) 26 | 27 | res = [] 28 | for k in topk: 29 | correct_k = correct[:k].reshape(-1).float().sum(0) 30 | res.append(correct_k.mul_(100.0 / batch_size)) 31 | return res 32 | 33 | 34 | class AverageMeter: 35 | def __init__(self): 36 | self.reset() 37 | 38 | def reset(self): 39 | self.val = 0 40 | self.avg = 0 41 | self.sum = 0 42 | self.count = 0 43 | 44 | def update(self, val: float, n: int = 1): 45 | self.val = val 46 | self.sum += val * n 47 | self.count += n 48 | self.avg = self.sum / self.count 49 | -------------------------------------------------------------------------------- /src/dp/config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def load_config(): 5 | parser = argparse.ArgumentParser() 6 | 7 | # default hparams 8 | parser.add_argument("--dset", type=str, default="cifar100") 9 | parser.add_argument("--dpath", type=str, default="data/") 10 | parser.add_argument("--ckpt-path", type=str, default="src/dp/checkpoints/") 11 | 12 | parser.add_argument("--seed", type=int, default=42, help="Seed for reproducibility") 13 | parser.add_argument("--workers", type=int, default=4) 14 | parser.add_argument("--eval-step", type=int, default=300) 15 | parser.add_argument( 16 | "--amp", action="store_true", default=False, help="PyTorch(>=1.6.x) AMP" 17 | ) 18 | parser.add_argument("--contain-test", action="store_true", default=False) 19 | 20 | # training hparams 21 | parser.add_argument("--epoch", type=int, default=100) 22 | parser.add_argument("--batch-size", type=int, default=128) 23 | parser.add_argument("--model", type=str, default="resnet18") 24 | 25 | parser.add_argument("--lr", type=float, default=0.1) 26 | parser.add_argument("--weight-decay", type=float, 
default=0.0001) 27 | parser.add_argument("--lr-decay-step-size", type=int, default=60) 28 | parser.add_argument("--lr-decay-gamma", type=float, default=0.1) 29 | 30 | args = parser.parse_args() 31 | return args -------------------------------------------------------------------------------- /src/single/config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def load_config(): 5 | parser = argparse.ArgumentParser() 6 | 7 | # default hparams 8 | parser.add_argument("--dset", type=str, default="cifar100") 9 | parser.add_argument("--dpath", type=str, default="data/") 10 | parser.add_argument("--ckpt-path", type=str, default="src/single/checkpoints/") 11 | 12 | parser.add_argument("--seed", type=int, default=42, help="Seed for reproducibility") 13 | parser.add_argument("--workers", type=int, default=4) 14 | parser.add_argument("--eval-step", type=int, default=300) 15 | parser.add_argument( 16 | "--amp", action="store_true", default=False, help="PyTorch(>=1.6.x) AMP" 17 | ) 18 | parser.add_argument("--contain-test", action="store_true", default=False) 19 | 20 | # training hparams 21 | parser.add_argument("--epoch", type=int, default=200) 22 | parser.add_argument("--batch-size", type=int, default=128) 23 | parser.add_argument("--model", type=str, default="resnet18") 24 | 25 | parser.add_argument("--lr", type=float, default=0.1) 26 | parser.add_argument("--weight-decay", type=float, default=0.0001) 27 | parser.add_argument("--lr-decay-step-size", type=int, default=60) 28 | parser.add_argument("--lr-decay-gamma", type=float, default=0.1) 29 | 30 | args = parser.parse_args() 31 | return args -------------------------------------------------------------------------------- /src/ddp/main.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | 4 | import torch 5 | import torch.distributed as dist 6 | import torch.multiprocessing as mp 7 | 8 | from config import load_config 9 | from net import ResNet18 10 | from trainer import Trainer 11 | from utils import fix_seed 12 | 13 | 14 | def main_worker(rank, ngpus_per_node, hparams): 15 | print(f"Use GPU {rank} for training") 16 | fix_seed(hparams.seed) 17 | hparams.rank = hparams.rank * ngpus_per_node + rank 18 | dist.init_process_group( 19 | backend=hparams.dist_backend, 20 | init_method=hparams.dist_url, 21 | world_size=hparams.world_size, 22 | rank=hparams.rank, 23 | ) 24 | 25 | scaler = torch.cuda.amp.GradScaler() if hparams.amp else None 26 | model = ResNet18() 27 | 28 | # training phase 29 | trainer = Trainer(hparams, model, scaler, rank, ngpus_per_node) 30 | version = trainer.fit() 31 | 32 | # testing phase 33 | if rank == 0 and hparams.contain_test: 34 | state_dict = torch.load( 35 | glob.glob( 36 | os.path.join(hparams.ckpt_path, f"version-{version}/best_model_*.pt") 37 | )[0] 38 | ) 39 | trainer.test(state_dict) 40 | 41 | 42 | if __name__ == "__main__": 43 | hparams = load_config() 44 | 45 | # 'world_size' means total number of processes to run 46 | ngpus_per_node = torch.cuda.device_count() 47 | hparams.world_size = ngpus_per_node * hparams.world_size 48 | 49 | mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, hparams)) 50 | -------------------------------------------------------------------------------- /src/ddp/config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def load_config(): 5 | parser = argparse.ArgumentParser() 6 | 7 | # 
default hparams
8 |     parser.add_argument("--dset", type=str, default="cifar100")
9 |     parser.add_argument("--dpath", type=str, default="data/")
10 |     parser.add_argument("--ckpt-path", type=str, default="src/ddp/checkpoints/")
11 | 
12 |     parser.add_argument("--seed", type=int, default=42, help="Seed for reproducibility")
13 |     parser.add_argument("--workers", type=int, default=4)
14 |     parser.add_argument("--eval-step", type=int, default=300)
15 |     parser.add_argument(
16 |         "--amp", action="store_true", default=False, help="PyTorch(>=1.6.x) AMP"
17 |     )
18 |     parser.add_argument("--contain-test", action="store_true", default=False)
19 | 
20 |     # ddp hparams
21 |     parser.add_argument(
22 |         "--world-size", type=int, default=1, help="Number of nodes; the total process count is derived in main.py"
23 |     )
24 |     parser.add_argument("--rank", type=int, default=0)
25 |     parser.add_argument("--dist-backend", type=str, default="nccl")
26 |     parser.add_argument("--dist-url", default="tcp://127.0.0.1:3456", type=str)
27 | 
28 |     # training hparams
29 |     parser.add_argument("--epoch", type=int, default=100)
30 |     parser.add_argument("--batch-size", type=int, default=128)
31 |     parser.add_argument("--model", type=str, default="resnet18")
32 | 
33 |     parser.add_argument("--lr", type=float, default=0.1)
34 |     parser.add_argument("--weight-decay", type=float, default=0.0001)
35 |     parser.add_argument("--lr-decay-step-size", type=int, default=60)
36 |     parser.add_argument("--lr-decay-gamma", type=float, default=0.1)
37 | 
38 |     args = parser.parse_args()
39 |     return args
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Distributed Training in PyTorch
2 | 
3 | There are several distributed training options you can try, as described in the [PyTorch documentation](https://pytorch.org/tutorials/beginner/dist_overview.html).
4 | 
5 | 
6 | > PyTorch provides several options for data-parallel training. For applications that gradually grow from simple to complex and from prototype to production, the common development trajectory would be:
7 | > 1. Use **single-device** training, if the data and model can fit in one GPU, and the training speed is not a concern.
8 | > 2. Use **single-machine multi-GPU DataParallel**, if there are multiple GPUs on the server, and you would like to speed up training with the minimum code change.
9 | > 3. Use **single-machine multi-GPU DistributedDataParallel**, if you would like to further speed up training and are willing to write a little more code to set it up.
10 | > 4. Use **multi-machine DistributedDataParallel** and the launching script, if the application needs to scale across machine boundaries.
11 | > 5. Use torchelastic to launch distributed training, if errors (e.g., OOM) are expected or if the resources can join and leave dynamically during the training.
12 | 
13 | 
14 | In this repo, I compare **single-device (1)**, **single-machine multi-GPU DataParallel (2)**, and **single-machine multi-GPU DistributedDataParallel (3)** training.
15 | 
16 | ## Environment
17 | - Nvidia RTX 2080 Ti * 2
18 | - torch==1.7.1
19 | - torchvision==0.8.2
20 | 
21 | All dependencies are listed in [requirements.txt](https://github.com/youngerous/distributed-training-comparison/blob/main/requirements.txt), and you can also build the environment from the [Dockerfile](https://github.com/youngerous/distributed-training-comparison/blob/main/Dockerfile).
22 | 
23 | ## How to Run
24 | All three folders - ```src/single/```, ```src/dp/```, and ```src/ddp/``` - are independent, self-contained implementations.
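### Docker (optional)
If you use Docker, note that the Dockerfile declares `UNAME`, `UID`, and `GID` build arguments so the container user matches your host user. A minimal build-and-run sketch (the image tag, mount point, and GPU flags below are illustrative assumptions, not part of this repo; `--gpus all` requires the NVIDIA Container Toolkit):

```sh
# Build the image; UNAME/UID/GID are the build args declared in the Dockerfile.
$ docker build -t dist-training \
    --build-arg UNAME=$(whoami) --build-arg UID=$(id -u) --build-arg GID=$(id -g) .

# Run with GPU access; --ipc=host avoids DataLoader shared-memory limits.
$ docker run --rm -it --gpus all --ipc=host \
    -v $(pwd):/workspace -w /workspace dist-training sh src/ddp/run_ddp.sh
```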
25 | 
26 | ### Single
27 | ```sh
28 | $ sh src/single/run_single.sh
29 | ```
30 | ### DataParallel
31 | ```sh
32 | $ sh src/dp/run_dp.sh
33 | ```
34 | ### DistributedDataParallel
35 | ```sh
36 | $ sh src/ddp/run_ddp.sh
37 | ```
38 | 
39 | ## Result
40 | Batch size is set to 128 or 256. It is recommended to use [SyncBatchNorm](https://pytorch.org/docs/stable/generated/torch.nn.SyncBatchNorm.html) in DDP training, but I used vanilla BatchNorm, so the DDP model was trained only with batch size 256. The best model is selected according to validation top-1 accuracy.
41 | 
42 | 
43 | I did not tune the hyperparameters carefully, so you can adjust the settings to improve performance (e.g., using the Adam optimizer).
44 | 
45 | | Dataset | Model | Test Loss | Top-1 Acc | Top-5 Acc | Batch Size | Method |
46 | | :-------: | :-------: | :--------: | :--------: | :--------: | :--------: | :---------------------------: |
47 | | CIFAR-100 | ResNet-18 | 1.3728 | 70.99% | 91.57% | 128 | Single |
48 | | CIFAR-100 | ResNet-18 | 1.3394 | 70.64% | 91.60% | 256 | Single |
49 | | CIFAR-100 | ResNet-18 | 1.2974 | **71.48%** | 91.65% | 128 | DataParallel (DP) |
50 | | CIFAR-100 | ResNet-18 | 1.3373 | 71.20% | 91.53% | 256 | DataParallel (DP) |
51 | | CIFAR-100 | ResNet-18 | **1.2268** | 71.17% | **91.84%** | 256 | DistributedDataParallel (DDP) |
52 | 
53 | - Experiment results are averaged over random seeds 2, 4, and 42.
54 | - Automatic Mixed Precision (AMP) is applied to every experiment.
55 | 
56 | ## Reference
57 | - [[Docs] Distributed Communication Package - torch.distributed](https://pytorch.org/docs/stable/distributed.html#)
58 | - [[Post] Technologies behind Distributed Deep Learning - AllReduce :: Keisuke Fukuda](https://tech.preferred.jp/en/blog/technologies-behind-distributed-deep-learning-allreduce/)
59 | - [[Post] PyTorch Distributed Training :: leimao blog](https://leimao.github.io/blog/PyTorch-Distributed-Training/)
60 | - [[Post] Distributed data parallel training in Pytorch :: yangkky blog](https://yangkky.github.io/2019/07/08/distributed-pytorch-tutorial.html)
61 | - [[Repo] PyTorch Official Example](https://github.com/pytorch/examples/blob/master/imagenet/main.py)
62 | - [[Repo] pytorch-distributed :: tczhangzhi](https://github.com/tczhangzhi/pytorch-distributed)
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | checkpoints/
2 | data/
3 | result.csv
4 | single.txt
5 | dp.txt
6 | ddp.txt
7 | 
8 | # Created by https://www.toptal.com/developers/gitignore/api/macos,windows,python,jupyternotebooks
9 | # Edit at https://www.toptal.com/developers/gitignore?templates=macos,windows,python,jupyternotebooks
10 | 
11 | ### JupyterNotebooks ###
12 | # gitignore template for Jupyter Notebooks
13 | # website: http://jupyter.org/
14 | 
15 | .ipynb_checkpoints
16 | */.ipynb_checkpoints/*
17 | 
18 | # IPython
19 | profile_default/
20 | ipython_config.py
21 | 
22 | # Remove previous ipynb_checkpoints
23 | #   git rm -r .ipynb_checkpoints/
24 | 
25 | ### macOS ###
26 | # General
27 | .DS_Store
28 | .AppleDouble
29 | .LSOverride
30 | 
31 | # Icon must end with two \r
32 | Icon
33 | 
34 | 
35 | # Thumbnails
36 | ._*
37 | 
38 | # Files that might appear in the root of a volume
39 | .DocumentRevisions-V100
40 | .fseventsd
41 | .Spotlight-V100
42 | .TemporaryItems
43 | .Trashes
44 | .VolumeIcon.icns
45 | .com.apple.timemachine.donotpresent
46 | 
47 | # Directories potentially created on remote AFP share
48 | .AppleDB
49
| .AppleDesktop 50 | Network Trash Folder 51 | Temporary Items 52 | .apdisk 53 | 54 | ### Python ### 55 | # Byte-compiled / optimized / DLL files 56 | __pycache__/ 57 | *.py[cod] 58 | *$py.class 59 | 60 | # C extensions 61 | *.so 62 | 63 | # Distribution / packaging 64 | .Python 65 | build/ 66 | develop-eggs/ 67 | dist/ 68 | downloads/ 69 | eggs/ 70 | .eggs/ 71 | lib/ 72 | lib64/ 73 | parts/ 74 | sdist/ 75 | var/ 76 | wheels/ 77 | pip-wheel-metadata/ 78 | share/python-wheels/ 79 | *.egg-info/ 80 | .installed.cfg 81 | *.egg 82 | MANIFEST 83 | 84 | # PyInstaller 85 | # Usually these files are written by a python script from a template 86 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 87 | *.manifest 88 | *.spec 89 | 90 | # Installer logs 91 | pip-log.txt 92 | pip-delete-this-directory.txt 93 | 94 | # Unit test / coverage reports 95 | htmlcov/ 96 | .tox/ 97 | .nox/ 98 | .coverage 99 | .coverage.* 100 | .cache 101 | nosetests.xml 102 | coverage.xml 103 | *.cover 104 | *.py,cover 105 | .hypothesis/ 106 | .pytest_cache/ 107 | pytestdebug.log 108 | 109 | # Translations 110 | *.mo 111 | *.pot 112 | 113 | # Django stuff: 114 | *.log 115 | local_settings.py 116 | db.sqlite3 117 | db.sqlite3-journal 118 | 119 | # Flask stuff: 120 | instance/ 121 | .webassets-cache 122 | 123 | # Scrapy stuff: 124 | .scrapy 125 | 126 | # Sphinx documentation 127 | docs/_build/ 128 | doc/_build/ 129 | 130 | # PyBuilder 131 | target/ 132 | 133 | # Jupyter Notebook 134 | 135 | # IPython 136 | 137 | # pyenv 138 | .python-version 139 | 140 | # pipenv 141 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 142 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 143 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 144 | # install all needed dependencies. 145 | #Pipfile.lock 146 | 147 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 148 | __pypackages__/ 149 | 150 | # Celery stuff 151 | celerybeat-schedule 152 | celerybeat.pid 153 | 154 | # SageMath parsed files 155 | *.sage.py 156 | 157 | # Environments 158 | .env 159 | .venv 160 | env/ 161 | venv/ 162 | ENV/ 163 | env.bak/ 164 | venv.bak/ 165 | pythonenv* 166 | 167 | # Spyder project settings 168 | .spyderproject 169 | .spyproject 170 | 171 | # Rope project settings 172 | .ropeproject 173 | 174 | # mkdocs documentation 175 | /site 176 | 177 | # mypy 178 | .mypy_cache/ 179 | .dmypy.json 180 | dmypy.json 181 | 182 | # Pyre type checker 183 | .pyre/ 184 | 185 | # pytype static type analyzer 186 | .pytype/ 187 | 188 | # profiling data 189 | .prof 190 | 191 | ### Windows ### 192 | # Windows thumbnail cache files 193 | Thumbs.db 194 | Thumbs.db:encryptable 195 | ehthumbs.db 196 | ehthumbs_vista.db 197 | 198 | # Dump file 199 | *.stackdump 200 | 201 | # Folder config file 202 | [Dd]esktop.ini 203 | 204 | # Recycle Bin used on file shares 205 | $RECYCLE.BIN/ 206 | 207 | # Windows Installer files 208 | *.cab 209 | *.msi 210 | *.msix 211 | *.msm 212 | *.msp 213 | 214 | # Windows shortcuts 215 | *.lnk 216 | 217 | # End of https://www.toptal.com/developers/gitignore/api/macos,windows,python,jupyternotebooks -------------------------------------------------------------------------------- /src/ddp/net.py: -------------------------------------------------------------------------------- 1 | """ResNet in PyTorch. 2 | For Pre-activation ResNet, see 'preact_resnet.py'. 
3 | Reference: 4 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 5 | Deep Residual Learning for Image Recognition. arXiv:1512.03385 6 | Ref: https://github.com/kuangliu/pytorch-cifar 7 | """ 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | 12 | 13 | class BasicBlock(nn.Module): 14 | expansion = 1 15 | 16 | def __init__(self, in_planes, planes, stride=1): 17 | super(BasicBlock, self).__init__() 18 | self.conv1 = nn.Conv2d( 19 | in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False 20 | ) 21 | self.bn1 = nn.BatchNorm2d(planes) 22 | self.conv2 = nn.Conv2d( 23 | planes, planes, kernel_size=3, stride=1, padding=1, bias=False 24 | ) 25 | self.bn2 = nn.BatchNorm2d(planes) 26 | 27 | self.shortcut = nn.Sequential() 28 | if stride != 1 or in_planes != self.expansion * planes: 29 | self.shortcut = nn.Sequential( 30 | nn.Conv2d( 31 | in_planes, 32 | self.expansion * planes, 33 | kernel_size=1, 34 | stride=stride, 35 | bias=False, 36 | ), 37 | nn.BatchNorm2d(self.expansion * planes), 38 | ) 39 | 40 | def forward(self, x): 41 | out = F.relu(self.bn1(self.conv1(x))) 42 | out = self.bn2(self.conv2(out)) 43 | out += self.shortcut(x) 44 | out = F.relu(out) 45 | return out 46 | 47 | 48 | class Bottleneck(nn.Module): 49 | expansion = 4 50 | 51 | def __init__(self, in_planes, planes, stride=1): 52 | super(Bottleneck, self).__init__() 53 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 54 | self.bn1 = nn.BatchNorm2d(planes) 55 | self.conv2 = nn.Conv2d( 56 | planes, planes, kernel_size=3, stride=stride, padding=1, bias=False 57 | ) 58 | self.bn2 = nn.BatchNorm2d(planes) 59 | self.conv3 = nn.Conv2d( 60 | planes, self.expansion * planes, kernel_size=1, bias=False 61 | ) 62 | self.bn3 = nn.BatchNorm2d(self.expansion * planes) 63 | 64 | self.shortcut = nn.Sequential() 65 | if stride != 1 or in_planes != self.expansion * planes: 66 | self.shortcut = nn.Sequential( 67 | nn.Conv2d( 68 | in_planes, 69 | self.expansion * planes, 70 | kernel_size=1, 71 | stride=stride, 72 | bias=False, 73 | ), 74 | nn.BatchNorm2d(self.expansion * planes), 75 | ) 76 | 77 | def forward(self, x): 78 | out = F.relu(self.bn1(self.conv1(x))) 79 | out = F.relu(self.bn2(self.conv2(out))) 80 | out = self.bn3(self.conv3(out)) 81 | out += self.shortcut(x) 82 | out = F.relu(out) 83 | return out 84 | 85 | 86 | class ResNet(nn.Module): 87 | def __init__(self, block, num_blocks, num_classes=100): 88 | super(ResNet, self).__init__() 89 | self.in_planes = 64 90 | 91 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 92 | self.bn1 = nn.BatchNorm2d(64) 93 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 94 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 95 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 96 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 97 | self.linear = nn.Linear(512 * block.expansion, num_classes) 98 | 99 | def _make_layer(self, block, planes, num_blocks, stride): 100 | strides = [stride] + [1] * (num_blocks - 1) 101 | layers = [] 102 | for stride in strides: 103 | layers.append(block(self.in_planes, planes, stride)) 104 | self.in_planes = planes * block.expansion 105 | return nn.Sequential(*layers) 106 | 107 | def forward(self, x): 108 | out = F.relu(self.bn1(self.conv1(x))) 109 | out = self.layer1(out) 110 | out = self.layer2(out) 111 | out = self.layer3(out) 112 | out = self.layer4(out) 113 | out = F.avg_pool2d(out, 4) 114 | out = 
out.view(out.size(0), -1) 115 | out = self.linear(out) 116 | return out 117 | 118 | 119 | def ResNet18(): 120 | return ResNet(BasicBlock, [2, 2, 2, 2]) 121 | 122 | 123 | def ResNet34(): 124 | return ResNet(BasicBlock, [3, 4, 6, 3]) 125 | 126 | 127 | def ResNet50(): 128 | return ResNet(Bottleneck, [3, 4, 6, 3]) 129 | 130 | 131 | def ResNet101(): 132 | return ResNet(Bottleneck, [3, 4, 23, 3]) 133 | 134 | 135 | def ResNet152(): 136 | return ResNet(Bottleneck, [3, 8, 36, 3]) 137 | 138 | 139 | def test(): 140 | net = ResNet18() 141 | y = net(torch.randn(1, 3, 32, 32)) 142 | print(y.size()) 143 | 144 | 145 | # test() -------------------------------------------------------------------------------- /src/dp/net.py: -------------------------------------------------------------------------------- 1 | """ResNet in PyTorch. 2 | For Pre-activation ResNet, see 'preact_resnet.py'. 3 | Reference: 4 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 5 | Deep Residual Learning for Image Recognition. arXiv:1512.03385 6 | Ref: https://github.com/kuangliu/pytorch-cifar 7 | """ 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | 12 | 13 | class BasicBlock(nn.Module): 14 | expansion = 1 15 | 16 | def __init__(self, in_planes, planes, stride=1): 17 | super(BasicBlock, self).__init__() 18 | self.conv1 = nn.Conv2d( 19 | in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False 20 | ) 21 | self.bn1 = nn.BatchNorm2d(planes) 22 | self.conv2 = nn.Conv2d( 23 | planes, planes, kernel_size=3, stride=1, padding=1, bias=False 24 | ) 25 | self.bn2 = nn.BatchNorm2d(planes) 26 | 27 | self.shortcut = nn.Sequential() 28 | if stride != 1 or in_planes != self.expansion * planes: 29 | self.shortcut = nn.Sequential( 30 | nn.Conv2d( 31 | in_planes, 32 | self.expansion * planes, 33 | kernel_size=1, 34 | stride=stride, 35 | bias=False, 36 | ), 37 | nn.BatchNorm2d(self.expansion * planes), 38 | ) 39 | 40 | def forward(self, x): 41 | out = F.relu(self.bn1(self.conv1(x))) 42 | out = self.bn2(self.conv2(out)) 43 | out += self.shortcut(x) 44 | out = F.relu(out) 45 | return out 46 | 47 | 48 | class Bottleneck(nn.Module): 49 | expansion = 4 50 | 51 | def __init__(self, in_planes, planes, stride=1): 52 | super(Bottleneck, self).__init__() 53 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 54 | self.bn1 = nn.BatchNorm2d(planes) 55 | self.conv2 = nn.Conv2d( 56 | planes, planes, kernel_size=3, stride=stride, padding=1, bias=False 57 | ) 58 | self.bn2 = nn.BatchNorm2d(planes) 59 | self.conv3 = nn.Conv2d( 60 | planes, self.expansion * planes, kernel_size=1, bias=False 61 | ) 62 | self.bn3 = nn.BatchNorm2d(self.expansion * planes) 63 | 64 | self.shortcut = nn.Sequential() 65 | if stride != 1 or in_planes != self.expansion * planes: 66 | self.shortcut = nn.Sequential( 67 | nn.Conv2d( 68 | in_planes, 69 | self.expansion * planes, 70 | kernel_size=1, 71 | stride=stride, 72 | bias=False, 73 | ), 74 | nn.BatchNorm2d(self.expansion * planes), 75 | ) 76 | 77 | def forward(self, x): 78 | out = F.relu(self.bn1(self.conv1(x))) 79 | out = F.relu(self.bn2(self.conv2(out))) 80 | out = self.bn3(self.conv3(out)) 81 | out += self.shortcut(x) 82 | out = F.relu(out) 83 | return out 84 | 85 | 86 | class ResNet(nn.Module): 87 | def __init__(self, block, num_blocks, num_classes=100): 88 | super(ResNet, self).__init__() 89 | self.in_planes = 64 90 | 91 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 92 | self.bn1 = nn.BatchNorm2d(64) 93 | self.layer1 = 
self._make_layer(block, 64, num_blocks[0], stride=1) 94 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 95 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 96 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 97 | self.linear = nn.Linear(512 * block.expansion, num_classes) 98 | 99 | def _make_layer(self, block, planes, num_blocks, stride): 100 | strides = [stride] + [1] * (num_blocks - 1) 101 | layers = [] 102 | for stride in strides: 103 | layers.append(block(self.in_planes, planes, stride)) 104 | self.in_planes = planes * block.expansion 105 | return nn.Sequential(*layers) 106 | 107 | def forward(self, x): 108 | out = F.relu(self.bn1(self.conv1(x))) 109 | out = self.layer1(out) 110 | out = self.layer2(out) 111 | out = self.layer3(out) 112 | out = self.layer4(out) 113 | out = F.avg_pool2d(out, 4) 114 | out = out.view(out.size(0), -1) 115 | out = self.linear(out) 116 | return out 117 | 118 | 119 | def ResNet18(): 120 | return ResNet(BasicBlock, [2, 2, 2, 2]) 121 | 122 | 123 | def ResNet34(): 124 | return ResNet(BasicBlock, [3, 4, 6, 3]) 125 | 126 | 127 | def ResNet50(): 128 | return ResNet(Bottleneck, [3, 4, 6, 3]) 129 | 130 | 131 | def ResNet101(): 132 | return ResNet(Bottleneck, [3, 4, 23, 3]) 133 | 134 | 135 | def ResNet152(): 136 | return ResNet(Bottleneck, [3, 8, 36, 3]) 137 | 138 | 139 | def test(): 140 | net = ResNet18() 141 | y = net(torch.randn(1, 3, 32, 32)) 142 | print(y.size()) 143 | 144 | 145 | # test() -------------------------------------------------------------------------------- /src/single/net.py: -------------------------------------------------------------------------------- 1 | """ResNet in PyTorch. 2 | For Pre-activation ResNet, see 'preact_resnet.py'. 3 | Reference: 4 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 5 | Deep Residual Learning for Image Recognition. 
arXiv:1512.03385 6 | Ref: https://github.com/kuangliu/pytorch-cifar 7 | """ 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | 12 | 13 | class BasicBlock(nn.Module): 14 | expansion = 1 15 | 16 | def __init__(self, in_planes, planes, stride=1): 17 | super(BasicBlock, self).__init__() 18 | self.conv1 = nn.Conv2d( 19 | in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False 20 | ) 21 | self.bn1 = nn.BatchNorm2d(planes) 22 | self.conv2 = nn.Conv2d( 23 | planes, planes, kernel_size=3, stride=1, padding=1, bias=False 24 | ) 25 | self.bn2 = nn.BatchNorm2d(planes) 26 | 27 | self.shortcut = nn.Sequential() 28 | if stride != 1 or in_planes != self.expansion * planes: 29 | self.shortcut = nn.Sequential( 30 | nn.Conv2d( 31 | in_planes, 32 | self.expansion * planes, 33 | kernel_size=1, 34 | stride=stride, 35 | bias=False, 36 | ), 37 | nn.BatchNorm2d(self.expansion * planes), 38 | ) 39 | 40 | def forward(self, x): 41 | out = F.relu(self.bn1(self.conv1(x))) 42 | out = self.bn2(self.conv2(out)) 43 | out += self.shortcut(x) 44 | out = F.relu(out) 45 | return out 46 | 47 | 48 | class Bottleneck(nn.Module): 49 | expansion = 4 50 | 51 | def __init__(self, in_planes, planes, stride=1): 52 | super(Bottleneck, self).__init__() 53 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 54 | self.bn1 = nn.BatchNorm2d(planes) 55 | self.conv2 = nn.Conv2d( 56 | planes, planes, kernel_size=3, stride=stride, padding=1, bias=False 57 | ) 58 | self.bn2 = nn.BatchNorm2d(planes) 59 | self.conv3 = nn.Conv2d( 60 | planes, self.expansion * planes, kernel_size=1, bias=False 61 | ) 62 | self.bn3 = nn.BatchNorm2d(self.expansion * planes) 63 | 64 | self.shortcut = nn.Sequential() 65 | if stride != 1 or in_planes != self.expansion * planes: 66 | self.shortcut = nn.Sequential( 67 | nn.Conv2d( 68 | in_planes, 69 | self.expansion * planes, 70 | kernel_size=1, 71 | stride=stride, 72 | bias=False, 73 | ), 74 | nn.BatchNorm2d(self.expansion * planes), 75 | ) 76 | 77 | def forward(self, x): 78 | out = F.relu(self.bn1(self.conv1(x))) 79 | out = F.relu(self.bn2(self.conv2(out))) 80 | out = self.bn3(self.conv3(out)) 81 | out += self.shortcut(x) 82 | out = F.relu(out) 83 | return out 84 | 85 | 86 | class ResNet(nn.Module): 87 | def __init__(self, block, num_blocks, num_classes=100): 88 | super(ResNet, self).__init__() 89 | self.in_planes = 64 90 | 91 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 92 | self.bn1 = nn.BatchNorm2d(64) 93 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 94 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 95 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 96 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 97 | self.linear = nn.Linear(512 * block.expansion, num_classes) 98 | 99 | def _make_layer(self, block, planes, num_blocks, stride): 100 | strides = [stride] + [1] * (num_blocks - 1) 101 | layers = [] 102 | for stride in strides: 103 | layers.append(block(self.in_planes, planes, stride)) 104 | self.in_planes = planes * block.expansion 105 | return nn.Sequential(*layers) 106 | 107 | def forward(self, x): 108 | out = F.relu(self.bn1(self.conv1(x))) 109 | out = self.layer1(out) 110 | out = self.layer2(out) 111 | out = self.layer3(out) 112 | out = self.layer4(out) 113 | out = F.avg_pool2d(out, 4) 114 | out = out.view(out.size(0), -1) 115 | out = self.linear(out) 116 | return out 117 | 118 | 119 | def ResNet18(): 120 | return 
ResNet(BasicBlock, [2, 2, 2, 2]) 121 | 122 | 123 | def ResNet34(): 124 | return ResNet(BasicBlock, [3, 4, 6, 3]) 125 | 126 | 127 | def ResNet50(): 128 | return ResNet(Bottleneck, [3, 4, 6, 3]) 129 | 130 | 131 | def ResNet101(): 132 | return ResNet(Bottleneck, [3, 4, 23, 3]) 133 | 134 | 135 | def ResNet152(): 136 | return ResNet(Bottleneck, [3, 8, 36, 3]) 137 | 138 | 139 | def test(): 140 | net = ResNet18() 141 | y = net(torch.randn(1, 3, 32, 32)) 142 | print(y.size()) 143 | 144 | 145 | # test() -------------------------------------------------------------------------------- /src/dp/dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | Ref: https://gist.github.com/kevinzakka/d33bf8d6c7f06a9d8c76d97a7879f5cb 3 | Create train, valid, test iterators for CIFAR-100 [1]. 4 | [1]: https://discuss.pytorch.org/t/feedback-on-pytorch-for-kaggle-competitions/2252/4 5 | """ 6 | 7 | import numpy as np 8 | import torch 9 | from torch.utils.data.sampler import SubsetRandomSampler 10 | from torchvision import datasets, transforms 11 | 12 | 13 | def get_trn_val_loader( 14 | data_dir: str, 15 | batch_size: int, 16 | valid_size: float = 0.1, 17 | shuffle: bool = True, 18 | num_workers: int = 1, 19 | pin_memory: bool = True, 20 | ): 21 | """ 22 | Utility function for loading and returning train and valid 23 | multi-process iterators over the CIFAR-100 dataset. A sample 24 | 9x9 grid of the images can be optionally displayed. 25 | If using CUDA, num_workers should be set to 1 and pin_memory to True. 26 | :param data_dir: path directory to the dataset. 27 | :param batch_size: how many samples per batch to load. 28 | :param valid_size: percentage split of the training set used for 29 | the validation set. Should be a float in the range [0, 1]. 30 | :param shuffle: whether to shuffle the train/validation indices. 31 | :param num_workers: number of subprocesses to use when loading the dataset. 32 | :param pin_memory: whether to copy tensors into CUDA pinned memory. 33 | Set it to True if using GPU. 34 | 35 | :return train_loader: training set iterator. 36 | :return valid_loader: validation set iterator. 37 | """ 38 | error_msg = "[!] valid_size should be in the range [0, 1]." 
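    # A note on normalization: the mean/std used below are the widely used
    # CIFAR-10 statistics, while get_tst_loader() further down normalizes with
    # ImageNet statistics, so train/valid and test inputs are scaled differently.
    # Also, the two CIFAR100 datasets created below wrap the same training files;
    # they differ only in transforms, and SubsetRandomSampler draws disjoint
    # index sets, so the train and valid loaders never share samples.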
39 | assert (valid_size >= 0) and (valid_size <= 1), error_msg 40 | 41 | normalize = transforms.Normalize( 42 | mean=[0.4914, 0.4822, 0.4465], 43 | std=[0.2023, 0.1994, 0.2010], 44 | ) 45 | 46 | # define transforms 47 | valid_transform = transforms.Compose( 48 | [ 49 | transforms.ToTensor(), 50 | normalize, 51 | ] 52 | ) 53 | 54 | # augmentation 55 | train_transform = transforms.Compose( 56 | [ 57 | transforms.RandomCrop(32, padding=4), 58 | transforms.RandomHorizontalFlip(), 59 | transforms.ToTensor(), 60 | normalize, 61 | ] 62 | ) 63 | 64 | # load the dataset 65 | train_dataset = datasets.CIFAR100( 66 | root=data_dir, 67 | train=True, 68 | download=True, 69 | transform=train_transform, 70 | ) 71 | 72 | valid_dataset = datasets.CIFAR100( 73 | root=data_dir, 74 | train=True, 75 | download=True, 76 | transform=valid_transform, 77 | ) 78 | 79 | # train/valid split 80 | num_train = len(train_dataset) 81 | indices = list(range(num_train)) 82 | split = int(np.floor(valid_size * num_train)) 83 | 84 | if shuffle: 85 | np.random.shuffle(indices) 86 | 87 | train_idx, valid_idx = indices[split:], indices[:split] 88 | train_sampler = SubsetRandomSampler(train_idx) 89 | valid_sampler = SubsetRandomSampler(valid_idx) 90 | 91 | train_loader = torch.utils.data.DataLoader( 92 | train_dataset, 93 | batch_size=batch_size, 94 | sampler=train_sampler, 95 | num_workers=num_workers, 96 | pin_memory=pin_memory, 97 | drop_last=True, 98 | ) 99 | valid_loader = torch.utils.data.DataLoader( 100 | valid_dataset, 101 | batch_size=batch_size, 102 | sampler=valid_sampler, 103 | num_workers=num_workers, 104 | pin_memory=pin_memory, 105 | ) 106 | 107 | return train_loader, valid_loader 108 | 109 | 110 | def get_tst_loader( 111 | data_dir: str, 112 | batch_size: int, 113 | shuffle: bool = True, 114 | num_workers: int = 4, 115 | pin_memory: bool = False, 116 | ): 117 | """ 118 | Utility function for loading and returning a multi-process 119 | test iterator over the CIFAR-100 dataset. 120 | If using CUDA, num_workers should be set to 1 and pin_memory to True. 121 | :param data_dir: path directory to the dataset. 122 | :param batch_size: how many samples per batch to load. 123 | :param shuffle: whether to shuffle the dataset after every epoch. 124 | :param num_workers: number of subprocesses to use when loading the dataset. 125 | :param pin_memory: whether to copy tensors into CUDA pinned memory. 126 | Set it to True if using GPU. 127 | 128 | :return data_loader: test set iterator. 129 | """ 130 | normalize = transforms.Normalize( 131 | mean=[0.485, 0.456, 0.406], 132 | std=[0.229, 0.224, 0.225], 133 | ) 134 | 135 | # define transform 136 | transform = transforms.Compose( 137 | [ 138 | transforms.ToTensor(), 139 | normalize, 140 | ] 141 | ) 142 | 143 | dataset = datasets.CIFAR100( 144 | root=data_dir, 145 | train=False, 146 | download=True, 147 | transform=transform, 148 | ) 149 | 150 | data_loader = torch.utils.data.DataLoader( 151 | dataset, 152 | batch_size=batch_size, 153 | shuffle=shuffle, 154 | num_workers=num_workers, 155 | pin_memory=pin_memory, 156 | ) 157 | 158 | return data_loader -------------------------------------------------------------------------------- /src/single/dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | Ref: https://gist.github.com/kevinzakka/d33bf8d6c7f06a9d8c76d97a7879f5cb 3 | Create train, valid, test iterators for CIFAR-100 [1]. 
4 | [1]: https://discuss.pytorch.org/t/feedback-on-pytorch-for-kaggle-competitions/2252/4 5 | """ 6 | 7 | import numpy as np 8 | import torch 9 | from torch.utils.data.sampler import SubsetRandomSampler 10 | from torchvision import datasets, transforms 11 | 12 | 13 | def get_trn_val_loader( 14 | data_dir: str, 15 | batch_size: int, 16 | valid_size: float = 0.1, 17 | shuffle: bool = True, 18 | num_workers: int = 1, 19 | pin_memory: bool = True, 20 | ): 21 | """ 22 | Utility function for loading and returning train and valid 23 | multi-process iterators over the CIFAR-100 dataset. A sample 24 | 9x9 grid of the images can be optionally displayed. 25 | If using CUDA, num_workers should be set to 1 and pin_memory to True. 26 | :param data_dir: path directory to the dataset. 27 | :param batch_size: how many samples per batch to load. 28 | :param valid_size: percentage split of the training set used for 29 | the validation set. Should be a float in the range [0, 1]. 30 | :param shuffle: whether to shuffle the train/validation indices. 31 | :param num_workers: number of subprocesses to use when loading the dataset. 32 | :param pin_memory: whether to copy tensors into CUDA pinned memory. 33 | Set it to True if using GPU. 34 | 35 | :return train_loader: training set iterator. 36 | :return valid_loader: validation set iterator. 37 | """ 38 | error_msg = "[!] valid_size should be in the range [0, 1]." 39 | assert (valid_size >= 0) and (valid_size <= 1), error_msg 40 | 41 | normalize = transforms.Normalize( 42 | mean=[0.4914, 0.4822, 0.4465], 43 | std=[0.2023, 0.1994, 0.2010], 44 | ) 45 | 46 | # define transforms 47 | valid_transform = transforms.Compose( 48 | [ 49 | transforms.ToTensor(), 50 | normalize, 51 | ] 52 | ) 53 | 54 | # augmentation 55 | train_transform = transforms.Compose( 56 | [ 57 | transforms.RandomCrop(32, padding=4), 58 | transforms.RandomHorizontalFlip(), 59 | transforms.ToTensor(), 60 | normalize, 61 | ] 62 | ) 63 | 64 | # load the dataset 65 | train_dataset = datasets.CIFAR100( 66 | root=data_dir, 67 | train=True, 68 | download=True, 69 | transform=train_transform, 70 | ) 71 | 72 | valid_dataset = datasets.CIFAR100( 73 | root=data_dir, 74 | train=True, 75 | download=True, 76 | transform=valid_transform, 77 | ) 78 | 79 | # train/valid split 80 | num_train = len(train_dataset) 81 | indices = list(range(num_train)) 82 | split = int(np.floor(valid_size * num_train)) 83 | 84 | if shuffle: 85 | np.random.shuffle(indices) 86 | 87 | train_idx, valid_idx = indices[split:], indices[:split] 88 | train_sampler = SubsetRandomSampler(train_idx) 89 | valid_sampler = SubsetRandomSampler(valid_idx) 90 | 91 | train_loader = torch.utils.data.DataLoader( 92 | train_dataset, 93 | batch_size=batch_size, 94 | sampler=train_sampler, 95 | num_workers=num_workers, 96 | pin_memory=pin_memory, 97 | drop_last=True, 98 | ) 99 | valid_loader = torch.utils.data.DataLoader( 100 | valid_dataset, 101 | batch_size=batch_size, 102 | sampler=valid_sampler, 103 | num_workers=num_workers, 104 | pin_memory=pin_memory, 105 | ) 106 | 107 | return train_loader, valid_loader 108 | 109 | 110 | def get_tst_loader( 111 | data_dir: str, 112 | batch_size: int, 113 | shuffle: bool = True, 114 | num_workers: int = 4, 115 | pin_memory: bool = False, 116 | ): 117 | """ 118 | Utility function for loading and returning a multi-process 119 | test iterator over the CIFAR-100 dataset. 120 | If using CUDA, num_workers should be set to 1 and pin_memory to True. 121 | :param data_dir: path directory to the dataset. 
122 | :param batch_size: how many samples per batch to load. 123 | :param shuffle: whether to shuffle the dataset after every epoch. 124 | :param num_workers: number of subprocesses to use when loading the dataset. 125 | :param pin_memory: whether to copy tensors into CUDA pinned memory. 126 | Set it to True if using GPU. 127 | 128 | :return data_loader: test set iterator. 129 | """ 130 | normalize = transforms.Normalize( 131 | mean=[0.485, 0.456, 0.406], 132 | std=[0.229, 0.224, 0.225], 133 | ) 134 | 135 | # define transform 136 | transform = transforms.Compose( 137 | [ 138 | transforms.ToTensor(), 139 | normalize, 140 | ] 141 | ) 142 | 143 | dataset = datasets.CIFAR100( 144 | root=data_dir, 145 | train=False, 146 | download=True, 147 | transform=transform, 148 | ) 149 | 150 | data_loader = torch.utils.data.DataLoader( 151 | dataset, 152 | batch_size=batch_size, 153 | shuffle=shuffle, 154 | num_workers=num_workers, 155 | pin_memory=pin_memory, 156 | ) 157 | 158 | return data_loader -------------------------------------------------------------------------------- /src/ddp/dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | Ref: https://gist.github.com/kevinzakka/d33bf8d6c7f06a9d8c76d97a7879f5cb 3 | Create train, valid, test iterators for CIFAR-100 [1]. 4 | [1]: https://discuss.pytorch.org/t/feedback-on-pytorch-for-kaggle-competitions/2252/4 5 | """ 6 | 7 | import numpy as np 8 | import torch 9 | from torch.utils.data import Subset 10 | from torch.utils.data.distributed import DistributedSampler 11 | from torch.utils.data.sampler import SubsetRandomSampler 12 | from torchvision import datasets, transforms 13 | 14 | 15 | def get_trn_val_loader( 16 | data_dir: str, 17 | batch_size: int, 18 | valid_size: float = 0.1, 19 | shuffle: bool = True, 20 | num_workers: int = 1, 21 | pin_memory: bool = True, 22 | ): 23 | """ 24 | Utility function for loading and returning train and valid 25 | multi-process iterators over the CIFAR-100 dataset. A sample 26 | 9x9 grid of the images can be optionally displayed. 27 | If using CUDA, num_workers should be set to 1 and pin_memory to True. 28 | :param data_dir: path directory to the dataset. 29 | :param batch_size: how many samples per batch to load. 30 | :param valid_size: percentage split of the training set used for 31 | the validation set. Should be a float in the range [0, 1]. 32 | :param shuffle: whether to shuffle the train/validation indices. 33 | :param num_workers: number of subprocesses to use when loading the dataset. 34 | :param pin_memory: whether to copy tensors into CUDA pinned memory. 35 | Set it to True if using GPU. 36 | 37 | :return train_loader: training set iterator. 38 | :return valid_loader: validation set iterator. 39 | """ 40 | error_msg = "[!] valid_size should be in the range [0, 1]." 41 | assert (valid_size >= 0) and (valid_size <= 1), error_msg 42 | 43 | normalize = transforms.Normalize( 44 | mean=[0.4914, 0.4822, 0.4465], 45 | std=[0.2023, 0.1994, 0.2010], 46 | ) 47 | 48 | # define transforms 49 | valid_transform = transforms.Compose( 50 | [ 51 | transforms.ToTensor(), 52 | normalize, 53 | ] 54 | ) 55 | 56 | # augmentation 57 | train_transform = transforms.Compose( 58 | [ 59 | transforms.RandomCrop(32, padding=4), 60 | transforms.RandomHorizontalFlip(), 61 | transforms.ToTensor(), 62 | normalize, 63 | ] 64 | ) 65 | 66 | # load the dataset 67 | ## Actually, download should be set to be False, because it is not multiprocess safe. 68 | ## So you should prefetch dataset. 
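    ## A one-time prefetch before launching the processes could look like this
    ## (a hedged sketch; run it from the repo root so root= matches --dpath):
    ##   python -c "from torchvision import datasets; datasets.CIFAR100(root='data/', download=True)"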
69 | ## Ref: https://leimao.github.io/blog/PyTorch-Distributed-Training/ 70 | train_dataset = datasets.CIFAR100( 71 | root=data_dir, 72 | train=True, 73 | download=True, ## 74 | transform=train_transform, 75 | ) 76 | 77 | valid_dataset = datasets.CIFAR100( 78 | root=data_dir, 79 | train=True, 80 | download=True, ## 81 | transform=valid_transform, 82 | ) 83 | 84 | # train/valid split 85 | num_train = len(train_dataset) 86 | indices = list(range(num_train)) 87 | split = int(np.floor(valid_size * num_train)) 88 | 89 | if shuffle: 90 | np.random.shuffle(indices) 91 | 92 | train_idx, valid_idx = indices[split:], indices[:split] 93 | 94 | # split indice explicitly before DistributedSampler 95 | train_dataset = Subset(train_dataset, train_idx) 96 | valid_dataset = Subset(valid_dataset, valid_idx) 97 | 98 | train_sampler = DistributedSampler(train_dataset) 99 | 100 | train_loader = torch.utils.data.DataLoader( 101 | train_dataset, 102 | batch_size=batch_size, 103 | sampler=train_sampler, 104 | num_workers=num_workers, 105 | pin_memory=pin_memory, 106 | drop_last=True, 107 | shuffle=(train_sampler is None), 108 | ) 109 | valid_loader = torch.utils.data.DataLoader( 110 | valid_dataset, 111 | batch_size=batch_size, 112 | num_workers=num_workers, 113 | pin_memory=pin_memory, 114 | ) 115 | 116 | return train_loader, train_sampler, valid_loader 117 | 118 | 119 | def get_tst_loader( 120 | data_dir: str, 121 | batch_size: int, 122 | shuffle: bool = True, 123 | num_workers: int = 4, 124 | pin_memory: bool = False, 125 | ): 126 | """ 127 | Utility function for loading and returning a multi-process 128 | test iterator over the CIFAR-100 dataset. 129 | If using CUDA, num_workers should be set to 1 and pin_memory to True. 130 | :param data_dir: path directory to the dataset. 131 | :param batch_size: how many samples per batch to load. 132 | :param shuffle: whether to shuffle the dataset after every epoch. 133 | :param num_workers: number of subprocesses to use when loading the dataset. 134 | :param pin_memory: whether to copy tensors into CUDA pinned memory. 135 | Set it to True if using GPU. 136 | 137 | :return data_loader: test set iterator. 
138 |     """
139 |     normalize = transforms.Normalize(
140 |         mean=[0.485, 0.456, 0.406],
141 |         std=[0.229, 0.224, 0.225],
142 |     )
143 | 
144 |     # define transform
145 |     transform = transforms.Compose(
146 |         [
147 |             transforms.ToTensor(),
148 |             normalize,
149 |         ]
150 |     )
151 | 
152 |     dataset = datasets.CIFAR100(
153 |         root=data_dir,
154 |         train=False,
155 |         download=True,
156 |         transform=transform,
157 |     )
158 |     sampler = DistributedSampler(dataset)
159 |     data_loader = torch.utils.data.DataLoader(
160 |         dataset,
161 |         batch_size=batch_size,
162 |         # NOTE: the `shuffle` argument is deliberately not forwarded here.
163 |         # DataLoader raises a ValueError when shuffle=True is combined with a
164 |         # custom sampler, and DistributedSampler already handles shuffling.
165 |         sampler=sampler,
166 |         num_workers=num_workers,
167 |         pin_memory=pin_memory,
168 |     )
169 | 
170 |     return data_loader
--------------------------------------------------------------------------------
/src/single/trainer.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import logging
3 | import os
4 | import random
5 | from typing import *
6 | 
7 | import torch
8 | import torch.nn as nn
9 | import torch.optim as optim
10 | import yaml
11 | from tensorboardX import SummaryWriter
12 | from tqdm import tqdm
13 | 
14 | from dataset import get_trn_val_loader, get_tst_loader
15 | from utils import AverageMeter, accuracy
16 | 
17 | 
18 | class Trainer:
19 |     def __init__(self, hparams, model, scaler):
20 |         self.hparams = hparams
21 |         self.device = "cuda" if torch.cuda.is_available() else "cpu"
22 |         self.dset = hparams.dset
23 | 
24 |         self.model_name = hparams.model
25 |         self.model = model
26 |         self.model = model.to(self.device)
27 |         self.scaler = scaler
28 | 
29 |         # optimizer, scheduler
30 |         self.optimizer, self.lr_scheduler = self.configure_optimizers()
31 | 
32 |         # metric
33 |         self.criterion = nn.CrossEntropyLoss()
34 | 
35 |         # dataloader
36 |         self.train_loader, self.val_loader = get_trn_val_loader(
37 |             data_dir=hparams.dpath.strip(),
38 |             batch_size=hparams.batch_size,
39 |             valid_size=0.1,
40 |             num_workers=hparams.workers,
41 |             pin_memory=True,
42 |         )
43 |         self.test_loader = get_tst_loader(
44 |             data_dir=hparams.dpath.strip(),
45 |             batch_size=hparams.batch_size,
46 |             shuffle=False,
47 |             num_workers=1,
48 |             pin_memory=True,
49 |         )
50 | 
51 |         # model-saving options
52 |         self.version = 0
53 |         while True:
54 |             self.save_path = os.path.join(hparams.ckpt_path, f"version-{self.version}")
55 |             if not os.path.exists(self.save_path):
56 |                 os.makedirs(self.save_path)
57 |                 break
58 |             else:
59 |                 self.version += 1
60 |         self.summarywriter = SummaryWriter(self.save_path)
61 |         self.global_step = 0
62 |         self.global_val_loss = 1e5
63 |         self.global_top1_acc = 0
64 |         self.eval_step = hparams.eval_step
65 |         logging.basicConfig(
66 |             filename=os.path.join(self.save_path, "experiment.log"),
67 |             level=logging.INFO,
68 |             format="%(asctime)s > %(message)s",
69 |         )
70 |         with open(
71 |             os.path.join(self.save_path, "hparams.yaml"), "w", encoding="utf8"
72 |         ) as outfile:
73 |             yaml.dump(hparams, outfile, default_flow_style=False, allow_unicode=True)
74 | 
75 |         # experiment-logging options
76 |         self.best_result = {"version": self.version}
77 | 
78 |     def configure_optimizers(self):
79 |         # optimizer
80 |         optimizer = optim.SGD(
81 |             self.model.parameters(),
82 |             lr=self.hparams.lr,
83 |             weight_decay=self.hparams.weight_decay,
84 |             momentum=0.9,
85 |             nesterov=True,
86 |         )
87 | 
88 |         # lr scheduler (optional)
89 |         scheduler = optim.lr_scheduler.StepLR(
90 |             optimizer,
91 |             step_size=self.hparams.lr_decay_step_size,
92 |             gamma=self.hparams.lr_decay_gamma,
93 |         )
94 |         return optimizer, scheduler
95 | 
96 |     def 
save_checkpoint(self, epoch: int, val_acc: float, model: nn.Module) -> None: 97 | logging.info( 98 | f"Val acc increased ({self.global_top1_acc:.4f} → {val_acc:.4f}). Saving model ..." 99 | ) 100 | new_path = os.path.join( 101 | self.save_path, f"best_model_epoch_{epoch}_acc_{val_acc:.4f}.pt" 102 | ) 103 | 104 | for filename in glob.glob(os.path.join(self.save_path, "*.pt")): 105 | os.remove(filename) # remove old checkpoint 106 | torch.save(model.state_dict(), new_path) 107 | self.global_top1_acc = val_acc 108 | 109 | def fit(self) -> dict: 110 | for epoch in tqdm(range(self.hparams.epoch), desc="epoch"): 111 | logging.info(f"* Learning Rate: {self.optimizer.param_groups[0]['lr']:.5f}") 112 | result = self._train_epoch(epoch) 113 | 114 | # update checkpoint 115 | if result["val_acc"] > self.global_top1_acc: 116 | self.save_checkpoint(epoch, result["val_acc"], self.model) 117 | self.lr_scheduler.step() 118 | 119 | self.summarywriter.close() 120 | return self.version 121 | 122 | def _train_epoch(self, epoch: int) -> dict: 123 | train_loss = AverageMeter() 124 | 125 | self.model.train() 126 | for step, batch in tqdm( 127 | enumerate(self.train_loader), 128 | desc="train_steps", 129 | total=len(self.train_loader), 130 | ): 131 | img, label = map(lambda x: x.to(self.device), batch) 132 | 133 | self.optimizer.zero_grad() 134 | if self.hparams.amp: 135 | with torch.cuda.amp.autocast(): 136 | logit = self.model(img) 137 | loss = self.criterion(logit, label) 138 | self.scaler.scale(loss).backward() 139 | self.scaler.step(self.optimizer) 140 | self.scaler.update() 141 | else: 142 | logit = self.model(img) 143 | loss = self.criterion(logit, label) 144 | loss.backward() 145 | self.optimizer.step() 146 | 147 | train_loss.update(loss.item()) 148 | 149 | self.global_step += 1 150 | if self.global_step % self.eval_step == 0: 151 | logging.info( 152 | f"[Single Version {self.version} Epoch {epoch}] global step: {self.global_step}, train loss: {loss.item():.3f}" 153 | ) 154 | 155 | train_loss = train_loss.avg 156 | val_loss, val_acc = self.validate(epoch) 157 | 158 | # tensorboard writing 159 | self.summarywriter.add_scalars( 160 | "lr", {"lr": self.optimizer.param_groups[0]["lr"]}, epoch 161 | ) 162 | self.summarywriter.add_scalars( 163 | "loss/step", {"val": val_loss, "train": train_loss}, self.global_step 164 | ) 165 | self.summarywriter.add_scalars( 166 | "loss/epoch", {"val": val_loss, "train": train_loss}, epoch 167 | ) 168 | self.summarywriter.add_scalars("acc/epoch", {"val": val_acc}, epoch) 169 | logging.info( 170 | f"** global step: {self.global_step}, val loss: {val_loss:.3f}, val_acc: {val_acc:.2f}%" 171 | ) 172 | 173 | return {"val_loss": val_loss, "val_acc": val_acc} 174 | 175 | def validate(self, epoch: int) -> Tuple[float]: 176 | val_loss = AverageMeter() 177 | top1 = AverageMeter() 178 | 179 | self.model.eval() 180 | with torch.no_grad(): 181 | for step, batch in tqdm( 182 | enumerate(self.val_loader), 183 | desc="valid_steps", 184 | total=len(self.val_loader), 185 | ): 186 | img, label = map(lambda x: x.to(self.device), batch) 187 | pred = self.model(img) 188 | loss = self.criterion(pred, label) 189 | val_loss.update(loss.item()) 190 | 191 | prec1 = accuracy(pred, label, topk=(1,))[0] 192 | top1.update(prec1.item()) 193 | 194 | return val_loss.avg, top1.avg 195 | 196 | def test(self, state_dict) -> dict: 197 | test_loss = AverageMeter() 198 | top1 = AverageMeter() 199 | top5 = AverageMeter() 200 | 201 | self.model.load_state_dict(state_dict) 202 | self.model.eval() 203 | with 
torch.no_grad(): 204 | for step, batch in tqdm( 205 | enumerate(self.test_loader), 206 | desc="tst_steps", 207 | total=len(self.test_loader), 208 | ): 209 | img, label = map(lambda x: x.to(self.device), batch) 210 | pred = self.model(img) 211 | 212 | loss = self.criterion(pred, label) 213 | test_loss.update(loss.item()) 214 | 215 | prec1, prec5 = accuracy(pred, label, topk=(1, 5)) 216 | top1.update(prec1.item()) 217 | top5.update(prec5.item()) 218 | 219 | print() 220 | print(f"** Test Loss: {test_loss.avg:.4f}") 221 | print(f"** Top-1 Accuracy: {top1.avg:.4f}%") 222 | print(f"** Top-5 Accuracy: {top5.avg:.4f}%") 223 | print() 224 | return { 225 | "test_loss": test_loss.avg, 226 | "top_1_acc": top1.avg, 227 | "top_5_acc": top5.avg, 228 | } -------------------------------------------------------------------------------- /src/dp/trainer.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import logging 3 | import os 4 | import random 5 | from typing import * 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.optim as optim 10 | import yaml 11 | from tensorboardX import SummaryWriter 12 | from tqdm import tqdm 13 | 14 | from dataset import get_trn_val_loader, get_tst_loader 15 | from utils import AverageMeter, accuracy 16 | 17 | 18 | class Trainer: 19 | def __init__(self, hparams, model, scaler): 20 | self.hparams = hparams 21 | self.device = "cuda" if torch.cuda.is_available() else "cpu" 22 | self.dset = hparams.dset 23 | 24 | self.model_name = hparams.model 25 | self.model = model 26 | self.model = model.to(self.device) 27 | self.model = nn.DataParallel(self.model) 28 | self.scaler = scaler 29 | 30 | # optimizer, scheduler 31 | self.optimizer, self.lr_scheduler = self.configure_optimizers() 32 | 33 | # metric 34 | self.criterion = nn.CrossEntropyLoss() 35 | 36 | # dataloader 37 | self.train_loader, self.val_loader = get_trn_val_loader( 38 | data_dir=hparams.dpath.strip(), 39 | batch_size=hparams.batch_size, 40 | valid_size=0.1, 41 | num_workers=hparams.workers, 42 | pin_memory=True, 43 | ) 44 | self.test_loader = get_tst_loader( 45 | data_dir=hparams.dpath.strip(), 46 | batch_size=hparams.batch_size, 47 | shuffle=False, 48 | num_workers=1, 49 | pin_memory=True, 50 | ) 51 | 52 | # model-saving options 53 | self.version = 0 54 | while True: 55 | self.save_path = os.path.join(hparams.ckpt_path, f"version-{self.version}") 56 | if not os.path.exists(self.save_path): 57 | os.makedirs(self.save_path) 58 | break 59 | else: 60 | self.version += 1 61 | self.summarywriter = SummaryWriter(self.save_path) 62 | self.global_step = 0 63 | self.global_val_loss = 1e5 64 | self.global_top1_acc = 0 65 | self.eval_step = hparams.eval_step 66 | logging.basicConfig( 67 | filename=os.path.join(self.save_path, "experiment.log"), 68 | level=logging.INFO, 69 | format="%(asctime)s > %(message)s", 70 | ) 71 | with open( 72 | os.path.join(self.save_path, "hparams.yaml"), "w", encoding="utf8" 73 | ) as outfile: 74 | yaml.dump(hparams, outfile, default_flow_style=False, allow_unicode=True) 75 | 76 | # experiment-logging options 77 | self.best_result = {"version": self.version} 78 | 79 | def configure_optimizers(self): 80 | # optimizer 81 | optimizer = optim.SGD( 82 | self.model.parameters(), 83 | lr=self.hparams.lr, 84 | weight_decay=self.hparams.weight_decay, 85 | momentum=0.9, 86 | nesterov=True, 87 | ) 88 | 89 | # lr scheduler (optional) 90 | scheduler = optim.lr_scheduler.StepLR( 91 | optimizer, 92 | step_size=self.hparams.lr_decay_step_size, 93 | 
gamma=self.hparams.lr_decay_gamma,
94 | )
95 | return optimizer, scheduler
96 |
97 | def save_checkpoint(self, epoch: int, val_acc: float, model: nn.Module) -> None:
98 | logging.info(
99 | f"Val acc increased ({self.global_top1_acc:.4f} → {val_acc:.4f}). Saving model ..."
100 | )
101 | new_path = os.path.join(
102 | self.save_path, f"best_model_epoch_{epoch}_acc_{val_acc:.4f}.pt"
103 | )
104 |
105 | for filename in glob.glob(os.path.join(self.save_path, "*.pt")):
106 | os.remove(filename)  # remove old checkpoint
107 | torch.save(model.state_dict(), new_path)  # keys keep the DataParallel "module." prefix; test() loads into the wrapped model
108 | self.global_top1_acc = val_acc
109 |
110 | def fit(self) -> int:  # returns the version index of this run
111 | for epoch in tqdm(range(self.hparams.epoch), desc="epoch"):
112 | logging.info(f"* Learning Rate: {self.optimizer.param_groups[0]['lr']:.5f}")
113 | result = self._train_epoch(epoch)
114 |
115 | # update checkpoint
116 | if result["val_acc"] > self.global_top1_acc:
117 | self.save_checkpoint(epoch, result["val_acc"], self.model)
118 | self.lr_scheduler.step()
119 |
120 | self.summarywriter.close()
121 | return self.version
122 |
123 | def _train_epoch(self, epoch: int) -> dict:
124 | train_loss = AverageMeter()
125 |
126 | self.model.train()
127 | for step, batch in tqdm(
128 | enumerate(self.train_loader),
129 | desc="train_steps",
130 | total=len(self.train_loader),
131 | ):
132 | img, label = map(lambda x: x.to(self.device), batch)
133 |
134 | self.optimizer.zero_grad()
135 | if self.hparams.amp:
136 | with torch.cuda.amp.autocast():
137 | logit = self.model(img)
138 | loss = self.criterion(logit, label)
139 | self.scaler.scale(loss).backward()
140 | self.scaler.step(self.optimizer)
141 | self.scaler.update()
142 | else:
143 | logit = self.model(img)
144 | loss = self.criterion(logit, label)
145 | loss.backward()
146 | self.optimizer.step()
147 |
148 | train_loss.update(loss.item())
149 |
150 | self.global_step += 1
151 | if self.global_step % self.eval_step == 0:
152 | logging.info(
153 | f"[DP Version {self.version} Epoch {epoch}] global step: {self.global_step}, train loss: {loss.item():.3f}"
154 | )
155 |
156 | train_loss = train_loss.avg
157 | val_loss, val_acc = self.validate(epoch)
158 |
159 | # tensorboard writing
160 | self.summarywriter.add_scalars(
161 | "lr", {"lr": self.optimizer.param_groups[0]["lr"]}, epoch
162 | )
163 | self.summarywriter.add_scalars(
164 | "loss/step", {"val": val_loss, "train": train_loss}, self.global_step
165 | )
166 | self.summarywriter.add_scalars(
167 | "loss/epoch", {"val": val_loss, "train": train_loss}, epoch
168 | )
169 | self.summarywriter.add_scalars("acc/epoch", {"val": val_acc}, epoch)
170 | logging.info(
171 | f"** global step: {self.global_step}, val loss: {val_loss:.3f}, val_acc: {val_acc:.2f}%"
172 | )
173 |
174 | return {"val_loss": val_loss, "val_acc": val_acc}
175 |
176 | def validate(self, epoch: int) -> Tuple[float, float]:
177 | val_loss = AverageMeter()
178 | top1 = AverageMeter()
179 |
180 | self.model.eval()
181 | with torch.no_grad():
182 | for step, batch in tqdm(
183 | enumerate(self.val_loader),
184 | desc="valid_steps",
185 | total=len(self.val_loader),
186 | ):
187 | img, label = map(lambda x: x.to(self.device), batch)
188 | pred = self.model(img)
189 | loss = self.criterion(pred, label)
190 | val_loss.update(loss.item())
191 |
192 | prec1 = accuracy(pred, label, topk=(1,))[0]
193 | top1.update(prec1.item())
194 |
195 | return val_loss.avg, top1.avg
196 |
197 | def test(self, state_dict) -> dict:
198 | test_loss = AverageMeter()
199 | top1 = AverageMeter()
200 | top5 = AverageMeter()
201 |
202 |
self.model.load_state_dict(state_dict) 203 | self.model.eval() 204 | with torch.no_grad(): 205 | for step, batch in tqdm( 206 | enumerate(self.test_loader), 207 | desc="tst_steps", 208 | total=len(self.test_loader), 209 | ): 210 | img, label = map(lambda x: x.to(self.device), batch) 211 | pred = self.model(img) 212 | 213 | loss = self.criterion(pred, label) 214 | test_loss.update(loss.item()) 215 | 216 | prec1, prec5 = accuracy(pred, label, topk=(1, 5)) 217 | top1.update(prec1.item()) 218 | top5.update(prec5.item()) 219 | 220 | print() 221 | print(f"** Test Loss: {test_loss.avg:.4f}") 222 | print(f"** Top-1 Accuracy: {top1.avg:.4f}%") 223 | print(f"** Top-5 Accuracy: {top5.avg:.4f}%") 224 | print() 225 | return { 226 | "test_loss": test_loss.avg, 227 | "top_1_acc": top1.avg, 228 | "top_5_acc": top5.avg, 229 | } -------------------------------------------------------------------------------- /src/ddp/trainer.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import logging 3 | import os 4 | import random 5 | from typing import * 6 | 7 | import torch 8 | import torch.distributed as dist 9 | import torch.nn as nn 10 | import torch.optim as optim 11 | import yaml 12 | from tensorboardX import SummaryWriter 13 | from torch.nn.parallel import DistributedDataParallel as DDP 14 | from tqdm import tqdm 15 | 16 | from dataset import get_trn_val_loader, get_tst_loader 17 | from utils import AverageMeter, accuracy 18 | 19 | 20 | class Trainer: 21 | def __init__(self, hparams, model, scaler, rank, ngpus_per_node): 22 | self.hparams = hparams 23 | self.rank = rank 24 | self.nprocs = torch.cuda.device_count() 25 | self.device = f"cuda:{rank}" if torch.cuda.is_available() else "cpu" 26 | self.dset = hparams.dset 27 | 28 | self.model_name = hparams.model 29 | self.model = model 30 | self.model = model.to(self.device, non_blocking=True) 31 | self.model = DDP(self.model, device_ids=[rank], find_unused_parameters=True) 32 | self.scaler = scaler 33 | 34 | hparams.batch_size = int(hparams.batch_size / ngpus_per_node) 35 | 36 | # optimizer, scheduler 37 | self.optimizer, self.lr_scheduler = self.configure_optimizers() 38 | 39 | # metric 40 | self.criterion = nn.CrossEntropyLoss() 41 | 42 | # dataloader and distributed sampler 43 | self.train_loader, self.train_sampler, self.val_loader = get_trn_val_loader( 44 | data_dir=hparams.dpath.strip(), 45 | batch_size=hparams.batch_size, 46 | valid_size=0.1, 47 | num_workers=hparams.workers, 48 | pin_memory=True, 49 | ) 50 | self.test_loader = get_tst_loader( 51 | data_dir=hparams.dpath.strip(), 52 | batch_size=hparams.batch_size, 53 | shuffle=False, 54 | num_workers=1, 55 | pin_memory=True, 56 | ) 57 | 58 | # model-saving options (only at rank 0) 59 | if self.rank == 0: 60 | self.version = 0 61 | while True: 62 | self.save_path = os.path.join( 63 | hparams.ckpt_path, f"version-{self.version}" 64 | ) 65 | if not os.path.exists(self.save_path): 66 | os.makedirs(self.save_path) 67 | break 68 | else: 69 | self.version += 1 70 | self.summarywriter = SummaryWriter(self.save_path) 71 | self.global_step = 0 72 | self.global_val_loss = 1e5 73 | self.global_top1_acc = 0 74 | self.eval_step = hparams.eval_step 75 | logging.basicConfig( 76 | filename=os.path.join(self.save_path, "experiment.log"), 77 | level=logging.INFO, 78 | format="%(asctime)s > %(message)s", 79 | ) 80 | with open( 81 | os.path.join(self.save_path, "hparams.yaml"), "w", encoding="utf8" 82 | ) as outfile: 83 | yaml.dump( 84 | hparams, outfile, 
default_flow_style=False, allow_unicode=True
85 | )
86 |
87 | # experiment-logging options
88 | self.best_result = {"version": self.version}
89 |
90 | def configure_optimizers(self):
91 | # optimizer
92 | optimizer = optim.SGD(
93 | self.model.parameters(),
94 | lr=self.hparams.lr,
95 | weight_decay=self.hparams.weight_decay,
96 | momentum=0.9,
97 | nesterov=True,
98 | )
99 |
100 | # lr scheduler (optional)
101 | scheduler = optim.lr_scheduler.StepLR(
102 | optimizer,
103 | step_size=self.hparams.lr_decay_step_size,
104 | gamma=self.hparams.lr_decay_gamma,
105 | )
106 | return optimizer, scheduler
107 |
108 | def save_checkpoint(self, epoch: int, val_acc: float, model: nn.Module) -> None:
109 | logging.info(
110 | f"Val acc increased ({self.global_top1_acc:.4f} → {val_acc:.4f}). Saving model ..."
111 | )
112 | new_path = os.path.join(
113 | self.save_path, f"best_model_epoch_{epoch}_acc_{val_acc:.4f}.pt"
114 | )
115 |
116 | for filename in glob.glob(os.path.join(self.save_path, "*.pt")):
117 | os.remove(filename)  # remove old checkpoint
118 | torch.save(model.state_dict(), new_path)  # keys keep the DDP "module." prefix; test() loads into the wrapped model
119 | self.global_top1_acc = val_acc
120 |
121 | def fit(self) -> Optional[int]:  # rank 0 returns the version index; other ranks return None
122 | for epoch in tqdm(
123 | range(self.hparams.epoch), desc="epoch", disable=self.rank not in [0]
124 | ):
125 | self.train_sampler.set_epoch(epoch)  # reshuffle shards every epoch
126 |
127 | logging.info(f"* Learning Rate: {self.optimizer.param_groups[0]['lr']:.5f}")
128 | result = self._train_epoch(epoch)
129 |
130 | # update checkpoint (only rank 0 tracks the best accuracy)
131 | if self.rank == 0 and result["val_acc"] > self.global_top1_acc:
132 | self.save_checkpoint(epoch, result["val_acc"], self.model)
133 | self.lr_scheduler.step()
134 |
135 | if self.rank == 0:
136 | self.summarywriter.close()
137 | return self.version if self.rank == 0 else None
138 |
139 | def _train_epoch(self, epoch: int) -> dict:
140 | train_loss = AverageMeter()
141 |
142 | self.model.train()
143 | for step, batch in tqdm(
144 | enumerate(self.train_loader),
145 | desc="train_steps",
146 | total=len(self.train_loader),
147 | disable=self.rank not in [0],  # show the progress bar only on rank 0
148 | ):
149 | img, label = map(lambda x: x.to(self.device, non_blocking=True), batch)
150 |
151 | self.optimizer.zero_grad()
152 | if self.hparams.amp:
153 | with torch.cuda.amp.autocast():
154 | logit = self.model(img)
155 | loss = self.criterion(logit, label)
156 | dist.barrier()  # synchronize ranks before backward
157 | self.scaler.scale(loss).backward()
158 | self.scaler.step(self.optimizer)
159 | self.scaler.update()
160 | else:
161 | logit = self.model(img)
162 | loss = self.criterion(logit, label)
163 | dist.barrier()  # synchronize ranks before backward
164 | loss.backward()
165 | self.optimizer.step()
166 |
167 | train_loss.update(loss.item())
168 |
169 | if self.rank == 0:
170 | self.global_step += 1
171 | if self.global_step % self.eval_step == 0:
172 | logging.info(
173 | f"[DDP Version {self.version} Epoch {epoch}] global step: {self.global_step}, train loss: {loss.item():.3f}"
174 | )
175 |
176 | train_loss = train_loss.avg
177 |
178 | if self.rank == 0:
179 | val_loss, val_acc = self.validate(epoch)
180 |
181 | # tensorboard writing
182 | self.summarywriter.add_scalars(
183 | "lr", {"lr": self.optimizer.param_groups[0]["lr"]}, epoch
184 | )
185 | self.summarywriter.add_scalars(
186 | "loss/step", {"val": val_loss, "train": train_loss}, self.global_step
187 | )
188 | self.summarywriter.add_scalars(
189 | "loss/epoch", {"val": val_loss, "train": train_loss}, epoch
190 | )
191 | self.summarywriter.add_scalars("acc/epoch", {"val": val_acc}, epoch)
192 | logging.info(
193 | f"** global step: {self.global_step}, val loss: {val_loss:.3f}, val_acc: {val_acc:.2f}%"
194 | )
195 |
196 | return {"val_loss": val_loss, "val_acc": val_acc}
197 | return None
198 |
199 | def validate(self, epoch: int) -> Tuple[float, float]:
200 | val_loss = AverageMeter()
201 | top1 = AverageMeter()
202 |
203 | self.model.eval()
204 | with torch.no_grad():
205 | for step, batch in tqdm(
206 | enumerate(self.val_loader),
207 | desc="valid_steps",
208 | total=len(self.val_loader),
209 | ):
210 | img, label = map(lambda x: x.to(self.device, non_blocking=True), batch)
211 | pred = self.model(img)
212 |
213 | loss = self.criterion(pred, label)
214 | val_loss.update(loss.item())
215 |
216 | prec1 = accuracy(pred, label, topk=(1,))[0]
217 | top1.update(prec1.item())
218 |
219 | return val_loss.avg, top1.avg
220 |
221 | def test(self, state_dict) -> dict:
222 | test_loss = AverageMeter()
223 | top1 = AverageMeter()
224 | top5 = AverageMeter()
225 |
226 | self.model.load_state_dict(state_dict)
227 | self.model.eval()
228 | with torch.no_grad():
229 | for step, batch in tqdm(
230 | enumerate(self.test_loader),
231 | desc="tst_steps",
232 | total=len(self.test_loader),
233 | ):
234 | img, label = map(lambda x: x.to(self.device, non_blocking=True), batch)
235 | pred = self.model(img)
236 |
237 | loss = self.criterion(pred, label)
238 | test_loss.update(loss.item())
239 |
240 | prec1, prec5 = accuracy(pred, label, topk=(1, 5))
241 | top1.update(prec1.item())
242 | top5.update(prec5.item())
243 |
244 | logging.info(f"** Test Loss: {test_loss.avg:.4f}")
245 | logging.info(f"** Top-1 Accuracy: {top1.avg:.4f}%")
246 | logging.info(f"** Top-5 Accuracy: {top5.avg:.4f}%")
247 |
248 | return {
249 | "test_loss": test_loss.avg,
250 | "top_1_acc": top1.avg,
251 | "top_5_acc": top5.avg,
252 | }
253 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------
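Addendum: a minimal sketch of the torch.distributed bootstrap that an entry point like src/ddp/main.py typically performs before constructing the DDP Trainer above. This is illustrative only, not the repo's actual main.py; the helper name main_worker, the localhost address/port defaults, and the single-node assumption are assumptions introduced here.

    import os

    import torch
    import torch.distributed as dist
    import torch.multiprocessing as mp


    def main_worker(rank: int, world_size: int) -> None:
        # one process per GPU; every process joins the default group via env://
        os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
        os.environ.setdefault("MASTER_PORT", "29500")
        dist.init_process_group(backend="nccl", rank=rank, world_size=world_size)
        torch.cuda.set_device(rank)

        # build hparams/model/scaler as in src/ddp/main.py, then:
        # trainer = Trainer(hparams, model, scaler, rank, world_size)
        # trainer.fit()

        dist.destroy_process_group()


    if __name__ == "__main__":
        world_size = torch.cuda.device_count()  # single-node assumption
        mp.spawn(main_worker, args=(world_size,), nprocs=world_size)

With this pattern, mp.spawn passes each spawned process its index as the first argument, which lines up with the rank parameter the DDP Trainer expects; per-rank device placement then follows from torch.cuda.set_device(rank).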