├── config
    ├── __init__.py
    ├── search.json
    └── config.py
├── flashlight
    ├── __init__.py
    ├── runner
    │   ├── __init__.py
    │   ├── pl.py
    │   └── main_pl.py
    ├── utils
    │   └── func.py
    ├── network
    │   └── __init__.py
    └── dataloader
    │   └── __init__.py
├── .gitignore
├── requirements.txt
├── Dockerfile
├── nni_config.yml
├── run.py
├── .github
    └── code-formatting.yml
├── run_nni.py
├── .pep8speaks.yml
└── README.md


/config/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/flashlight/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/flashlight/runner/__init__.py:
--------------------------------------------------------------------------------
1 | from .main_pl import MainPL
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | .vscode/
3 | Logs/
4 | .pytest_cache/
5 | .coverage


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | omegaconf==2.0.5
2 | pytorch-lightning==1.2.5
3 | albumentations
4 | 


--------------------------------------------------------------------------------
/config/search.json:
--------------------------------------------------------------------------------
1 | {
2 |     "train.batch_size":{"_type":"choice","_value":[32,64,128]},
3 |     "network.version":{"_type":"choice","_value":["1_0", "1_1"]}
4 | }


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM pytorch/pytorch:1.7.0-cuda11.0-cudnn8-runtime
2 | 
3 | RUN mkdir app
4 | WORKDIR app
5 | RUN git clone https://github.com/davinnovation/pytorch-boilerplate
6 | WORKDIR pytorch-boilerplate
7 | 
8 | RUN pip install -r requirements.txt


--------------------------------------------------------------------------------
/nni_config.yml:
--------------------------------------------------------------------------------
 1 | authorName: davinnovation
 2 | experimentName: example_mnist
 3 | trialConcurrency: 1
 4 | maxExecDuration: 5h
 5 | maxTrialNum: 10
 6 | #choice: local, remote, pai
 7 | trainingServicePlatform: local
 8 | searchSpacePath: config/search.json
 9 | #choice: true, false
10 | useAnnotation: false
11 | tuner:
12 |   builtinTunerName: GridSearch
13 | trial:
14 |   command: python run_nni.py
15 |   codeDir: .
16 |   gpuNum: 1


--------------------------------------------------------------------------------
/flashlight/utils/func.py:
--------------------------------------------------------------------------------
 1 | import inspect
 2 | 
 3 | 
 4 | def function_arg_checker(function, args_dict):
 5 |     def check_option(given_keys, target_keys):
 6 |         for given in given_keys:
 7 |             if not given in target_keys:
 8 |                 return False
 9 |         return True
10 | 
11 |     if len(args_dict.keys()) > 0:
12 |         assert check_option(list(args_dict.keys()), list(inspect.getfullargspec(function)[0]),)
13 | 


--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | os.environ["KMP_DUPLICATE_LIB_OK"] = "True"
 4 | 
 5 | from omegaconf import OmegaConf
 6 | 
 7 | from config import config as dc
 8 | 
 9 | from flashlight.runner import main_pl
10 | 
11 | 
def _main(cfg=dc.DefaultConfig) -> None:
    """Run a single experiment.

    Builds an OmegaConf structured config from ``cfg``, merges command-line
    overrides into it, and hands each config section to ``MainPL``.
    """
    conf = OmegaConf.structured(cfg)
    conf.merge_with_cli()

    runner = main_pl.MainPL(
        hw=conf.hw,
        network=conf.network,
        data=conf.data,
        opt=conf.opt,
        log=conf.log,
        seed=conf.seed,
    )
    runner.run()


if __name__ == "__main__":
    _main()
24 | 


--------------------------------------------------------------------------------
/.github/code-formatting.yml:
--------------------------------------------------------------------------------
 1 | name: "Check Code Format"
 2 | on:
 3 |   # Trigger the workflow on push or pull request,
 4 |   # but only for the master branch
 5 |   push:
 6 |     branches:
 7 |       - master
 8 |   pull_request:
 9 |     branches:
10 |       - master
11 | 
12 | jobs:
13 |   code-black:
14 |     name: Check code formatting with Black
15 |     runs-on: ubuntu-20.04
16 |     steps:
17 |       - name: Checkout
18 |         uses: actions/checkout@v2
19 |       - name: Set up Python 3.8
20 |         uses: actions/setup-python@v2
21 |         with:
22 |           python-version: 3.8
23 |       - name: Install Black
24 |         run: pip install black==19.10b0
25 |       - name: Run Black
26 |         run: black -l 120 --check .


--------------------------------------------------------------------------------
/flashlight/network/__init__.py:
--------------------------------------------------------------------------------
 1 | import inspect
 2 | 
 3 | from ..utils import func
 4 | 
 5 | import torch
 6 | import torchvision
 7 | 
 8 | 
 9 | def check_network_option(network, network_option_dict):
10 |     if len(network_option_dict.keys()) > 0:
11 |         func.function_arg_checker(NETWORK_DICT[network], network_option_dict)
12 |     return network_option_dict
13 | 
14 | 
15 | """Network Define"""
16 | # Add {"network name": factory function that returns an nn.Module}
def _get_squeezenet(num_classes, version: str = "1_0", pretrained=False, progress=True):
    """Instantiate a torchvision SqueezeNet.

    Args:
        num_classes: Forwarded to the torchvision constructor as ``num_classes``.
        version: ``"1_0"`` or ``"1_1"``; any other value raises ``KeyError``.
        pretrained: Forwarded to the torchvision constructor as ``pretrained``.
        progress: Forwarded to the torchvision constructor as ``progress``.

    Returns:
        Whatever the selected torchvision constructor returns.
    """
    constructors = {
        "1_0": torchvision.models.squeezenet1_0,
        "1_1": torchvision.models.squeezenet1_1,
    }
    build = constructors[version]
    return build(pretrained=pretrained, progress=progress, num_classes=num_classes)
21 | 
22 | 
# Registry: network name (the key used by get_network / check_network_option)
# -> factory callable that builds the model.
NETWORK_DICT = {"squeezenet": _get_squeezenet}
24 | 
25 | 
def get_network(network_name, network_opt):
    """Build the network registered as ``network_name``.

    ``network_opt`` is unpacked into keyword arguments for the registered
    factory. An unregistered name raises ``KeyError``.
    """
    factory = NETWORK_DICT[network_name]
    return factory(**network_opt)
28 | 


--------------------------------------------------------------------------------
/config/config.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass, field
 2 | from omegaconf import MISSING
 3 | 
# Hardware-related settings.
@dataclass
class HWConfig:
    # Comma-separated GPU indices given as one string; MainPL._hw_intp splits it
    # on "," and an empty string yields no GPU.
    gpu_idx: str = "0"
    # Copied into MainPL's hw_args["num_workers"].
    num_workers: int = 10
 8 | 
 9 | 
# Network selection and its constructor options.
@dataclass
class NetworkConfig:  # flexible
    # Key into flashlight.network.NETWORK_DICT.
    network: str = "squeezenet"
    # Read by MainPL._network_intp and then dropped; not used further in the visible code.
    checkpoint: str = ""
    # Every remaining field is forwarded as a keyword argument to the network factory.
    # NOTE(review): MNIST has 10 classes; confirm that 11 is intended.
    num_classes: int = 11
    version: str = "1_0"
16 | 
17 | 
# Dataset selection and data-module options.
@dataclass
class DataConfig:
    # Key into flashlight.dataloader.DATA_DICT.
    ds_name: str = "MNIST"
    # Every remaining field is forwarded as a keyword argument to the data module constructor.
    data_dir: str = "./"
    train_batchsize: int = 256
23 | 
24 | 
# Optimizer selection and its constructor options.
@dataclass
class OptConfig:  # flexible
    # Name of the optimizer class looked up in torch.optim by MainPL._opt_intp.
    opt: str = "Adam"
    # Every remaining field is forwarded as a keyword argument to that optimizer.
    lr: float = 1e-3
29 | 
30 | 
# Logging and schedule settings consumed by MainPL._log_intp / MainPL.run.
@dataclass
class LogConfig:
    # Used as the TensorBoardLogger ``name``.
    project_name: str = "with_aug"
    # Passed to the Trainer as ``check_val_every_n_epoch``.
    val_log_freq_epoch: int = 1
    # Passed to the Trainer as both ``max_epochs`` and ``min_epochs``.
    epoch: int = 10
36 | 
@dataclass
class DefaultConfig:
    """Root configuration grouping every section consumed by ``MainPL``.

    Each section is created through ``default_factory`` so that every
    ``DefaultConfig`` instance owns fresh section objects. Using dataclass
    instances directly as defaults shares one object between all instances and
    is rejected as a mutable default by ``dataclasses`` on Python 3.11+.
    """

    hw: HWConfig = field(default_factory=HWConfig)
    network: NetworkConfig = field(default_factory=NetworkConfig)
    data: DataConfig = field(default_factory=DataConfig)
    opt: OptConfig = field(default_factory=OptConfig)
    log: LogConfig = field(default_factory=LogConfig)
    # Annotated as int to match the int default and MainPL's ``seed: int`` parameter.
    seed: int = 42
45 | 


--------------------------------------------------------------------------------
/run_nni.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | os.environ["KMP_DUPLICATE_LIB_OK"] = "True"  # NOT Safe
 4 | 
 5 | from omegaconf import OmegaConf
 6 | import nni
 7 | 
 8 | from flashlight.runner import main_pl
 9 | 
10 | from config import config as dc
11 | 
12 | 
def search_params_intp(params):
    """Convert flat, dotted search parameters into a nested dict.

    ``{"network.version": "1_1", "seed": 1}`` becomes
    ``{"network": {"version": "1_1"}, "seed": 1}``. Several dotted keys that
    share the same section are collected into one sub-dict instead of the
    last one replacing the others.

    Args:
        params: Mapping whose keys contain at most one ``"."``.

    Returns:
        A dict with one entry per section / plain key.

    Raises:
        ValueError: If a key contains more than one ``"."``.
    """
    nested = {}
    for name, value in params.items():
        parts = name.split(".")
        if len(parts) == 1:
            nested[name] = value
        elif len(parts) == 2:
            section, option = parts
            section_opts = nested.get(section)
            if not isinstance(section_opts, dict):
                # First option of this section (or a plain value is being replaced).
                section_opts = nested[section] = {}
            section_opts[option] = value
        else:
            raise ValueError(f"search parameter {name!r} has more than one '.'; expected 'section.option'")
    return nested
27 | 
28 | 
def _main(cfg=dc.DefaultConfig) -> None:
    """Run one NNI trial.

    Fetches the next parameter set from NNI, nests it with
    ``search_params_intp``, merges it over the structured default config, runs
    the experiment and reports the value returned by ``MainPL.run`` to NNI.

    NOTE(review): the merged keys must name sections/fields of ``cfg``; the
    shipped search space uses ``train.batch_size``, which does not appear in
    the default config -- confirm it should be ``data.train_batchsize``.
    """
    params = search_params_intp(nni.get_next_parameter())
    base = OmegaConf.structured(cfg)
    args = OmegaConf.merge(base, params)
    print(args)
    # MainPL takes exactly (hw, network, data, opt, log, seed); the config has
    # no train/val/test sections to pass.
    ml = main_pl.MainPL(args.hw, args.network, args.data, args.opt, args.log, args.seed)
    final_result = ml.run()
    nni.report_final_result(final_result)


if __name__ == "__main__":
    _main()
44 | 


--------------------------------------------------------------------------------
/flashlight/runner/pl.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | 
 3 | import torch
 4 | import torch.nn as nn
 5 | import pytorch_lightning as pl
 6 | 
 7 | 
 8 | class PLModule(pl.LightningModule):  # for classification
 9 |     def __init__(self, network, optimizer):
10 |         super(PLModule, self).__init__()
11 |         self.network = network["network"]
12 |         self.hparams = dict(network["network_option"])
13 |         self.optimizer = optimizer
14 | 
15 |         self.val_best_score = 0.0
16 | 
17 |         self.loss = nn.CrossEntropyLoss()  # TODO HardCoded
18 | 
19 |         # related to NNI
20 |         self.final_target = 0
21 | 
22 |     def forward(self, x):
23 |         pred = self.network(x)
24 |         return pred
25 | 
26 |     def training_step(self, batch, batch_idx):
27 |         pred = self.forward(batch[0])  # == self(batch[0])
28 |         Y = batch[1]
29 |         loss = self.loss(pred.float(), Y.long())
30 | 
31 |         self.log('train_loss', loss)
32 | 
33 |         return loss
34 | 
35 |     # def training_epoch_end(self, outputs):
36 |     #     pass
37 | 
38 |     def validation_step(self, batch, batch_idx):  # optional
39 |         pred = self.forward(batch[0])
40 |         Y = batch[1]
41 |         loss = self.loss(pred.float(), Y.long())
42 | 
43 |         self.log('val_loss', loss)
44 | 
45 |     # def validation_epoch_end(self, outputs):
46 |     #     pass
47 | 
48 |     def test_step(self, batch, batch_nb):  # optional
49 |         pred = self.forward(batch[0])
50 |         Y = batch[1]
51 |         loss = self.loss(pred.float(), Y.long())
52 | 
53 |         self.log('test_loss', loss)
54 | 
55 |     # def test_epoch_end(self, outputs):
56 |     #     pass
57 | 
58 |     def configure_optimizers(self):  # require
59 |         return self.optimizer
60 | 


--------------------------------------------------------------------------------
/.pep8speaks.yml:
--------------------------------------------------------------------------------
 1 | # File : .pep8speaks.yml
 2 | 
 3 | scanner:
 4 |     diff_only: True  # If False, the entire file touched by the Pull Request is scanned for errors. If True, only the diff is scanned.
 5 |     linter: pycodestyle  # Other option is flake8
 6 | 
 7 | pycodestyle:  # Same as scanner.linter value. Other option is flake8
 8 |     max-line-length: 119  # Default is 79 in PEP 8
 9 |     ignore:  # Errors and warnings to ignore
10 |         - W504  # line break after binary operator
11 |         - E402  # module level import not at top of file
12 |         - E731  # do not assign a lambda expression, use a def
13 |         - C406  # Unnecessary list literal - rewrite as a dict literal.
14 |         - E741  # ambiguous variable name
15 |         - F401
16 |         - F841
17 |         - E203 # whitespace before ':'. Opposite convention enforced by black
18 |         - E501 # line too long. Handled by black
19 |         - W503 # line break before binary operator, need for black
20 | 
21 | no_blank_comment: True  # If True, no comment is made on PR without any errors.
22 | descending_issues_order: False  # If True, PEP 8 issues in message will be displayed in descending order of line numbers in the file
23 | 
24 | message:  # Customize the comment made by the bot,
25 |     opened:  # Messages when a new PR is submitted
26 |         header: "Hello @{name}! Thanks for opening this PR. "
27 |                 # The keyword {name} is converted into the author's username
28 |         footer: "Do see the [Hitchhiker's guide to code style](https://goo.gl/hqbW4r)"
29 |                 # The messages can be written as they would over GitHub
30 |     updated:  # Messages when new commits are added to the PR
31 |         header: "Hello @{name}! Thanks for updating this PR. "
32 |         footer: ""  # Why to comment the link to the style guide everytime? :)
33 |     no_errors: "There are currently no PEP 8 issues detected in this Pull Request. Cheers! :beers: "


--------------------------------------------------------------------------------
/flashlight/dataloader/__init__.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | import albumentations as AB
 4 | from albumentations.pytorch import ToTensor, ToTensorV2
 5 | import torchvision
 6 | import torch
 7 | from torch.utils.data import random_split
 8 | import pytorch_lightning as pl
 9 | 
10 | from ..utils import func
11 | 
class MNISTDataModule(pl.LightningDataModule):
    """LightningDataModule for torchvision MNIST.

    Images are converted to 3-channel tensors (``Grayscale(num_output_channels=3)``
    followed by ``ToTensor``). The training set is split 55000/5000 into
    train/validation subsets.

    Args:
        data_dir: Root directory passed to ``torchvision.datasets.MNIST``.
        train_batchsize: Batch size of the training loader.
        val_batchsize: Batch size of the validation loader.
        test_batchsize: Batch size of the test loader.
        workers: ``num_workers`` used by every DataLoader (0 = load in the main process).
    """

    def __init__(self, data_dir: str, train_batchsize=32, val_batchsize=32, test_batchsize=32, workers=0):
        super().__init__()
        self.data_dir = data_dir
        self.transform = torchvision.transforms.Compose(
            [torchvision.transforms.Grayscale(num_output_channels=3), torchvision.transforms.ToTensor()]
        )
        self.train_batchsize = train_batchsize
        self.val_batchsize = val_batchsize
        self.test_batchsize = test_batchsize
        self.workers = workers

    def prepare_data(self):
        # Download both splits; the returned datasets are discarded.
        torchvision.datasets.MNIST(self.data_dir, train=True, download=True)
        torchvision.datasets.MNIST(self.data_dir, train=False, download=True)

    def setup(self, stage=None):
        if stage == "fit" or stage is None:
            mnist_full = torchvision.datasets.MNIST(self.data_dir, train=True, transform=self.transform)
            self.mnist_train, self.mnist_val = random_split(mnist_full, [55000, 5000])

        if stage == "test" or stage is None:
            self.mnist_test = torchvision.datasets.MNIST(self.data_dir, train=False, transform=self.transform)

    def _loader(self, dataset, batch_size):
        """Build a DataLoader that honours the configured worker count."""
        # ``workers`` used to be stored but never forwarded to the loaders.
        return torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=self.workers)

    def train_dataloader(self):
        # NOTE(review): the training loader is not shuffled between epochs -- confirm this is intended.
        return self._loader(self.mnist_train, self.train_batchsize)

    def val_dataloader(self):
        return self._loader(self.mnist_val, self.val_batchsize)

    def test_dataloader(self):
        return self._loader(self.mnist_test, self.test_batchsize)
45 | 
46 | """ Dataset """
47 | # Add {"dataset name": pl.LightningDataModule subclass}
# Registry: dataset name (the key used by get_data / check_data_option)
# -> data module class that is instantiated with the data options.
DATA_DICT = {
    "MNIST": MNISTDataModule
}
51 | 
def check_data_option(data, data_option_dict):
    """Validate the option names for dataset ``data`` and hand the options back.

    Prints both arguments first. A non-empty option mapping is checked against
    the ``__init__`` signature of the class registered under ``data`` in
    ``DATA_DICT``; an empty one is returned without validation.
    """
    print(data, data_option_dict)
    has_options = len(data_option_dict.keys()) != 0
    if has_options:
        constructor = DATA_DICT[data].__init__
        func.function_arg_checker(constructor, data_option_dict)
    return data_option_dict
57 | 
def get_data(data, data_option_dict):
    """Instantiate the class registered as ``data`` in ``DATA_DICT``.

    ``data_option_dict`` is unpacked into keyword arguments for the
    constructor. An unregistered name raises ``KeyError``.
    """
    module_cls = DATA_DICT[data]
    return module_cls(**data_option_dict)
60 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # pytorch-boilerplate : flashlight
  2 | 
  3 | The OTHER pytorch boilerplate.
  4 | 
  5 | ![Untitled](https://user-images.githubusercontent.com/3917185/84566705-4a425f80-adae-11ea-92f5-6290aff0478a.png)
  6 | 
  7 | - [x] LightningModule [flashlight/runner/pl.py]
  8 | - [x] Trainer [flashlight/runner/main_pl.py]
  9 | 
 10 | - [ ] Accelerators 
 11 | - [ ] Callback
 12 | - [x] Logging [flashlight/runner/pl.py]
 13 | - [ ] Metrics
 14 | - [ ] Plugins
 15 | 
 16 | ## Pre-requirements for local [*PRTM!*]
 17 | 
 18 | - Python newer than 3.5 (i.e. 3.6+)
 19 | - pytorch 1.5.0 and torchvision 0.6.0, installed in the build that matches your OS/CUDA version
 20 | - ... then install the packages listed in requirements.txt: `pip install -r requirements.txt`
 21 | 
 22 | ## Getting Started
 23 | 
 24 | The `master` branch runs MNIST classification (torchvision dataset) with SqueezeNet (torchvision model).
 25 | For details, check `config/config.py`.
 26 | 
 27 | ### Run Single Experiment without NNI
 28 | 
 29 | 0. Prepare the environment: GPU Docker, local Python env... whatever you prefer
 30 | 
 31 | - if docker : `docker pull davinnovation/pytorch-boilerplate:alpha`
 32 | 
 33 | 1. `python run.py` or `python -W ignore run.py`
 34 | 
 35 | ![image](https://user-images.githubusercontent.com/3917185/84721592-fc9b4200-afbb-11ea-9602-c41dc58f8b8a.png)
 36 | 
 37 | 2. after experiment... `tensorboard --logdir Logs`
 38 | 
 39 | ![image](https://user-images.githubusercontent.com/3917185/84721667-26ecff80-afbc-11ea-8152-4025cbaeda90.png)
 40 | 
 41 | ### Run Experiments with NNI
 42 | 
 43 | 0. Prepare environment
 44 | 
 45 | 1. `nnictl create --config nni_config.yml`
 46 | 
 47 | 2. localhost:8080
 48 | 
 49 | ![image](https://user-images.githubusercontent.com/3917185/84721734-484deb80-afbc-11ea-8585-60f1752dd1d8.png)
 50 | 
 51 | ## Diving into Code
 52 | 
 53 | ![image](https://user-images.githubusercontent.com/3917185/84723043-ac25e380-afbf-11ea-9116-fbabd47b5cc0.png)
 54 | 
 55 | - Adding Network
 56 | 
 57 | `flashlight.network.__init__.py`
 58 | 
 59 | ```python
 60 | """Network Define"""
 61 | # Add {"Network Name" : and nn.Module without initalize}
 62 | def _get_squeezenet(num_classes, version:str="1_0", pretrained=False, progress=True):
 63 |     VERSION = {
 64 |         "1_0" : torchvision.models.squeezenet1_0,
 65 |         "1_1" : torchvision.models.squeezenet1_1
 66 |     }
 67 | 
 68 |     return VERSION[version](pretrained=pretrained, progress=progress, num_classes=num_classes)
 69 | 
 70 | NETWORK_DICT = {
 71 |     "squeezenet": _get_squeezenet
 72 | }
 73 | ```
 74 | 
 75 | - Adding Dataset
 76 | 
 77 | `flashlight.dataloader.__init__.py`
 78 | 
 79 | ```python
 80 | """ Dataset """
 81 | # Add {Dataset Name : torch.utils.data.Dataset}
 82 | DATA_DICT = {"MNIST": torchvision.datasets.MNIST}
 83 | 
 84 | """ Dataset Transform """
 85 | 
 86 | transform = torchvision.transforms.Compose(
 87 |     [torchvision.transforms.Grayscale(num_output_channels=3), torchvision.transforms.ToTensor()]
 88 | )
 89 | 
 90 | def get_datalaoder(data, root="../datasets/", split="train"):
 91 |     if data in ["MNIST"]:  # if torchvision
 92 |         if split == "val":
 93 |             print(f"{data} dataset dosen't support validation set. val replaced by train")
 94 |         if split in ["train", "val"]:
 95 |             return DATA_DICT[data](root=root, train=True, download=True, transform=transform)
 96 |         else:
 97 |             return DATA_DICT[data](root=root, train=False, download=True, transform=transform)
 98 | ```
 99 | 
100 | - Change Loss, forward/backward... [Research Code]
101 | 
102 | `flashlight.runner.pl.py`
103 | 
104 | - Change Logger, hw options... [Engineering Code]
105 | 
106 | `flashlight.runner.main_pl.py`
107 | 


--------------------------------------------------------------------------------
/flashlight/runner/main_pl.py:
--------------------------------------------------------------------------------
  1 | import os.path as osp
  2 | import random
  3 | import numpy as np
  4 | 
  5 | import torch
  6 | import torch.nn as nn
  7 | import torch.backends.cudnn as cudnn
  8 | import pytorch_lightning as pl
  9 | 
 10 | from torch import optim
 11 | import torchvision
 12 | 
 13 | from ..network import *
 14 | from ..dataloader import get_data, check_data_option
 15 | 
 16 | from .pl import PLModule
 17 | 
 18 | from pytorch_lightning import Trainer, seed_everything
 19 | from pytorch_lightning.loggers import TensorBoardLogger
 20 | 
 21 | 
 22 | class MainPL:
 23 |     def __init__(self, hw, network, data, opt, log, seed: int = None) -> None:
 24 |         if seed: self.fix_seed(seed)
 25 | 
 26 |         self.hw_args = self._hw_intp(hw)
 27 |         self.data_args = self._data_intp(data)
 28 | 
 29 |         self.network_args = self._network_intp(network)
 30 |         self.opt_args = self._opt_intp(opt, self.network_args["network"])
 31 |         self.log_args = self._log_intp(log)
 32 | 
 33 |     def fix_seed(self, seed=42):
 34 |         seed_everything(seed)
 35 | 
 36 |     def _hw_intp(self, args):
 37 |         gpu_idx = [int(gpu) for gpu in args.gpu_idx.split(",") if gpu != ""]
 38 |         return {
 39 |             "gpu_idx": gpu_idx if len(gpu_idx) > 0 else None,
 40 |             "num_workers": args.num_workers,
 41 |             "gpu_on": True if len(gpu_idx) > 0 else False,
 42 |         }
 43 | 
 44 |     def _data_intp(self, args):
 45 |         data_option = dict(args)
 46 |         del data_option["ds_name"]
 47 |         data_option = check_data_option(args.ds_name, data_option)
 48 |         data = get_data(args.ds_name, data_option)
 49 |         assert data != None
 50 |         
 51 |         return {"data": data}
 52 | 
 53 |     def _network_intp(self, args):
 54 |         network_option = dict(args)
 55 |         network = network_option["network"]
 56 |         checkpoint = network_option["checkpoint"]
 57 |         del network_option["network"]
 58 |         del network_option["checkpoint"]
 59 | 
 60 |         network_option_dict = check_network_option(network, network_option)
 61 |         network = get_network(network, network_option_dict)
 62 |         assert network != None
 63 |         return {"network": network, "network_option": network_option_dict}
 64 | 
 65 |     def _opt_intp(self, args, network):
 66 |         from ..utils import func
 67 |         args = dict(args)
 68 |         opt = args["opt"]
 69 |         del args["opt"]
 70 | 
 71 |         func.function_arg_checker(optim.__dict__[opt], args)
 72 | 
 73 |         opt = optim.__dict__[opt](network.parameters(), **args)
 74 | 
 75 |         return {"opt": opt}
 76 | 
 77 |     def _log_intp(self, args):
 78 |         run_only_test = False
 79 |         if "test_dir" in args.keys():
 80 |             run_only_test = True
 81 |         return {
 82 |             "project_name": args.project_name,
 83 |             "val_log_freq_epoch": args.val_log_freq_epoch,
 84 |             "run_only_test": run_only_test,
 85 |             "epoch": args.epoch
 86 |         }
 87 | 
 88 |     def run(self, profile:bool=True):
 89 |         network = self.network_args
 90 |         optimizer = self.opt_args["opt"]
 91 | 
 92 |         plm = PLModule(network=network, optimizer=optimizer)
 93 | 
 94 |         trainer = Trainer(
 95 |             logger=TensorBoardLogger(save_dir="./Logs", name=self.log_args["project_name"]),
 96 |             gpus=self.hw_args["gpu_idx"],
 97 |             check_val_every_n_epoch=self.log_args["val_log_freq_epoch"],
 98 |             max_epochs=self.log_args["epoch"],
 99 |             min_epochs=self.log_args["epoch"],
100 |             profiler=profile,
101 |         )
102 | 
103 |         trainer.fit(plm, datamodule=self.data_args["data"])
104 | 
105 |         trainer.test(plm, datamodule=self.data_args["data"])
106 | 
107 |         return plm.final_target
108 | 


--------------------------------------------------------------------------------