├── config ├── __init__.py ├── search.json └── config.py ├── flashlight ├── __init__.py ├── runner │ ├── __init__.py │ ├── pl.py │ └── main_pl.py ├── utils │ └── func.py ├── network │ └── __init__.py └── dataloader │ └── __init__.py ├── .gitignore ├── requirements.txt ├── Dockerfile ├── nni_config.yml ├── run.py ├── .github └── code-formatting.yml ├── run_nni.py ├── .pep8speaks.yml └── README.md /config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flashlight/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flashlight/runner/__init__.py: -------------------------------------------------------------------------------- 1 | from .main_pl import MainPL 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .vscode/ 3 | Logs/ 4 | .pytest_cache/ 5 | .coverage -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | omegaconf==2.0.5 2 | pytorch-lightning==1.2.5 3 | albumentations 4 | -------------------------------------------------------------------------------- /config/search.json: -------------------------------------------------------------------------------- 1 | { 2 | "train.batch_size":{"_type":"choice","_value":[32,64,128]}, 3 | "network.version":{"_type":"choice","_value":["1_0", "1_1"]} 4 | } -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM pytorch/pytorch:1.7.0-cuda11.0-cudnn8-runtime 2 | 3 | RUN mkdir 
def function_arg_checker(function, args_dict):
    """Verify that every key of ``args_dict`` can be passed to ``function`` by keyword.

    The signature of ``function`` is read with ``inspect.getfullargspec``.
    Positional-or-keyword and keyword-only parameter names are both accepted;
    a callable that declares ``**kwargs`` accepts any key.

    Args:
        function: Callable whose signature is inspected.
        args_dict: Mapping of keyword-argument names to values. An empty
            mapping is always accepted without inspecting ``function``.

    Returns:
        None. The function only validates.

    Raises:
        AssertionError: If at least one key of ``args_dict`` is not a
            parameter of ``function``. The message lists the offending keys.
    """
    if not args_dict:
        return

    spec = inspect.getfullargspec(function)
    if spec.varkw is not None:
        # ``**kwargs`` swallows any keyword, so no key can be invalid.
        return

    # Keyword-only parameters are just as valid for ``function(**args_dict)``
    # as the positional-or-keyword ones.
    accepted = set(spec.args) | set(spec.kwonlyargs)
    unknown = [key for key in args_dict if key not in accepted]
    if unknown:
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped by ``python -O``; the exception type is kept for callers.
        target = getattr(function, "__qualname__", repr(function))
        raise AssertionError(
            f"unknown argument(s) {unknown} for {target}; accepted: {sorted(accepted)}"
        )
def _get_squeezenet(num_classes, version: str = "1_0", pretrained=False, progress=True):
    """Instantiate a torchvision SqueezeNet.

    Args:
        num_classes: Forwarded to the torchvision constructor as ``num_classes``.
        version: ``"1_0"`` or ``"1_1"``; selects ``squeezenet1_0`` or
            ``squeezenet1_1``. Any other value raises ``KeyError``.
        pretrained: Forwarded to the torchvision constructor.
        progress: Forwarded to the torchvision constructor.

    Returns:
        Whatever the selected torchvision constructor returns.
    """
    constructors = {
        "1_0": torchvision.models.squeezenet1_0,
        "1_1": torchvision.models.squeezenet1_1,
    }
    build = constructors[version]
    return build(num_classes=num_classes, pretrained=pretrained, progress=progress)
def search_params_intp(params):
    """Turn flat, dot-separated search-space keys into a nested dict.

    ``{"opt.lr": 0.1, "opt.opt": "SGD", "seed": 1}`` becomes
    ``{"opt": {"lr": 0.1, "opt": "SGD"}, "seed": 1}``. Keys that share the
    same group prefix are merged into one nested dict instead of replacing
    each other.

    Args:
        params: Mapping from parameter name to value. A name contains either
            no dot (top-level entry) or exactly one dot (``"group.option"``).

    Returns:
        A new dict; ``params`` itself is not modified.

    Raises:
        ValueError: If a name contains more than one dot, or if the same name
            is used both as a top-level entry and as a group prefix.
    """
    ret = {}
    # Groups created by this function; kept separately so a caller-supplied
    # dict value under a plain key is never mistaken for a group and mutated.
    groups = set()
    for name, value in params.items():
        parts = name.split(".")
        if len(parts) == 1:
            if name in groups:
                raise ValueError(f"'{name}' is used both as a group and as a plain key")
            ret[name] = value
        elif len(parts) == 2:
            group, option = parts
            if group not in groups:
                if group in ret:
                    raise ValueError(f"'{group}' is used both as a group and as a plain key")
                ret[group] = {}
                groups.add(group)
            ret[group][option] = value
        else:
            raise ValueError(f"parameter name '{name}' has more than one '.'")
    return ret
class PLModule(pl.LightningModule):  # for classification
    """LightningModule that trains a network with cross-entropy loss.

    Args:
        network: Dict with the keys ``"network"`` (the model that is called in
            ``forward``) and ``"network_option"`` (stored as ``hparams``).
        optimizer: Ready-made optimizer, returned as-is by
            ``configure_optimizers``.
    """

    def __init__(self, network, optimizer):
        super().__init__()
        self.network = network["network"]
        self.hparams = dict(network["network_option"])
        self.optimizer = optimizer

        self.val_best_score = 0.0

        self.loss = nn.CrossEntropyLoss()  # TODO HardCoded

        # related to NNI; nothing in this class updates it after construction
        self.final_target = 0

    def forward(self, x):
        """Run the wrapped network on ``x`` and return its output."""
        return self.network(x)

    def _batch_loss(self, batch):
        """Return the loss for ``batch``, using ``batch[0]`` as input and ``batch[1]`` as target."""
        prediction = self.forward(batch[0])  # == self(batch[0])
        target = batch[1]
        return self.loss(prediction.float(), target.long())

    def training_step(self, batch, batch_idx):
        """Log ``train_loss`` and return it."""
        train_loss = self._batch_loss(batch)
        self.log("train_loss", train_loss)
        return train_loss

    def validation_step(self, batch, batch_idx):  # optional
        """Log ``val_loss``; returns nothing."""
        self.log("val_loss", self._batch_loss(batch))

    def test_step(self, batch, batch_nb):  # optional
        """Log ``test_loss``; returns nothing."""
        self.log("test_loss", self._batch_loss(batch))

    def configure_optimizers(self):  # require
        """Return the optimizer handed to the constructor."""
        return self.optimizer
class MNISTDataModule(pl.LightningDataModule):
    """LightningDataModule serving torchvision MNIST as 3-channel tensors.

    Every image goes through ``Grayscale(num_output_channels=3)`` followed by
    ``ToTensor()``. The ``train=True`` split is divided into 55000 training
    and 5000 validation samples with ``random_split``; the ``train=False``
    split is used for testing.

    Args:
        data_dir: Root directory handed to ``torchvision.datasets.MNIST``.
        train_batchsize: Batch size of the training loader.
        val_batchsize: Batch size of the validation loader.
        test_batchsize: Batch size of the test loader.
        workers: ``num_workers`` for every ``DataLoader`` built by this
            module. The default ``0`` matches ``DataLoader``'s own default.
    """

    def __init__(self, data_dir: str, train_batchsize=32, val_batchsize=32, test_batchsize=32, workers=0):
        super().__init__()
        self.data_dir = data_dir
        self.transform = torchvision.transforms.Compose(
            [torchvision.transforms.Grayscale(num_output_channels=3), torchvision.transforms.ToTensor()]
        )
        self.train_batchsize = train_batchsize
        self.val_batchsize = val_batchsize
        self.test_batchsize = test_batchsize
        self.workers = workers

    def prepare_data(self):
        """Download both MNIST splits into ``data_dir``."""
        torchvision.datasets.MNIST(self.data_dir, train=True, download=True)
        torchvision.datasets.MNIST(self.data_dir, train=False, download=True)

    def setup(self, stage=None):
        """Create the datasets for ``stage`` (``'fit'``, ``'test'``, or ``None`` for both)."""
        if stage == "fit" or stage is None:
            mnist_full = torchvision.datasets.MNIST(self.data_dir, train=True, transform=self.transform)
            self.mnist_train, self.mnist_val = random_split(mnist_full, [55000, 5000])

        if stage == "test" or stage is None:
            self.mnist_test = torchvision.datasets.MNIST(self.data_dir, train=False, transform=self.transform)

    def _make_loader(self, dataset, batch_size):
        """Build a DataLoader that honours the ``workers`` constructor argument."""
        # ``workers`` used to be stored but never forwarded, so the setting
        # had no effect on data loading.
        return torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=self.workers)

    def train_dataloader(self):
        """Return the loader over the training subset."""
        return self._make_loader(self.mnist_train, self.train_batchsize)

    def val_dataloader(self):
        """Return the loader over the validation subset."""
        return self._make_loader(self.mnist_val, self.val_batchsize)

    def test_dataloader(self):
        """Return the loader over the test split."""
        return self._make_loader(self.mnist_test, self.test_batchsize)
def check_data_option(data, data_option_dict):
    """Validate dataset options against the constructor of the selected data module.

    Args:
        data: Key into ``DATA_DICT`` naming the data module class.
        data_option_dict: Keyword arguments intended for that class's
            ``__init__``.

    Returns:
        ``data_option_dict`` itself, unchanged.

    Raises:
        KeyError: If options are given and ``data`` is not in ``DATA_DICT``.
        AssertionError: Propagated from ``func.function_arg_checker`` when an
            option name is not a parameter of the constructor.
    """
    # The stray debug ``print`` of the arguments was dropped; the parallel
    # ``check_network_option`` validates silently as well.
    if len(data_option_dict.keys()) > 0:
        func.function_arg_checker(DATA_DICT[data].__init__, data_option_dict)
    return data_option_dict
`tensorboard --logdir Logs` 38 | 39 | ![image](https://user-images.githubusercontent.com/3917185/84721667-26ecff80-afbc-11ea-8152-4025cbaeda90.png) 40 | 41 | ### Run Experiments with NNI 42 | 43 | 0. Prepare environment 44 | 45 | 1. `nnictl create --config nni_config.yml` 46 | 47 | 2. localhost:8080 48 | 49 | ![image](https://user-images.githubusercontent.com/3917185/84721734-484deb80-afbc-11ea-8585-60f1752dd1d8.png) 50 | 51 | ## Diving into Code 52 | 53 | ![image](https://user-images.githubusercontent.com/3917185/84723043-ac25e380-afbf-11ea-9116-fbabd47b5cc0.png) 54 | 55 | - Adding Network 56 | 57 | `flashlight.network.__init__.py` 58 | 59 | ```python 60 | """Network Define""" 61 | # Add {"Network Name" : and nn.Module without initalize} 62 | def _get_squeezenet(num_classes, version:str="1_0", pretrained=False, progress=True): 63 | VERSION = { 64 | "1_0" : torchvision.models.squeezenet1_0, 65 | "1_1" : torchvision.models.squeezenet1_1 66 | } 67 | 68 | return VERSION[version](pretrained=pretrained, progress=progress, num_classes=num_classes) 69 | 70 | NETWORK_DICT = { 71 | "squeezenet": _get_squeezenet 72 | } 73 | ``` 74 | 75 | - Adding Dataset 76 | 77 | `flashlight.dataloader.__init__.py` 78 | 79 | ```python 80 | """ Dataset """ 81 | # Add {Dataset Name : torch.utils.data.Dataset} 82 | DATA_DICT = {"MNIST": torchvision.datasets.MNIST} 83 | 84 | """ Dataset Transform """ 85 | 86 | transform = torchvision.transforms.Compose( 87 | [torchvision.transforms.Grayscale(num_output_channels=3), torchvision.transforms.ToTensor()] 88 | ) 89 | 90 | def get_datalaoder(data, root="../datasets/", split="train"): 91 | if data in ["MNIST"]: # if torchvision 92 | if split == "val": 93 | print(f"{data} dataset dosen't support validation set. 
class MainPL:
    """Turn the config sections into a model, optimizer and data module, then run a Lightning Trainer.

    Args:
        hw: Config with ``gpu_idx`` (comma-separated GPU indices as a string)
            and ``num_workers``.
        network: Config with ``network`` (key understood by ``get_network``),
            ``checkpoint``, and any further options for the network factory.
        data: Config with ``ds_name`` (key understood by ``get_data``) and any
            further options for the data module constructor.
        opt: Config with ``opt`` (name of a ``torch.optim`` attribute) and any
            further keyword arguments for that optimizer.
        log: Config with ``project_name``, ``val_log_freq_epoch`` and ``epoch``.
        seed: Seed passed to ``seed_everything``; ``None`` disables seeding.
    """

    def __init__(self, hw, network, data, opt, log, seed: int = None) -> None:
        # Compare against None instead of relying on truthiness so that an
        # explicit seed of 0 is applied as well.
        if seed is not None:
            self.fix_seed(seed)

        self.hw_args = self._hw_intp(hw)
        self.data_args = self._data_intp(data)

        self.network_args = self._network_intp(network)
        self.opt_args = self._opt_intp(opt, self.network_args["network"])
        self.log_args = self._log_intp(log)

    def fix_seed(self, seed=42):
        """Seed the random number generators through ``seed_everything``."""
        seed_everything(seed)

    def _hw_intp(self, args):
        """Parse the hardware config.

        Returns:
            Dict with ``gpu_idx`` (list of ints, or ``None`` when no index is
            given), ``num_workers`` and ``gpu_on`` (whether any index was given).
        """
        # ``strip`` skips empty and whitespace-only entries such as the
        # trailing one in "0, ", which ``int`` could not parse.
        gpu_idx = [int(gpu) for gpu in args.gpu_idx.split(",") if gpu.strip()]
        return {
            "gpu_idx": gpu_idx or None,
            "num_workers": args.num_workers,
            "gpu_on": bool(gpu_idx),
        }

    def _data_intp(self, args):
        """Build the data module named by ``args.ds_name`` from the remaining options."""
        data_option = dict(args)
        ds_name = data_option.pop("ds_name")
        data_option = check_data_option(ds_name, data_option)
        data = get_data(ds_name, data_option)
        assert data is not None

        return {"data": data}

    def _network_intp(self, args):
        """Build the network named by ``args.network`` from the remaining options.

        Returns:
            Dict with ``network`` (the built model) and ``network_option``
            (the validated options used to build it).
        """
        network_option = dict(args)
        network_name = network_option.pop("network")
        # NOTE(review): ``checkpoint`` is only removed from the options here;
        # nothing in this class loads it.
        network_option.pop("checkpoint")

        network_option_dict = check_network_option(network_name, network_option)
        network = get_network(network_name, network_option_dict)
        assert network is not None
        return {"network": network, "network_option": network_option_dict}

    def _opt_intp(self, args, network):
        """Instantiate the ``torch.optim`` optimizer named by ``args.opt`` over ``network.parameters()``."""
        from ..utils import func

        options = dict(args)
        opt_name = options.pop("opt")
        # Look the class up once; a missing name still raises KeyError.
        opt_cls = optim.__dict__[opt_name]

        func.function_arg_checker(opt_cls, options)

        return {"opt": opt_cls(network.parameters(), **options)}

    def _log_intp(self, args):
        """Parse the logging config.

        ``run_only_test`` is ``True`` when the config has a ``test_dir`` key.
        NOTE(review): ``run`` does not consult that flag.
        """
        return {
            "project_name": args.project_name,
            "val_log_freq_epoch": args.val_log_freq_epoch,
            "run_only_test": "test_dir" in args.keys(),
            "epoch": args.epoch,
        }

    def run(self, profile: bool = True):
        """Fit and then test the model.

        Args:
            profile: Forwarded to ``Trainer(profiler=...)``.

        Returns:
            ``final_target`` of the ``PLModule`` after testing.
        """
        plm = PLModule(network=self.network_args, optimizer=self.opt_args["opt"])

        trainer = Trainer(
            logger=TensorBoardLogger(save_dir="./Logs", name=self.log_args["project_name"]),
            gpus=self.hw_args["gpu_idx"],
            check_val_every_n_epoch=self.log_args["val_log_freq_epoch"],
            # min == max: training always runs for exactly ``epoch`` epochs.
            max_epochs=self.log_args["epoch"],
            min_epochs=self.log_args["epoch"],
            profiler=profile,
        )

        trainer.fit(plm, datamodule=self.data_args["data"])

        trainer.test(plm, datamodule=self.data_args["data"])

        return plm.final_target