├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── bin
│   ├── predict.py
│   └── train.py
├── cifar_pipeline
│   ├── __init__.py
│   ├── configs
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── fixup
│   │   │   ├── base.py
│   │   │   ├── resnet110_bn.py
│   │   │   ├── resnet110_fixup.py
│   │   │   ├── resnet110_fixup_0_0_1.py
│   │   │   ├── resnet110_fixup_0_1.py
│   │   │   ├── resnet110_fixup_mixup.py
│   │   │   └── wideresnet
│   │   │       ├── __init__.py
│   │   │       ├── base.py
│   │   │       ├── batch_norm
│   │   │       │   ├── 10000_layers.py
│   │   │       │   ├── 1000_layers.py
│   │   │       │   ├── 100_layers.py
│   │   │       │   ├── 10_layers.py
│   │   │       │   └── __init__.py
│   │   │       ├── fixup
│   │   │       │   ├── 10000_layers.py
│   │   │       │   ├── 1000_layers.py
│   │   │       │   ├── 100_layers.py
│   │   │       │   ├── 10_layers.py
│   │   │       │   └── __init__.py
│   │   │       ├── fixup_0
│   │   │       │   ├── 10000_layers.py
│   │   │       │   ├── 1000_layers.py
│   │   │       │   ├── 100_layers.py
│   │   │       │   ├── 10_layers.py
│   │   │       │   └── __init__.py
│   │   │       ├── fixup_0_0_1
│   │   │       │   ├── 10000_layers.py
│   │   │       │   ├── 1000_layers.py
│   │   │       │   ├── 100_layers.py
│   │   │       │   ├── 10_layers.py
│   │   │       │   └── __init__.py
│   │   │       ├── fixup_0_1
│   │   │       │   ├── 10000_layers.py
│   │   │       │   ├── 1000_layers.py
│   │   │       │   ├── 100_layers.py
│   │   │       │   ├── 10_layers.py
│   │   │       │   └── __init__.py
│   │   │       └── fixup_10
│   │   │           ├── 10000_layers.py
│   │   │           ├── 1000_layers.py
│   │   │           ├── 100_layers.py
│   │   │           ├── 10_layers.py
│   │   │           └── __init__.py
│   │   └── simple_cnn.py
│   ├── dataset.py
│   └── resnet_cifar.py
├── imagenet_pipeline
│   ├── __init__.py
│   ├── configs
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── resnet101_fixup.py
│   │   ├── resnet101_fixup_128.py
│   │   ├── resnet50.py
│   │   ├── resnet50_fixup.py
│   │   └── resnet50_fixup_128.py
│   └── dataset.py
├── mnist_pipeline
│   ├── __init__.py
│   ├── configs
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── resnet18.py
│   │   └── simple_cnn.py
│   ├── dataset.py
│   └── tests
│       ├── __init__.py
│       ├── test_dataset.py
│       └── test_train.py
├── pipeline
│   ├── __init__.py
│   ├── config_base.py
│   ├── core.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── base.py
│   │   └── mixup.py
│   ├── logger.py
│   ├── losses
│   │   └── vector_cross_entropy.py
│   ├── metrics
│   │   ├── __init__.py
│   │   ├── accuracy.py
│   │   └── base.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── base.py
│   │   └── image_models
│   │       ├── __init__.py
│   │       ├── encoders
│   │       │   ├── __init__.py
│   │       │   └── resnet.py
│   │       ├── resnet_fixup.py
│   │       ├── wide_resnet.py
│   │       └── wide_resnet_fixup.py
│   ├── predictors
│   │   ├── __init__.py
│   │   ├── base.py
│   │   └── classification.py
│   ├── preprocessing
│   │   ├── __init__.py
│   │   ├── audio_preprocessing
│   │   │   └── __init__.py
│   │   ├── image_preprocessing
│   │   │   └── __init__.py
│   │   └── text_preprocessing
│   │       └── __init__.py
│   ├── schedulers
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── dropout
│   │   │   ├── __init__.py
│   │   │   ├── increase_step.py
│   │   │   └── utils.py
│   │   └── learning_rate
│   │       ├── __init__.py
│   │       ├── cyclical_lr_scheduler.py
│   │       └── reduce_on_plateau.py
│   ├── storage
│   │   ├── __init__.py
│   │   ├── predictions.py
│   │   └── state.py
│   ├── trainers
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── classification.py
│   │   └── segmentation.py
│   └── utils.py
├── requirements.txt
└── tests
    ├── __init__.py
    ├── common.py
    ├── test_metrics.py
    ├── test_schedulers.py
    └── test_storage.py
/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so 
as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # PyCharm 107 | .idea/ 108 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: xenial 2 | language: python 3 | python: 4 | - "3.6" 5 | # command to install dependencies 6 | install: 7 | - pip install -r requirements.txt 8 | - wget https://www.dropbox.com/s/pzljfuwzo8hpb18/mnist.zip?dl=0 -O mnist.zip 9 | - mkdir ~/.pipeline 10 | - mkdir ~/.pipeline/mnist 11 | - unzip mnist.zip -d ~/.pipeline/mnist/ 12 | - free -g 13 | # command to run tests 14 | script: 15 | - pytest -vsx 16 | 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Pavel Ostyakov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pipeline 2 | 3 | ## How to run training 4 | 5 | First of all, create a config. You may find some examples of configs in folders mnist_pipeline, cifar_pipeline and imagenet_pipeline. 
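A config is a Python module that defines a `Config` class subclassing one of the provided base configs. Below is a minimal sketch modelled on `mnist_pipeline/configs/simple_cnn.py`; the tiny model and the save path are illustrative stand-ins, and it assumes that `load_config` in `bin/train.py` instantiates the module's `Config` class:

```python
# Hypothetical minimal config, modelled on mnist_pipeline/configs/simple_cnn.py.
# The tiny model and MODEL_SAVE_PATH below are illustrative stand-ins.
import torch.nn as nn

from mnist_pipeline.configs.base import ConfigMNISTBase
from pipeline.models.base import Flatten

MODEL_SAVE_PATH = "models/my_experiment"  # checkpoints, state and log.txt are written here


def get_model():
    # Any nn.Module mapping a 1x28x28 image to 10 class logits works here.
    return nn.Sequential(
        nn.Conv2d(1, 16, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.AdaptiveAvgPool2d(1),
        Flatten(),
        nn.Linear(16, 10),
    )


class Config(ConfigMNISTBase):
    def __init__(self):
        # ConfigMNISTBase wires up the optimizer, scheduler, loss, metrics and datasets.
        super().__init__(model=get_model(), model_save_path=MODEL_SAVE_PATH)
```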
6 | Then, call: 7 | 8 | `python3 bin/train.py path_to_config` 9 | 10 | 11 | For example, to reproduce the results from the Fixup paper, call: 12 | 13 | `python3 bin/train.py cifar_pipeline/configs/fixup/resnet110_fixup.py` 14 | -------------------------------------------------------------------------------- /bin/predict.py: -------------------------------------------------------------------------------- 1 | from pipeline.utils import load_predict_config, run_predict 2 | 3 | import argparse 4 | 5 | 6 | def main(): 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument("config_path") 9 | args = parser.parse_args() 10 | 11 | config = load_predict_config(args.config_path) 12 | run_predict(config) 13 | 14 | 15 | if __name__ == "__main__": 16 | main() 17 | -------------------------------------------------------------------------------- /bin/train.py: -------------------------------------------------------------------------------- 1 | from pipeline.utils import load_config, run_train 2 | 3 | import argparse 4 | 5 | 6 | def main(): 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument("config_path") 9 | args = parser.parse_args() 10 | 11 | config = load_config(args.config_path) 12 | run_train(config) 13 | 14 | 15 | if __name__ == "__main__": 16 | main() 17 | -------------------------------------------------------------------------------- /cifar_pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/cifar_pipeline/__init__.py -------------------------------------------------------------------------------- /cifar_pipeline/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/cifar_pipeline/configs/__init__.py -------------------------------------------------------------------------------- /cifar_pipeline/configs/base.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.optim as optim 3 | from torchvision.transforms import ToTensor 4 | 5 | from cifar_pipeline.dataset import CIFARImagesDataset, CIFARTargetsDataset 6 | from pipeline.config_base import ConfigBase 7 | from pipeline.datasets.base import DatasetWithPostprocessingFunc, DatasetComposer, OneHotTargetsDataset 8 | from pipeline.datasets.mixup import MixUpDatasetWrapper 9 | from pipeline.losses.vector_cross_entropy import VectorCrossEntropy 10 | from pipeline.metrics.accuracy import MetricsCalculatorAccuracy 11 | from pipeline.schedulers.learning_rate.reduce_on_plateau import SchedulerWrapperLossOnPlateau 12 | from pipeline.trainers.classification import TrainerClassification 13 | 14 | TRAIN_DATASET_PATH = "~/.pipeline/cifar/train" 15 | TEST_DATASET_PATH = "~/.pipeline/cifar/test" 16 | 17 | 18 | def get_dataset(path, transforms, train, use_mixup): 19 | images_dataset = DatasetWithPostprocessingFunc( 20 | CIFARImagesDataset(path=path, train=train, download=True), 21 | transforms) 22 | 23 | targets_dataset = CIFARTargetsDataset(path=path, train=train) 24 | if use_mixup: 25 | targets_dataset = OneHotTargetsDataset(targets_dataset, 10) 26 | 27 | return DatasetComposer([images_dataset, targets_dataset]) 28 | 29 | 30 | class ConfigCIFARBase(ConfigBase): 31 | def __init__(self, model, model_save_path, num_workers=8, batch_size=128, transforms=None, 32 | epoch_count=200, 
print_frequency=10, mixup_alpha=0): 33 | optimizer = optim.SGD( 34 | model.parameters(), 35 | lr=0.1, 36 | momentum=0.9, 37 | weight_decay=5e-4) 38 | 39 | scheduler = SchedulerWrapperLossOnPlateau(optimizer) 40 | loss = nn.CrossEntropyLoss() 41 | metrics_calculator = MetricsCalculatorAccuracy() 42 | trainer_cls = TrainerClassification 43 | 44 | if transforms is None: 45 | transforms = ToTensor() 46 | 47 | train_dataset = get_dataset(path=TRAIN_DATASET_PATH, transforms=transforms, train=True, 48 | use_mixup=mixup_alpha > 0) 49 | val_dataset = get_dataset(path=TEST_DATASET_PATH, transforms=transforms, train=False, 50 | use_mixup=mixup_alpha > 0) 51 | 52 | if mixup_alpha > 0: 53 | train_dataset = MixUpDatasetWrapper(train_dataset, alpha=mixup_alpha) 54 | loss = VectorCrossEntropy() 55 | 56 | super().__init__( 57 | model=model, 58 | model_save_path=model_save_path, 59 | optimizer=optimizer, 60 | scheduler=scheduler, 61 | loss=loss, 62 | metrics_calculator=metrics_calculator, 63 | batch_size=batch_size, 64 | num_workers=num_workers, 65 | train_dataset=train_dataset, 66 | val_dataset=val_dataset, 67 | trainer_cls=trainer_cls, 68 | print_frequency=print_frequency, 69 | epoch_count=epoch_count, 70 | device="cpu") 71 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/base.py: -------------------------------------------------------------------------------- 1 | from cifar_pipeline.dataset import CIFARImagesDataset, CIFARTargetsDataset 2 | 3 | from pipeline.config_base import ConfigBase 4 | from pipeline.schedulers.learning_rate.reduce_on_plateau import SchedulerWrapperLossOnPlateau 5 | from pipeline.metrics.accuracy import MetricsCalculatorAccuracy 6 | from pipeline.datasets.base import DatasetWithPostprocessingFunc, DatasetComposer, OneHotTargetsDataset 7 | from pipeline.trainers.classification import TrainerClassification 8 | from pipeline.datasets.mixup import MixUpDatasetWrapper 9 | from pipeline.losses.vector_cross_entropy import VectorCrossEntropy 10 | 11 | import torch.nn as nn 12 | import torch.optim as optim 13 | 14 | from torchvision.transforms import ToTensor, Compose, Normalize 15 | 16 | 17 | TRAIN_DATASET_PATH = "~/.pipeline/cifar/train" 18 | TEST_DATASET_PATH = "~/.pipeline/cifar/test" 19 | 20 | 21 | def get_dataset(path, transforms, train, use_mixup): 22 | images_dataset = DatasetWithPostprocessingFunc( 23 | CIFARImagesDataset(path=path, train=train, download=True), 24 | transforms) 25 | 26 | targets_dataset = CIFARTargetsDataset(path=path, train=train) 27 | if use_mixup: 28 | targets_dataset = OneHotTargetsDataset(targets_dataset, 10) 29 | 30 | return DatasetComposer([images_dataset, targets_dataset]) 31 | 32 | 33 | class ConfigCIFARBase(ConfigBase): 34 | def __init__(self, model, model_save_path, num_workers=8, batch_size=128, transforms=None, 35 | epoch_count=200, print_frequency=10, use_mixup=False): 36 | parameters_bias = [p[1] for p in model.named_parameters() if 'bias' in p[0]] 37 | parameters_scale = [p[1] for p in model.named_parameters() if 'scale' in p[0]] 38 | parameters_others = [p[1] for p in model.named_parameters() if not ('bias' in p[0] or 'scale' in p[0])] 39 | 40 | optimizer = optim.SGD( 41 | [{'params': parameters_bias, 'lr': 0.1/10.}, 42 | {'params': parameters_scale, 'lr': 0.1/10.}, 43 | {'params': parameters_others}], 44 | lr=0.1, 45 | momentum=0.9, 46 | weight_decay=5e-4) 47 | 48 | scheduler = SchedulerWrapperLossOnPlateau(optimizer) 49 | loss = nn.CrossEntropyLoss() 50 | metrics_calculator = 
MetricsCalculatorAccuracy() 51 | trainer_cls = TrainerClassification 52 | 53 | if transforms is None: 54 | transforms = Compose([ToTensor(), Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))]) 55 | 56 | train_dataset = get_dataset(path=TRAIN_DATASET_PATH, transforms=transforms, train=True, use_mixup=use_mixup) 57 | val_dataset = get_dataset(path=TEST_DATASET_PATH, transforms=transforms, train=False, use_mixup=use_mixup) 58 | 59 | 60 | if use_mixup: 61 | train_dataset = MixUpDatasetWrapper(train_dataset, alpha=0.7) 62 | loss = VectorCrossEntropy() 63 | 64 | super().__init__( 65 | model=model, 66 | model_save_path=model_save_path, 67 | optimizer=optimizer, 68 | scheduler=scheduler, 69 | loss=loss, 70 | metrics_calculator=metrics_calculator, 71 | batch_size=batch_size, 72 | num_workers=num_workers, 73 | train_dataset=train_dataset, 74 | val_dataset=val_dataset, 75 | trainer_cls=trainer_cls, 76 | print_frequency=print_frequency, 77 | epoch_count=epoch_count) 78 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/resnet110_bn.py: -------------------------------------------------------------------------------- 1 | from .base import ConfigCIFARBase 2 | 3 | from cifar_pipeline.resnet_cifar import resnet110 4 | 5 | from torch.nn import DataParallel 6 | 7 | MODEL_SAVE_PATH = "models/cifar_resnet110_bn" 8 | 9 | 10 | class Config(ConfigCIFARBase): 11 | def __init__(self): 12 | model = resnet110(use_fixup=False) 13 | 14 | super().__init__(model=DataParallel(model), model_save_path=MODEL_SAVE_PATH, 15 | epoch_count=100, batch_size=128) 16 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/resnet110_fixup.py: -------------------------------------------------------------------------------- 1 | from .base import ConfigCIFARBase 2 | 3 | from cifar_pipeline.resnet_cifar import resnet110 4 | 5 | from torch.nn import DataParallel 6 | 7 | MODEL_SAVE_PATH = "models/cifar_resnet110_fixup" 8 | 9 | 10 | class Config(ConfigCIFARBase): 11 | def __init__(self): 12 | model = resnet110(use_fixup=True) 13 | 14 | super().__init__(model=DataParallel(model), model_save_path=MODEL_SAVE_PATH, 15 | epoch_count=100, batch_size=128) 16 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/resnet110_fixup_0_0_1.py: -------------------------------------------------------------------------------- 1 | from .base import ConfigCIFARBase 2 | 3 | from cifar_pipeline.resnet_cifar import resnet110 4 | 5 | from torch.nn import DataParallel 6 | 7 | MODEL_SAVE_PATH = "models/cifar_resnet110_fixup_0_0_1" 8 | 9 | 10 | class Config(ConfigCIFARBase): 11 | def __init__(self): 12 | model = resnet110(use_fixup=True, fixup_coeff=0.01) 13 | 14 | super().__init__(model=DataParallel(model), model_save_path=MODEL_SAVE_PATH, 15 | epoch_count=100, batch_size=128) 16 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/resnet110_fixup_0_1.py: -------------------------------------------------------------------------------- 1 | from .base import ConfigCIFARBase 2 | 3 | from cifar_pipeline.resnet_cifar import resnet110 4 | 5 | from torch.nn import DataParallel 6 | 7 | MODEL_SAVE_PATH = "models/cifar_resnet110_fixup_0_1" 8 | 9 | 10 | class Config(ConfigCIFARBase): 11 | def __init__(self): 12 | model = resnet110(use_fixup=True, fixup_coeff=0.1) 13 | 14 | super().__init__(model=DataParallel(model), 
model_save_path=MODEL_SAVE_PATH, 15 | epoch_count=100, batch_size=128) 16 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/resnet110_fixup_mixup.py: -------------------------------------------------------------------------------- 1 | from .base import ConfigCIFARBase 2 | 3 | from cifar_pipeline.resnet_cifar import resnet110 4 | 5 | from torch.nn import DataParallel 6 | 7 | MODEL_SAVE_PATH = "models/cifar_resnet110_fixup_mixup" 8 | 9 | 10 | class Config(ConfigCIFARBase): 11 | def __init__(self): 12 | model = resnet110(use_fixup=True) 13 | 14 | super().__init__(model=DataParallel(model), model_save_path=MODEL_SAVE_PATH, 15 | epoch_count=100, batch_size=128, use_mixup=True) 16 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/cifar_pipeline/configs/fixup/wideresnet/__init__.py -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/base.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigCIFARBase 2 | 3 | from pipeline.models.image_models.wide_resnet_fixup import WideResNet as WideResNetFixup 4 | from pipeline.models.image_models.wide_resnet import WideResNet as WideResNetBatchNorm 5 | 6 | from enum import auto 7 | from torch.nn import DataParallel 8 | 9 | MODEL_SAVE_PATH = "models/cifar_wideresnet_{}_{}_layers" 10 | 11 | 12 | class ConfigWideResNetBase(ConfigCIFARBase): 13 | BATCH_NORM = auto() 14 | FIXUP = auto() 15 | 16 | def __init__(self, num_layers, fixup_coeff=1, normalization_type=BATCH_NORM, batch_size=128): 17 | if normalization_type == self.BATCH_NORM: 18 | model = WideResNetBatchNorm(depth=num_layers, num_classes=10) 19 | norm_type = "batchnorm" 20 | else: 21 | model = WideResNetFixup(depth=num_layers, num_classes=10, fixup_coeff=fixup_coeff) 22 | norm_type = "fixup_coeff_{}".format(fixup_coeff) 23 | 24 | super().__init__(model=DataParallel(model), model_save_path=MODEL_SAVE_PATH.format(norm_type, num_layers), 25 | epoch_count=1, batch_size=batch_size) 26 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/batch_norm/10000_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=10000, normalization_type=ConfigWideResNetBase.BATCH_NORM, batch_size=64) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/batch_norm/1000_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=1000, normalization_type=ConfigWideResNetBase.BATCH_NORM) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/batch_norm/100_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class 
Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=100, normalization_type=ConfigWideResNetBase.BATCH_NORM) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/batch_norm/10_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=10, normalization_type=ConfigWideResNetBase.BATCH_NORM) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/batch_norm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/cifar_pipeline/configs/fixup/wideresnet/batch_norm/__init__.py -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup/10000_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=10000, normalization_type=ConfigWideResNetBase.FIXUP, batch_size=64) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup/1000_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=1000, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup/100_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=100, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup/10_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=10, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/cifar_pipeline/configs/fixup/wideresnet/fixup/__init__.py -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0/10000_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=10000, fixup_coeff=0, normalization_type=ConfigWideResNetBase.FIXUP, batch_size=64) 7 | 
-------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0/1000_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=1000, fixup_coeff=0, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0/100_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=100, fixup_coeff=0, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0/10_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=10, fixup_coeff=0, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/cifar_pipeline/configs/fixup/wideresnet/fixup_0/__init__.py -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0_0_1/10000_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=10000, fixup_coeff=0.01, normalization_type=ConfigWideResNetBase.FIXUP, batch_size=64) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0_0_1/1000_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=1000, fixup_coeff=0.01, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0_0_1/100_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=100, fixup_coeff=0.01, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0_0_1/10_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=10, fixup_coeff=0.01, normalization_type=ConfigWideResNetBase.FIXUP) 7 | 
-------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0_0_1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/cifar_pipeline/configs/fixup/wideresnet/fixup_0_0_1/__init__.py -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0_1/10000_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=10000, fixup_coeff=0.1, normalization_type=ConfigWideResNetBase.FIXUP, batch_size=64) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0_1/1000_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=1000, fixup_coeff=0.1, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0_1/100_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=100, fixup_coeff=0.1, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0_1/10_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=10, fixup_coeff=0.1, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0_1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/cifar_pipeline/configs/fixup/wideresnet/fixup_0_1/__init__.py -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_10/10000_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=10000, fixup_coeff=10, normalization_type=ConfigWideResNetBase.FIXUP, batch_size=64) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_10/1000_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=1000, fixup_coeff=10, normalization_type=ConfigWideResNetBase.FIXUP) 7 | 
-------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_10/100_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=100, fixup_coeff=10, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_10/10_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=10, fixup_coeff=10, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_10/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/cifar_pipeline/configs/fixup/wideresnet/fixup_10/__init__.py -------------------------------------------------------------------------------- /cifar_pipeline/configs/simple_cnn.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | from torchvision.transforms import ToTensor 7 | 8 | from pipeline.models.base import Flatten 9 | from .base import ConfigCIFARBase 10 | 11 | MODEL_SAVE_PATH = "models/cifar_simple_cnn" 12 | BATCH_SIZE = 128 13 | 14 | SEED = 85 15 | random.seed(SEED) 16 | np.random.seed(SEED) 17 | torch.random.manual_seed(SEED) 18 | 19 | 20 | def get_model(): 21 | model = nn.Sequential( 22 | nn.Conv2d(3, 16, kernel_size=3, padding=1), 23 | nn.ReLU(), 24 | nn.MaxPool2d(kernel_size=2), 25 | nn.Conv2d(16, 64, kernel_size=3, padding=1), 26 | nn.ReLU(), 27 | nn.MaxPool2d(kernel_size=2), 28 | nn.Conv2d(64, 128, kernel_size=3, padding=1), 29 | nn.ReLU(), 30 | nn.Conv2d(128, 128, kernel_size=3, padding=1), 31 | nn.ReLU(), 32 | nn.AdaptiveAvgPool2d(1), 33 | Flatten(), 34 | nn.Linear(128, 10) 35 | ) 36 | return model 37 | 38 | 39 | class Config(ConfigCIFARBase): 40 | def __init__(self): 41 | model = get_model() 42 | transforms = ToTensor() 43 | super().__init__(model=model, model_save_path=MODEL_SAVE_PATH, 44 | epoch_count=2, batch_size=BATCH_SIZE, transforms=transforms) 45 | -------------------------------------------------------------------------------- /cifar_pipeline/dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | from torchvision.datasets.cifar import CIFAR10 3 | 4 | 5 | class CIFARDataset(data.Dataset): 6 | def __init__(self, path, download=True, train=True): 7 | self._dataset = CIFAR10(path, download=download, train=train) 8 | 9 | def get_image(self, item): 10 | return self._dataset[item][0] 11 | 12 | def get_class(self, item): 13 | return self._dataset[item][1] 14 | 15 | def __len__(self): 16 | return len(self._dataset) 17 | 18 | def __getitem__(self, item): 19 | return self._dataset[item] 20 | 21 | 22 | class CIFARImagesDataset(CIFARDataset): 23 | def __getitem__(self, item): 24 | return self.get_image(item) 25 | 26 | 27 | class CIFARTargetsDataset(CIFARDataset): 28 | def 
__getitem__(self, item): 29 | return self.get_class(item) 30 | -------------------------------------------------------------------------------- /cifar_pipeline/resnet_cifar.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | import torch.nn.init as init 4 | import torch 5 | import math 6 | 7 | 8 | def _weights_init(m): 9 | if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d): 10 | init.kaiming_normal_(m.weight) 11 | 12 | 13 | class LambdaLayer(nn.Module): 14 | def __init__(self, lambd): 15 | super(LambdaLayer, self).__init__() 16 | self.lambd = lambd 17 | 18 | def forward(self, x): 19 | return self.lambd(x) 20 | 21 | 22 | class BasicBlock(nn.Module): 23 | expansion = 1 24 | m = 2 25 | 26 | def __init__(self, in_planes, planes, stride=1, use_fixup=False, fixup_l=1, fixup_coeff=1): 27 | super(BasicBlock, self).__init__() 28 | self._use_fixup = use_fixup 29 | self._fixup_l = fixup_l 30 | self._fixup_coeff = fixup_coeff 31 | 32 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 33 | self.bn1 = nn.BatchNorm2d(planes) 34 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 35 | self.bn2 = nn.BatchNorm2d(planes) 36 | 37 | self.shortcut = nn.Sequential() 38 | if stride != 1 or in_planes != planes: 39 | self.shortcut = LambdaLayer(lambda x: 40 | F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, planes//4, planes//4), "constant", 0)) 41 | 42 | if use_fixup: 43 | self.scale = nn.Parameter(torch.ones(1)) 44 | self.biases = nn.ParameterList([nn.Parameter(torch.zeros(1)) for _ in range(4)]) 45 | 46 | k = self.conv1.kernel_size[0] * self.conv1.kernel_size[1] * self.conv1.out_channels 47 | self.conv1.weight.data.normal_(0, fixup_coeff * fixup_l ** (-1 / (2 * self.m - 2)) * math.sqrt(2. / k)) 48 | self.conv2.weight.data.zero_() 49 | 50 | def forward(self, x): 51 | if self._use_fixup: 52 | out = F.relu(self.conv1(x + self.biases[0]) + self.biases[1]) 53 | out = self.scale * self.conv2(out + self.biases[2]) + self.biases[3] 54 | else: 55 | out = F.relu(self.bn1(self.conv1(x))) 56 | out = self.bn2(self.conv2(out)) 57 | out += self.shortcut(x) 58 | out = F.relu(out) 59 | return out 60 | 61 | 62 | class ResNet(nn.Module): 63 | def __init__(self, block, num_blocks, num_classes=10, use_fixup=False, fixup_coeff=1): 64 | super(ResNet, self).__init__() 65 | self.in_planes = 16 66 | 67 | fixup_l = sum(num_blocks) 68 | 69 | self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False) 70 | self.bn1 = nn.BatchNorm2d(16) if not use_fixup else nn.Sequential() 71 | self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1, 72 | use_fixup=use_fixup, fixup_l=fixup_l, fixup_coeff=fixup_coeff) 73 | self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2, 74 | use_fixup=use_fixup, fixup_l=fixup_l, fixup_coeff=fixup_coeff) 75 | self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2, 76 | use_fixup=use_fixup, fixup_l=fixup_l, fixup_coeff=fixup_coeff) 77 | self.linear = nn.Linear(64, num_classes) 78 | 79 | self.bias1 = nn.Parameter(torch.zeros(1)) 80 | self.bias2 = nn.Parameter(torch.zeros(1)) 81 | if not use_fixup: 82 | self.apply(_weights_init) 83 | else: 84 | self.linear.weight.data.zero_() 85 | self.linear.bias.data.zero_() 86 | 87 | k = self.conv1.kernel_size[0] * self.conv1.kernel_size[1] * self.conv1.out_channels 88 | self.conv1.weight.data.normal_(0, math.sqrt(2. 
/ k)) 89 | 90 | def _make_layer(self, block, planes, num_blocks, stride, use_fixup, fixup_l, fixup_coeff): 91 | strides = [stride] + [1]*(num_blocks-1) 92 | layers = [] 93 | for stride in strides: 94 | layers.append(block(self.in_planes, planes, stride, use_fixup, fixup_l, fixup_coeff)) 95 | self.in_planes = planes * block.expansion 96 | 97 | return nn.Sequential(*layers) 98 | 99 | def forward(self, x): 100 | out = F.relu(self.bn1(self.conv1(x)) + self.bias1) 101 | out = self.layer1(out) 102 | out = self.layer2(out) 103 | out = self.layer3(out) 104 | out = F.avg_pool2d(out, out.size()[3]) 105 | out = out.view(out.size(0), -1) 106 | out = self.linear(out + self.bias2) 107 | return out 108 | 109 | 110 | def resnet110(use_fixup=False, fixup_coeff=1): 111 | return ResNet(BasicBlock, [18, 18, 18], use_fixup=use_fixup, fixup_coeff=fixup_coeff) 112 | -------------------------------------------------------------------------------- /imagenet_pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/imagenet_pipeline/__init__.py -------------------------------------------------------------------------------- /imagenet_pipeline/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/imagenet_pipeline/configs/__init__.py -------------------------------------------------------------------------------- /imagenet_pipeline/configs/base.py: -------------------------------------------------------------------------------- 1 | from imagenet_pipeline.dataset import ImageNetImagesDataset, ImageNetTargetsDataset 2 | 3 | from pipeline.config_base import ConfigBase 4 | from pipeline.schedulers.learning_rate.reduce_on_plateau import SchedulerWrapperLossOnPlateau 5 | from pipeline.metrics.accuracy import MetricsCalculatorAccuracy 6 | from pipeline.datasets.base import DatasetWithPostprocessingFunc, DatasetComposer, OneHotTargetsDataset 7 | from pipeline.trainers.classification import TrainerClassification 8 | 9 | from pipeline.datasets.mixup import MixUpDatasetWrapper 10 | from pipeline.losses.vector_cross_entropy import VectorCrossEntropy 11 | import torch.nn as nn 12 | import torch.optim as optim 13 | 14 | from torchvision.transforms import ToTensor, Compose, Normalize 15 | 16 | 17 | TRAIN_DATASET_PATH = "~/train" 18 | TEST_DATASET_PATH = "~/val" 19 | 20 | 21 | def get_dataset(path, transforms, use_mixup): 22 | images_dataset = DatasetWithPostprocessingFunc( 23 | ImageNetImagesDataset(path=path), 24 | transforms) 25 | 26 | targets_dataset = ImageNetTargetsDataset(path=path) 27 | 28 | if use_mixup: 29 | targets_dataset = OneHotTargetsDataset(targets_dataset, 1000) 30 | return DatasetComposer([images_dataset, targets_dataset]) 31 | 32 | 33 | class ConfigImageNetBase(ConfigBase): 34 | def __init__(self, model, model_save_path, num_workers=16, batch_size=128, learning_rate=0.1, transforms=None, use_mixup=False): 35 | parameters_bias = [p[1] for p in model.named_parameters() if 'bias' in p[0]] 36 | parameters_scale = [p[1] for p in model.named_parameters() if 'scale' in p[0]] 37 | parameters_others = [p[1] for p in model.named_parameters() if not ('bias' in p[0] or 'scale' in p[0])] 38 | 39 | optimizer = optim.SGD( 40 | [{'params': parameters_bias, 'lr': learning_rate/10.}, 41 | {'params': parameters_scale, 'lr': learning_rate/10.}, 42 | 
{'params': parameters_others}], 43 | lr=learning_rate, 44 | momentum=0.9, 45 | weight_decay=5e-4) 46 | scheduler = SchedulerWrapperLossOnPlateau(optimizer) 47 | loss = nn.CrossEntropyLoss() 48 | metrics_calculator = MetricsCalculatorAccuracy() 49 | trainer_cls = TrainerClassification 50 | 51 | if transforms is None: 52 | transforms = Compose([ToTensor(), Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))]) 53 | 54 | train_dataset = get_dataset(path=TRAIN_DATASET_PATH, transforms=transforms, use_mixup=use_mixup) 55 | val_dataset = get_dataset(path=TEST_DATASET_PATH, transforms=transforms, use_mixup=use_mixup) 56 | 57 | if use_mixup: 58 | train_dataset = MixUpDatasetWrapper(train_dataset, alpha=0.7) 59 | loss = VectorCrossEntropy() 60 | 61 | super().__init__( 62 | model=model, 63 | model_save_path=model_save_path, 64 | optimizer=optimizer, 65 | scheduler=scheduler, 66 | loss=loss, 67 | metrics_calculator=metrics_calculator, 68 | batch_size=batch_size, 69 | num_workers=num_workers, 70 | train_dataset=train_dataset, 71 | val_dataset=val_dataset, 72 | trainer_cls=trainer_cls, 73 | print_frequency=100) 74 | -------------------------------------------------------------------------------- /imagenet_pipeline/configs/resnet101_fixup.py: -------------------------------------------------------------------------------- 1 | from .base import ConfigImageNetBase 2 | 3 | from torch.nn import DataParallel 4 | 5 | 6 | from pipeline.models.image_models.resnet_fixup import resnet101 7 | 8 | MODEL_SAVE_PATH = "models/imagenet_resnet_101_fixup" 9 | 10 | 11 | class Config(ConfigImageNetBase): 12 | def __init__(self, model_save_path=MODEL_SAVE_PATH): 13 | super().__init__(model=DataParallel(resnet101()), model_save_path=model_save_path, use_mixup=True, batch_size=128 * 8, learning_rate=0.1 * 8) 14 | -------------------------------------------------------------------------------- /imagenet_pipeline/configs/resnet101_fixup_128.py: -------------------------------------------------------------------------------- 1 | from .base import ConfigImageNetBase 2 | 3 | from torch.nn import DataParallel 4 | 5 | 6 | from pipeline.models.image_models.resnet_fixup import resnet101 7 | 8 | MODEL_SAVE_PATH = "models/imagenet_resnet_101_fixup_128" 9 | 10 | 11 | class Config(ConfigImageNetBase): 12 | def __init__(self, model_save_path=MODEL_SAVE_PATH): 13 | super().__init__(model=DataParallel(resnet101()), model_save_path=model_save_path, use_mixup=True, batch_size=128, learning_rate=0.1) 14 | -------------------------------------------------------------------------------- /imagenet_pipeline/configs/resnet50.py: -------------------------------------------------------------------------------- 1 | from .base import ConfigImageNetBase 2 | 3 | from torch.nn import DataParallel 4 | 5 | from torchvision.models import resnet50 6 | 7 | MODEL_SAVE_PATH = "models/imagenet_resnet_50" 8 | 9 | 10 | class Config(ConfigImageNetBase): 11 | def __init__(self, model_save_path=MODEL_SAVE_PATH): 12 | super().__init__(model=DataParallel(resnet50()), model_save_path=model_save_path) 13 | -------------------------------------------------------------------------------- /imagenet_pipeline/configs/resnet50_fixup.py: -------------------------------------------------------------------------------- 1 | from .base import ConfigImageNetBase 2 | 3 | from torch.nn import DataParallel 4 | 5 | 6 | from pipeline.models.image_models.resnet_fixup import resnet50 7 | 8 | MODEL_SAVE_PATH = "models/imagenet_resnet_50_fixup" 9 | 10 | 11 | class 
Config(ConfigImageNetBase): 12 | def __init__(self, model_save_path=MODEL_SAVE_PATH): 13 | super().__init__(model=DataParallel(resnet50()), model_save_path=model_save_path, use_mixup=True, batch_size=128 * 7, learning_rate=0.1 * 7) 14 | -------------------------------------------------------------------------------- /imagenet_pipeline/configs/resnet50_fixup_128.py: -------------------------------------------------------------------------------- 1 | from .base import ConfigImageNetBase 2 | 3 | from torch.nn import DataParallel 4 | 5 | 6 | from pipeline.models.image_models.resnet_fixup import resnet50 7 | 8 | MODEL_SAVE_PATH = "models/imagenet_resnet_50_fixup_128" 9 | 10 | 11 | class Config(ConfigImageNetBase): 12 | def __init__(self, model_save_path=MODEL_SAVE_PATH): 13 | super().__init__(model=DataParallel(resnet50()), model_save_path=model_save_path, use_mixup=True, batch_size=128, learning_rate=0.1) 14 | -------------------------------------------------------------------------------- /imagenet_pipeline/dataset.py: -------------------------------------------------------------------------------- 1 | from pipeline.core import PipelineError 2 | from pipeline.utils import get_path 3 | 4 | from PIL import Image 5 | 6 | import torch.utils.data as data 7 | 8 | import os 9 | import glob 10 | 11 | IMAGE_SIZE = (224, 224) 12 | 13 | 14 | class ImageNetDataset(data.Dataset): 15 | def __init__(self, path): 16 | path = get_path(path) 17 | if not os.path.exists(path): 18 | raise PipelineError("Path {} does not exist".format(path)) 19 | 20 | self._paths = sorted(glob.glob(os.path.join(path, "*/*.JPEG"))) 21 | 22 | classes = set() 23 | for path in self._paths: 24 | class_name = os.path.basename(os.path.dirname(path)) 25 | classes.add(class_name) 26 | 27 | classes = sorted(list(classes)) 28 | self._class_to_id = dict((class_name, i) for i, class_name in enumerate(classes)) 29 | 30 | def get_image(self, item): 31 | path = self._paths[item] 32 | image = Image.open(path).resize(IMAGE_SIZE).convert("RGB") 33 | return image 34 | 35 | def get_class(self, item): 36 | path = self._paths[item] 37 | class_name = os.path.basename(os.path.dirname(path)) 38 | result = self._class_to_id[class_name] 39 | return result 40 | 41 | def __len__(self): 42 | return len(self._paths) 43 | 44 | def __getitem__(self, item): 45 | return self.get_image(item), self.get_class(item) 46 | 47 | 48 | class ImageNetImagesDataset(ImageNetDataset): 49 | def __getitem__(self, item): 50 | return self.get_image(item) 51 | 52 | 53 | class ImageNetTargetsDataset(ImageNetDataset): 54 | def __getitem__(self, item): 55 | return self.get_class(item) 56 | -------------------------------------------------------------------------------- /mnist_pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/mnist_pipeline/__init__.py -------------------------------------------------------------------------------- /mnist_pipeline/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/mnist_pipeline/configs/__init__.py -------------------------------------------------------------------------------- /mnist_pipeline/configs/base.py: -------------------------------------------------------------------------------- 1 | from mnist_pipeline.dataset import MNISTImagesDataset, 
MNISTTargetsDataset 2 | 3 | from pipeline.config_base import ConfigBase, PredictConfigBase 4 | from pipeline.schedulers.learning_rate.reduce_on_plateau import SchedulerWrapperLossOnPlateau 5 | from pipeline.metrics.accuracy import MetricsCalculatorAccuracy 6 | from pipeline.datasets.base import DatasetWithPostprocessingFunc, DatasetComposer 7 | from pipeline.trainers.classification import TrainerClassification 8 | from pipeline.predictors.classification import PredictorClassification 9 | 10 | import torch.nn as nn 11 | import torch.optim as optim 12 | 13 | from torchvision.transforms import ToTensor 14 | 15 | 16 | TRAIN_DATASET_PATH = "~/.pipeline/mnist/train.csv" 17 | TEST_DATASET_PATH = "~/.pipeline/mnist/test.csv" 18 | 19 | VAL_RATIO = 0.2 20 | 21 | 22 | def get_dataset(mode, transforms): 23 | images_dataset = DatasetWithPostprocessingFunc( 24 | MNISTImagesDataset(path=TRAIN_DATASET_PATH, mode=mode, val_ratio=VAL_RATIO), 25 | transforms) 26 | 27 | targets_dataset = MNISTTargetsDataset( 28 | path=TRAIN_DATASET_PATH, mode=mode, val_ratio=VAL_RATIO) 29 | 30 | return DatasetComposer([images_dataset, targets_dataset]) 31 | 32 | 33 | class ConfigMNISTBase(ConfigBase): 34 | def __init__(self, model, model_save_path, num_workers=4, batch_size=128, transforms=None): 35 | optimizer = optim.Adam(model.parameters()) 36 | scheduler = SchedulerWrapperLossOnPlateau(optimizer) 37 | loss = nn.CrossEntropyLoss() 38 | metrics_calculator = MetricsCalculatorAccuracy() 39 | trainer_cls = TrainerClassification 40 | 41 | if transforms is None: 42 | transforms = ToTensor() 43 | 44 | train_dataset = get_dataset(mode=MNISTImagesDataset.MODE_TRAIN, transforms=transforms) 45 | val_dataset = get_dataset(mode=MNISTImagesDataset.MODE_VAL, transforms=transforms) 46 | 47 | super().__init__( 48 | model=model, 49 | model_save_path=model_save_path, 50 | optimizer=optimizer, 51 | scheduler=scheduler, 52 | loss=loss, 53 | metrics_calculator=metrics_calculator, 54 | batch_size=batch_size, 55 | num_workers=num_workers, 56 | train_dataset=train_dataset, 57 | val_dataset=val_dataset, 58 | trainer_cls=trainer_cls) 59 | 60 | 61 | class PredictConfigMNISTBase(PredictConfigBase): 62 | def __init__(self, model, model_save_path, num_workers=4, batch_size=128): 63 | predictor_cls = PredictorClassification 64 | 65 | images_dataset = DatasetWithPostprocessingFunc( 66 | MNISTImagesDataset(path=TRAIN_DATASET_PATH, mode=MNISTImagesDataset.MODE_VAL, val_ratio=VAL_RATIO), 67 | ToTensor()) 68 | 69 | dataset = DatasetComposer([images_dataset, list(range(len(images_dataset)))]) 70 | 71 | super().__init__( 72 | model=model, 73 | model_save_path=model_save_path, 74 | dataset=dataset, 75 | predictor_cls=predictor_cls, 76 | num_workers=num_workers, 77 | batch_size=batch_size) 78 | -------------------------------------------------------------------------------- /mnist_pipeline/configs/resnet18.py: -------------------------------------------------------------------------------- 1 | from .base import ConfigMNISTBase 2 | 3 | from pipeline.models.image_models.encoders.resnet import Resnet18FeatureExtractor 4 | 5 | import torch.nn as nn 6 | 7 | 8 | class Config(ConfigMNISTBase): 9 | def __init__(self, model_save_path="models/resnet18"): 10 | model = nn.Sequential( 11 | Resnet18FeatureExtractor(input_channels=1), 12 | nn.Linear(Resnet18FeatureExtractor.NUM_FEATURES, 10) 13 | ) 14 | 15 | super().__init__(model=model, model_save_path=model_save_path) 16 | -------------------------------------------------------------------------------- 
/mnist_pipeline/configs/simple_cnn.py: -------------------------------------------------------------------------------- 1 | from .base import ConfigMNISTBase, PredictConfigMNISTBase 2 | 3 | from pipeline.models.base import Flatten 4 | 5 | import torch.nn as nn 6 | 7 | 8 | MODEL_SAVE_PATH = "models/simple_cnn" 9 | 10 | 11 | def get_model(): 12 | model = nn.Sequential( 13 | nn.Conv2d(1, 16, kernel_size=3, padding=1), 14 | nn.ReLU(), 15 | nn.MaxPool2d(kernel_size=2), 16 | nn.Conv2d(16, 64, kernel_size=3, padding=1), 17 | nn.ReLU(), 18 | nn.MaxPool2d(kernel_size=2), 19 | nn.Conv2d(64, 128, kernel_size=3, padding=1), 20 | nn.ReLU(), 21 | nn.Conv2d(128, 128, kernel_size=3, padding=1), 22 | nn.ReLU(), 23 | nn.AdaptiveAvgPool2d(1), 24 | Flatten(), 25 | nn.Linear(128, 10) 26 | ) 27 | return model 28 | 29 | 30 | class Config(ConfigMNISTBase): 31 | def __init__(self, model_save_path=MODEL_SAVE_PATH): 32 | super().__init__(model=get_model(), model_save_path=model_save_path) 33 | 34 | 35 | class PredictConfig(PredictConfigMNISTBase): 36 | def __init__(self, model_save_path=MODEL_SAVE_PATH): 37 | super().__init__(model=get_model(), model_save_path=model_save_path) 38 | -------------------------------------------------------------------------------- /mnist_pipeline/dataset.py: -------------------------------------------------------------------------------- 1 | from pipeline.core import PipelineError 2 | from pipeline.utils import get_path 3 | 4 | import torch.utils.data as data 5 | from enum import auto 6 | 7 | import os 8 | import pandas as pd 9 | 10 | 11 | class MNISTDataset(data.Dataset): 12 | MODE_TRAIN = auto() 13 | MODE_VAL = auto() 14 | 15 | def __init__(self, path, mode, val_ratio): 16 | path = get_path(path) 17 | if not os.path.exists(path): 18 | raise PipelineError("Path {} does not exist".format(path)) 19 | 20 | dataset = pd.read_csv(path).values 21 | train_length = int(len(dataset) * (1 - val_ratio)) 22 | if mode == self.MODE_TRAIN: 23 | dataset = dataset[:train_length] 24 | else: 25 | dataset = dataset[train_length:] 26 | 27 | self._dataset = dataset 28 | 29 | def __len__(self): 30 | return len(self._dataset) 31 | 32 | def __getitem__(self, item): 33 | row = self._dataset[item] 34 | 35 | image = row[1:].reshape(28, 28, 1).astype("uint8") 36 | target = int(row[0]) 37 | return image, target 38 | 39 | 40 | class MNISTImagesDataset(MNISTDataset): 41 | def __init__(self, path, mode, val_ratio): 42 | super().__init__(path, mode, val_ratio) 43 | 44 | def __getitem__(self, item): 45 | image, _ = super().__getitem__(item) 46 | return image 47 | 48 | 49 | class MNISTTargetsDataset(MNISTDataset): 50 | def __init__(self, path, mode, val_ratio): 51 | super().__init__(path, mode, val_ratio) 52 | 53 | def __getitem__(self, item): 54 | _, target = super().__getitem__(item) 55 | return target 56 | -------------------------------------------------------------------------------- /mnist_pipeline/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/mnist_pipeline/tests/__init__.py -------------------------------------------------------------------------------- /mnist_pipeline/tests/test_dataset.py: -------------------------------------------------------------------------------- 1 | from mnist_pipeline.dataset import MNISTDataset, MNISTImagesDataset, MNISTTargetsDataset 2 | from mnist_pipeline.configs.base import TRAIN_DATASET_PATH 3 | 4 | from pipeline.utils import 
get_path 5 | 6 | import os 7 | 8 | 9 | class TestMNISTDataset: 10 | def setup(self): 11 | assert os.path.exists(get_path(TRAIN_DATASET_PATH)), "You need to download MNIST dataset first" 12 | 13 | def test_train_dataset(self): 14 | dataset = MNISTDataset(TRAIN_DATASET_PATH, mode=MNISTDataset.MODE_TRAIN, val_ratio=0.2) 15 | assert len(dataset) == 33600 16 | 17 | _, _ = dataset[33599] 18 | image, target = dataset[0] 19 | 20 | assert 0 <= target < 10 21 | 22 | assert image.shape == (28, 28, 1) 23 | 24 | def test_val_dataset(self): 25 | dataset = MNISTDataset(TRAIN_DATASET_PATH, mode=MNISTDataset.MODE_VAL, val_ratio=0.2) 26 | assert len(dataset) == 8400 27 | 28 | _, _ = dataset[8399] 29 | image, target = dataset[0] 30 | 31 | assert 0 <= target < 10 32 | 33 | assert image.shape == (28, 28, 1) 34 | 35 | dataset = MNISTDataset(TRAIN_DATASET_PATH, mode=MNISTDataset.MODE_VAL, val_ratio=0) 36 | assert len(dataset) == 0 37 | 38 | def test_images_dataset(self): 39 | dataset = MNISTImagesDataset(TRAIN_DATASET_PATH, mode=MNISTDataset.MODE_VAL, val_ratio=1) 40 | 41 | image = dataset[10] 42 | assert image.shape == (28, 28, 1) 43 | 44 | assert image.min() >= 0 45 | assert 1 <= image.max() <= 255 46 | 47 | def test_targets_dataset(self): 48 | dataset = MNISTTargetsDataset(TRAIN_DATASET_PATH, mode=MNISTDataset.MODE_TRAIN, val_ratio=0.5234) 49 | 50 | target = dataset[51] 51 | 52 | assert 0 <= target <= 9 53 | 54 | assert type(target) == int 55 | -------------------------------------------------------------------------------- /mnist_pipeline/tests/test_train.py: -------------------------------------------------------------------------------- 1 | from mnist_pipeline.configs.simple_cnn import Config, PredictConfig 2 | 3 | from pipeline.utils import run_train, run_predict 4 | import tempfile 5 | import shutil 6 | import os 7 | import hashlib 8 | 9 | 10 | class TestMNISTTrain: 11 | def test_mnist_train(self): 12 | test_path = tempfile.mkdtemp() 13 | config = Config(model_save_path=test_path) 14 | config.epoch_count = 2 15 | run_train(config) 16 | 17 | assert os.path.exists(os.path.join(test_path, "log.txt")) 18 | assert os.path.exists(os.path.join(test_path, "epoch_0")) 19 | assert os.path.exists(os.path.join(test_path, "epoch_1")) 20 | assert not os.path.exists(os.path.join(test_path, "epoch_2")) 21 | assert os.path.exists(os.path.join(test_path, "state")) 22 | 23 | with open(os.path.join(test_path, "epoch_1"), "rb") as fin: 24 | model_checkpoint_hash = hashlib.md5(fin.read()).hexdigest() 25 | 26 | run_train(config) 27 | 28 | with open(os.path.join(test_path, "epoch_1"), "rb") as fin: 29 | new_model_checkpoint_hash = hashlib.md5(fin.read()).hexdigest() 30 | 31 | assert model_checkpoint_hash == new_model_checkpoint_hash 32 | assert not os.path.exists(os.path.join(test_path, "epoch_2")) 33 | 34 | predict_config = PredictConfig(model_save_path=test_path) 35 | run_predict(predict_config) 36 | 37 | assert os.path.exists(os.path.join(test_path, "predictions", "predictions")) 38 | assert os.path.exists(os.path.join(test_path, "predictions", "identifiers")) 39 | 40 | shutil.rmtree(test_path) 41 | -------------------------------------------------------------------------------- /pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/__init__.py -------------------------------------------------------------------------------- /pipeline/config_base.py: 
-------------------------------------------------------------------------------- 1 | from .datasets.base import EmptyDataset 2 | from .metrics.base import MetricsCalculatorEmpty 3 | from pipeline.schedulers.base import SchedulerWrapperIdentity 4 | from .storage.state import StateStorageFile 5 | from .storage.predictions import PredictionsStorageFiles 6 | 7 | import torch 8 | import os 9 | 10 | 11 | class ConfigBase: 12 | def __init__( 13 | self, 14 | model, 15 | model_save_path, 16 | train_dataset, 17 | optimizer, 18 | loss, 19 | trainer_cls, 20 | device=None, 21 | val_dataset=None, 22 | scheduler=None, 23 | metrics_calculator=None, 24 | batch_size=1, 25 | num_workers=0, 26 | epoch_count=None, 27 | print_frequency=1, 28 | state_storage=None): 29 | 30 | if val_dataset is None: 31 | val_dataset = EmptyDataset() 32 | 33 | if scheduler is None: 34 | scheduler = SchedulerWrapperIdentity() 35 | 36 | if metrics_calculator is None: 37 | metrics_calculator = MetricsCalculatorEmpty() 38 | 39 | if device is None: 40 | device = "cuda" if torch.cuda.is_available() else "cpu" 41 | 42 | if state_storage is None: 43 | state_storage = StateStorageFile(os.path.join(model_save_path, "state")) 44 | 45 | self.model = model 46 | self.model_save_path = model_save_path 47 | self.train_dataset = train_dataset 48 | self.val_dataset = val_dataset 49 | self.batch_size = batch_size 50 | self.num_workers = num_workers 51 | self.scheduler = scheduler 52 | self.metrics_calculator = metrics_calculator 53 | self.loss = loss 54 | self.optimizer = optimizer 55 | self.epoch_count = epoch_count 56 | self.print_frequency = print_frequency 57 | self.trainer_cls = trainer_cls 58 | self.device = device 59 | self.state_storage = state_storage 60 | 61 | 62 | class PredictConfigBase: 63 | def __init__( 64 | self, 65 | model, 66 | model_save_path, 67 | dataset, 68 | predictor_cls, 69 | device=None, 70 | batch_size=1, 71 | num_workers=0, 72 | print_frequency=1, 73 | predictions_storage=None): 74 | if device is None: 75 | device = "cuda" if torch.cuda.is_available() else "cpu" 76 | 77 | if predictions_storage is None: 78 | predictions_storage = PredictionsStorageFiles(os.path.join(model_save_path, "predictions")) 79 | 80 | self.model = model 81 | self.dataset = dataset 82 | self.model_save_path = model_save_path 83 | self.batch_size = batch_size 84 | self.num_workers = num_workers 85 | self.print_frequency = print_frequency 86 | self.predictor_cls = predictor_cls 87 | self.device = device 88 | self.predictions_storage = predictions_storage 89 | -------------------------------------------------------------------------------- /pipeline/core.py: -------------------------------------------------------------------------------- 1 | class PipelineError(Exception): 2 | pass 3 | -------------------------------------------------------------------------------- /pipeline/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/datasets/__init__.py -------------------------------------------------------------------------------- /pipeline/datasets/base.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | import torch 3 | 4 | from typing import Sequence 5 | 6 | 7 | class EmptyDataset(data.Dataset): 8 | def __len__(self): 9 | return 0 10 | 11 | def __getitem__(self, item: int): 12 | assert False, "This code is unreachable" 13 | 14 
| 15 | class DatasetComposer(data.Dataset): 16 | def __init__(self, datasets: Sequence): 17 | self._datasets = datasets 18 | self._dataset_length = len(datasets[0]) 19 | for dataset in datasets: 20 | assert self._dataset_length == len(dataset) 21 | 22 | def __len__(self): 23 | return self._dataset_length 24 | 25 | def __getitem__(self, item: int): 26 | return tuple(dataset[item] for dataset in self._datasets) 27 | 28 | 29 | class OneHotTargetsDataset(data.Dataset): 30 | def __init__(self, targets: Sequence, class_count: int): 31 | self._targets = targets 32 | self._class_count = class_count 33 | 34 | def __len__(self): 35 | return len(self._targets) 36 | 37 | def __getitem__(self, item: int): 38 | target = self._targets[item] 39 | result = torch.zeros(self._class_count, dtype=torch.float32) 40 | result[target] = 1 41 | return result 42 | 43 | 44 | class MultiLabelTargetsDataset(data.Dataset): 45 | def __init__(self, targets: Sequence, class_count: int): 46 | self._targets = targets 47 | self._class_count = class_count 48 | 49 | def __len__(self): 50 | return len(self._targets) 51 | 52 | def __getitem__(self, item: int): 53 | target = self._targets[item] 54 | result = torch.zeros(self._class_count, dtype=torch.float32) 55 | 56 | for class_id in target: 57 | result[class_id] = 1 58 | 59 | return result 60 | 61 | 62 | class DatasetWithPostprocessingFunc(data.Dataset): 63 | def __init__(self, dataset, postprocessing_func): 64 | self._dataset = dataset 65 | self._postprocessing_func = postprocessing_func 66 | 67 | def __len__(self): 68 | return len(self._dataset) 69 | 70 | def __getitem__(self, item): 71 | return self._postprocessing_func(self._dataset[item]) 72 | -------------------------------------------------------------------------------- /pipeline/datasets/mixup.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | import random 3 | import numpy as np 4 | 5 | 6 | class MixUpDatasetWrapper(data.Dataset): 7 | def __init__(self, dataset, alpha=1): 8 | super().__init__() 9 | self._dataset = dataset 10 | self._alpha = alpha 11 | 12 | def __len__(self): 13 | return len(self._dataset) 14 | 15 | def __getitem__(self, item): 16 | first = self._dataset[item] 17 | second = random.choice(self._dataset) 18 | 19 | coeff = np.random.beta(self._alpha, self._alpha) 20 | 21 | result = [] 22 | for elem1, elem2 in zip(first, second): 23 | result.append(elem1 * coeff + elem2 * (1 - coeff)) 24 | 25 | return tuple(result) 26 | -------------------------------------------------------------------------------- /pipeline/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | 4 | 5 | LOGGER = logging.getLogger() 6 | FORMATTER = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") 7 | 8 | 9 | def setup_logger(out_file=None, stderr=True, stderr_level=logging.INFO, file_level=logging.DEBUG): 10 | LOGGER.handlers = [] 11 | LOGGER.setLevel(min(stderr_level, file_level)) 12 | 13 | if stderr: 14 | handler = logging.StreamHandler(sys.stderr) 15 | handler.setFormatter(FORMATTER) 16 | handler.setLevel(stderr_level) 17 | LOGGER.addHandler(handler) 18 | 19 | if out_file is not None: 20 | handler = logging.FileHandler(out_file) 21 | handler.setFormatter(FORMATTER) 22 | handler.setLevel(file_level) 23 | LOGGER.addHandler(handler) 24 | 25 | LOGGER.info("logger set up") 26 | return LOGGER 27 | -------------------------------------------------------------------------------- 
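Editorial note (illustrative only, not a file from the repository): MixUpDatasetWrapper above draws a Beta(alpha, alpha) coefficient and interpolates every element of a sample tuple, so it is meant to wrap a DatasetComposer whose targets are already dense float vectors (for example from OneHotTargetsDataset); raw integer labels would be blended into meaningless fractions. A minimal sketch of that composition, with small in-memory tensors standing in for a real dataset:

import torch

from pipeline.datasets.base import DatasetComposer, OneHotTargetsDataset
from pipeline.datasets.mixup import MixUpDatasetWrapper

images = torch.randn(100, 1, 28, 28)   # toy float "images"
labels = [i % 10 for i in range(100)]  # integer labels in [0, 10)

targets = OneHotTargetsDataset(labels, class_count=10)  # label -> one-hot float vector
mixed = MixUpDatasetWrapper(DatasetComposer([images, targets]), alpha=1)

image, target = mixed[0]                       # convex combinations of two random samples
assert abs(target.sum().item() - 1.0) < 1e-5   # the soft target still sums to 1

Soft targets produced this way pair with VectorCrossEntropy (next file) rather than nn.CrossEntropyLoss, which expects integer class indices.
--------------------------------------------------------------------------------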
/pipeline/losses/vector_cross_entropy.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class VectorCrossEntropy(nn.Module): 6 | def __init__(self): 7 | super().__init__() 8 | self._log_softmax = nn.LogSoftmax(dim=1) 9 | 10 | def forward(self, input, target): 11 | input = self._log_softmax(input) 12 | loss = -torch.sum(input * target) 13 | loss = loss / input.shape[0] 14 | return loss 15 | 16 | -------------------------------------------------------------------------------- /pipeline/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/metrics/__init__.py -------------------------------------------------------------------------------- /pipeline/metrics/accuracy.py: -------------------------------------------------------------------------------- 1 | from .base import MetricsCalculatorBase 2 | from ..core import PipelineError 3 | 4 | from sklearn.metrics import accuracy_score 5 | 6 | import numpy as np 7 | 8 | 9 | class MetricsCalculatorAccuracy(MetricsCalculatorBase): 10 | def __init__(self, border=0.5): 11 | super().__init__() 12 | self.zero_cache() 13 | self._border = border 14 | 15 | def zero_cache(self): 16 | self._predictions = [] 17 | self._true_labels = [] 18 | 19 | def add(self, y_predicted, y_true): 20 | self._predictions.append(y_predicted.cpu().data.numpy()) 21 | self._true_labels.append(y_true.cpu().data.numpy()) 22 | 23 | def calculate(self): 24 | if not self._predictions: 25 | raise PipelineError("You need to add predictions for calculating the accuracy first") 26 | 27 | y_pred = np.concatenate(self._predictions) 28 | y_true = np.concatenate(self._true_labels) 29 | 30 | if y_pred.shape[-1] == 1: 31 | # Binary classification 32 | y_pred = (y_pred >= self._border).astype("int") 33 | else: 34 | y_pred = np.argmax(y_pred, -1) 35 | 36 | if len(y_true.shape) != 1: 37 | y_true = np.argmax(y_true, -1) 38 | 39 | result = accuracy_score(y_true, y_pred) 40 | return {"accuracy": result} 41 | -------------------------------------------------------------------------------- /pipeline/metrics/base.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | 4 | class MetricsCalculatorBase(abc.ABC): 5 | @abc.abstractmethod 6 | def zero_cache(self): 7 | pass 8 | 9 | @abc.abstractmethod 10 | def add(self, y_predicted, y_true): 11 | pass 12 | 13 | @abc.abstractmethod 14 | def calculate(self): 15 | pass 16 | 17 | 18 | class MetricsCalculatorEmpty(MetricsCalculatorBase): 19 | def zero_cache(self): 20 | pass 21 | 22 | def add(self, y_predicted, y_true): 23 | pass 24 | 25 | def calculate(self): 26 | return {} 27 | -------------------------------------------------------------------------------- /pipeline/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/models/__init__.py -------------------------------------------------------------------------------- /pipeline/models/base.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class Flatten(nn.Module): 5 | def forward(self, x): 6 | return x.view(x.shape[0], -1) 7 | 
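8 |
9 | # --- Editorial sketch (illustrative only, appended by the editor; not part of the original file). ---
10 | # Flatten is the bridge between convolutional feature maps and a linear head:
11 | # AdaptiveAvgPool2d emits (batch, channels, 1, 1), and Flatten reshapes that to
12 | # (batch, channels) so nn.Linear can consume it, exactly as in the simple_cnn config above.
13 | if __name__ == "__main__":
14 |     import torch
15 |
16 |     head = nn.Sequential(nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(128, 10))
17 |     x = torch.randn(4, 128, 7, 7)    # a hypothetical (batch, channels, H, W) feature map
18 |     assert head(x).shape == (4, 10)  # (4, 128, 1, 1) -> (4, 128) -> (4, 10)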
-------------------------------------------------------------------------------- /pipeline/models/image_models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/models/image_models/__init__.py -------------------------------------------------------------------------------- /pipeline/models/image_models/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/models/image_models/encoders/__init__.py -------------------------------------------------------------------------------- /pipeline/models/image_models/encoders/resnet.py: -------------------------------------------------------------------------------- 1 | from torchvision.models import resnet 2 | import torch.nn as nn 3 | 4 | 5 | class ResnetModelFeatureExtractorBase(nn.Module): 6 | def __init__(self, model, input_channels): 7 | super().__init__() 8 | 9 | model.fc = nn.Sequential() 10 | model.avgpool = nn.AdaptiveAvgPool2d(1) 11 | 12 | if input_channels != 3: 13 | model.conv1 = nn.Conv2d( 14 | input_channels, 15 | model.conv1.out_channels, 16 | kernel_size=model.conv1.kernel_size, 17 | stride=model.conv1.stride, 18 | padding=model.conv1.padding, 19 | bias=model.conv1.bias) 20 | 21 | self._model = model 22 | 23 | def forward(self, input): 24 | return self._model(input) 25 | 26 | 27 | class Resnet18FeatureExtractor(ResnetModelFeatureExtractorBase): 28 | NUM_FEATURES = 512 29 | 30 | def __init__(self, pretrained=True, input_channels=3): 31 | model = resnet.resnet18(pretrained=pretrained) 32 | super().__init__( 33 | model=model, 34 | input_channels=input_channels) 35 | 36 | 37 | class Resnet34FeatureExtractor(ResnetModelFeatureExtractorBase): 38 | NUM_FEATURES = 512 39 | 40 | def __init__(self, pretrained=True, input_channels=3): 41 | model = resnet.resnet34(pretrained=pretrained) 42 | super().__init__( 43 | model=model, 44 | input_channels=input_channels) 45 | 46 | 47 | class Resnet50FeatureExtractor(ResnetModelFeatureExtractorBase): 48 | NUM_FEATURES = 2048 49 | 50 | def __init__(self, pretrained=True, input_channels=3): 51 | model = resnet.resnet50(pretrained=pretrained) 52 | super().__init__( 53 | model=model, 54 | input_channels=input_channels) 55 | 56 | 57 | class Resnet101FeatureExtractor(ResnetModelFeatureExtractorBase): 58 | NUM_FEATURES = 2048 59 | 60 | def __init__(self, pretrained=True, input_channels=3): 61 | model = resnet.resnet101(pretrained=pretrained) 62 | super().__init__( 63 | model=model, 64 | input_channels=input_channels) 65 | 66 | 67 | class Resnet152FeatureExtractor(ResnetModelFeatureExtractorBase): 68 | NUM_FEATURES = 2048 69 | 70 | def __init__(self, pretrained=True, input_channels=3): 71 | model = resnet.resnet152(pretrained=pretrained) 72 | super().__init__( 73 | model=model, 74 | input_channels=input_channels) 75 | -------------------------------------------------------------------------------- /pipeline/models/image_models/resnet_fixup.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | import torch 4 | 5 | 6 | class Bottleneck(nn.Module): 7 | expansion = 4 8 | m = 3 9 | 10 | def __init__(self, inplanes, planes, stride=1, downsample=None, fixup_l=1): 11 | super(Bottleneck, self).__init__() 12 | self.conv1 = nn.Conv2d(inplanes, 
planes, kernel_size=1, bias=False) 13 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 14 | padding=1, bias=False) 15 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 16 | self.relu = nn.ReLU(inplace=True) 17 | self.downsample = downsample 18 | self.stride = stride 19 | 20 | self.scale = nn.Parameter(torch.ones(1)) 21 | self.biases = nn.ParameterList([nn.Parameter(torch.zeros(1)) for _ in range(6)]) 22 | 23 | k = self.conv1.kernel_size[0] * self.conv1.kernel_size[1] * self.conv1.out_channels 24 | self.conv1.weight.data.normal_(0, fixup_l ** (-1 / (2 * self.m - 2)) * math.sqrt(2. / k)) 25 | 26 | k = self.conv2.kernel_size[0] * self.conv2.kernel_size[1] * self.conv2.out_channels 27 | self.conv2.weight.data.normal_(0, fixup_l ** (-1 / (2 * self.m - 2)) * math.sqrt(2. / k)) 28 | self.conv3.weight.data.zero_() 29 | 30 | if downsample is not None: 31 | k = self.downsample.kernel_size[0] * self.downsample.kernel_size[1] * self.downsample.out_channels 32 | self.downsample.weight.data.normal_(0, math.sqrt(2. / k)) 33 | 34 | def forward(self, x): 35 | residual = x 36 | 37 | out = self.conv1(x + self.biases[0]) 38 | out = self.relu(out + self.biases[1]) 39 | 40 | out = self.conv2(out + self.biases[2]) 41 | out = self.relu(out + self.biases[3]) 42 | 43 | out = self.scale * self.conv3(out + self.biases[4]) + self.biases[5] 44 | 45 | if self.downsample is not None: 46 | residual = self.downsample(x) 47 | 48 | out += residual 49 | out = self.relu(out) 50 | 51 | return out 52 | 53 | 54 | class ResNet(nn.Module): 55 | 56 | def __init__(self, block, layers, num_classes=1000, input_channels=3): 57 | self.inplanes = 64 58 | super(ResNet, self).__init__() 59 | self.conv1 = nn.Conv2d(input_channels, 64, kernel_size=7, stride=2, padding=3, 60 | bias=False) 61 | self.relu = nn.ReLU(inplace=True) 62 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 63 | fixup_l = sum(layers) 64 | self.layer1 = self._make_layer(block, 64, layers[0], fixup_l=fixup_l) 65 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, fixup_l=fixup_l) 66 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, fixup_l=fixup_l) 67 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, fixup_l=fixup_l) 68 | self.avgpool = nn.AvgPool2d(7, stride=1) 69 | self.bias1 = nn.Parameter(torch.zeros(1)) 70 | self.bias2 = nn.Parameter(torch.zeros(1)) 71 | self.fc = nn.Linear(512 * block.expansion, num_classes) 72 | 73 | self.fc.weight.data.zero_() 74 | self.fc.bias.data.zero_() 75 | 76 | n = self.conv1.kernel_size[0] * self.conv1.kernel_size[1] * self.conv1.out_channels 77 | self.conv1.weight.data.normal_(0, math.sqrt(2. 
/ n)) 78 | 79 | def _make_layer(self, block, planes, blocks, fixup_l, stride=1): 80 | downsample = None 81 | if stride != 1 or self.inplanes != planes * block.expansion: 82 | downsample = nn.Conv2d(self.inplanes, planes * block.expansion, 83 | kernel_size=1, stride=stride, bias=True) 84 | 85 | layers = [] 86 | layers.append(block(self.inplanes, planes, stride, downsample, fixup_l=fixup_l)) 87 | self.inplanes = planes * block.expansion 88 | for i in range(1, blocks): 89 | layers.append(block(self.inplanes, planes, fixup_l=fixup_l)) 90 | 91 | return nn.Sequential(*layers) 92 | 93 | def forward(self, x): 94 | x = self.conv1(x) 95 | x = self.relu(x + self.bias1) 96 | x = self.maxpool(x) 97 | 98 | x = self.layer1(x) 99 | x = self.layer2(x) 100 | x = self.layer3(x) 101 | x = self.layer4(x) 102 | 103 | x = self.avgpool(x) 104 | x = x.view(x.size(0), -1) 105 | x = self.fc(x + self.bias2) 106 | 107 | return x 108 | 109 | 110 | def resnet50(**kwargs): 111 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 112 | return model 113 | 114 | 115 | def resnet101(**kwargs): 116 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 117 | return model 118 | 119 | 120 | def resnet152(**kwargs): 121 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 122 | return model 123 | -------------------------------------------------------------------------------- /pipeline/models/image_models/wide_resnet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Wide ResNet by Sergey Zagoruyko and Nikos Komodakis 3 | Fixup initialization by Hongyi Zhang, Yann N. Dauphin, Tengyu Ma 4 | Based on code by xternalz and Andy Brock: 5 | https://github.com/xternalz/WideResNet-pytorch 6 | https://github.com/ajbrock/BoilerPlate 7 | """ 8 | 9 | import math 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | 15 | class BasicBlock(nn.Module): 16 | def __init__(self, in_planes, out_planes, stride, dropout=0.0): 17 | super(BasicBlock, self).__init__() 18 | self.bn1 = nn.BatchNorm2d(in_planes) 19 | self.relu1 = nn.ReLU(inplace=True) 20 | self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 21 | padding=1, bias=False) 22 | self.bn2 = nn.BatchNorm2d(out_planes) 23 | self.relu2 = nn.ReLU(inplace=True) 24 | self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1, 25 | padding=1, bias=False) 26 | self.dropout = dropout 27 | self.equalInOut = (in_planes == out_planes) 28 | self.convShortcut = (not self.equalInOut) and nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, 29 | padding=0, bias=False) or None 30 | def forward(self, x): 31 | if not self.equalInOut: 32 | x = self.relu1(self.bn1(x)) 33 | else: 34 | out = self.relu1(self.bn1(x)) 35 | out = self.relu2(self.bn2(self.conv1(out if self.equalInOut else x))) 36 | if self.dropout > 0: 37 | out = F.dropout(out, p=self.dropout, training=self.training) 38 | out = self.conv2(out) 39 | return torch.add(x if self.equalInOut else self.convShortcut(x), out) 40 | 41 | 42 | class NetworkBlock(nn.Module): 43 | def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropout): 44 | super(NetworkBlock, self).__init__() 45 | self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropout) 46 | 47 | def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropout): 48 | layers = [] 49 | 50 | for i in range(int(nb_layers)): 51 | _in_planes = i == 0 and in_planes or out_planes 52 | _stride = i == 0 and stride or 1 53 | 
layers.append(block(_in_planes, out_planes, _stride, dropout=dropout)) 54 | 55 | return nn.Sequential(*layers) 56 | 57 | def forward(self, x): 58 | return self.layer(x) 59 | 60 | 61 | class WideResNet(nn.Module): 62 | def __init__(self, depth, num_classes, widen_factor=1, dropout=0.0): 63 | super(WideResNet, self).__init__() 64 | 65 | nChannels = [16, 16 * widen_factor, 32 * widen_factor, 64 * widen_factor] 66 | 67 | assert (depth - 4) % 6 == 0, "You need to change the number of layers" 68 | n = (depth - 4) / 6 69 | 70 | block = BasicBlock 71 | 72 | self.conv1 = nn.Conv2d(3, nChannels[0], kernel_size=3, stride=1, padding=1, bias=False) 73 | 74 | self.block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, 1, dropout=dropout) 75 | self.block2 = NetworkBlock(n, nChannels[1], nChannels[2], block, 2, dropout=dropout) 76 | self.block3 = NetworkBlock(n, nChannels[2], nChannels[3], block, 2, dropout=dropout) 77 | 78 | self.bn1 = nn.BatchNorm2d(nChannels[3]) 79 | 80 | self.relu = nn.ReLU(inplace=True) 81 | self.fc = nn.Linear(nChannels[3], num_classes) 82 | self.nChannels = nChannels[3] 83 | 84 | for m in self.modules(): 85 | if isinstance(m, nn.Conv2d): 86 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 87 | m.weight.data.normal_(0, math.sqrt(2. / n)) 88 | elif isinstance(m, nn.BatchNorm2d): 89 | m.weight.data.fill_(1) 90 | m.bias.data.zero_() 91 | elif isinstance(m, nn.Linear): 92 | m.bias.data.zero_() 93 | 94 | def forward(self, x): 95 | out = self.conv1(x) 96 | out = self.block1(out) 97 | out = self.block2(out) 98 | out = self.block3(out) 99 | 100 | out = self.relu(self.bn1(out)) 101 | out = F.adaptive_avg_pool2d(out, 1) 102 | out = out.view(-1, self.nChannels) 103 | return self.fc(out) 104 | -------------------------------------------------------------------------------- /pipeline/models/image_models/wide_resnet_fixup.py: -------------------------------------------------------------------------------- 1 | """ 2 | Wide ResNet by Sergey Zagoruyko and Nikos Komodakis 3 | Fixup initialization by Hongyi Zhang, Yann N. Dauphin, Tengyu Ma 4 | Based on code by xternalz and Andy Brock: 5 | https://github.com/xternalz/WideResNet-pytorch 6 | https://github.com/ajbrock/BoilerPlate 7 | """ 8 | 9 | import math 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | 15 | class BasicBlock(nn.Module): 16 | m = 2 17 | 18 | def __init__(self, in_planes, out_planes, stride, dropout, fixup_l, fixup_coeff): 19 | super(BasicBlock, self).__init__() 20 | 21 | self._dropout = dropout 22 | 23 | self.relu = nn.ReLU(inplace=True) 24 | self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) 25 | self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1, padding=1, bias=False) 26 | 27 | self.equalInOut = in_planes == out_planes 28 | self.conv_res = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, padding=0, bias=False) 29 | self.conv_res = not self.equalInOut and self.conv_res or None 30 | 31 | self.scale = nn.Parameter(torch.ones(1)) 32 | self.biases = nn.ParameterList([nn.Parameter(torch.zeros(1)) for _ in range(4)]) 33 | 34 | k = self.conv1.kernel_size[0] * self.conv1.kernel_size[1] * self.conv1.out_channels 35 | self.conv1.weight.data.normal_(0, fixup_coeff * fixup_l ** (-1 / (2 * self.m - 2)) * math.sqrt(2. 
/ k)) 36 | self.conv2.weight.data.zero_() 37 | 38 | if self.conv_res is not None: 39 | k = self.conv_res.kernel_size[0] * self.conv_res.kernel_size[1] * self.conv_res.out_channels 40 | self.conv_res.weight.data.normal_(0, math.sqrt(2. / k)) 41 | 42 | def forward(self, x): 43 | x_out = self.relu(x + self.biases[0]) 44 | out = self.conv1(x_out) + self.biases[1] 45 | out = self.relu(out) + self.biases[2] 46 | if self._dropout > 0: 47 | out = F.dropout(out, p=self._dropout, training=self.training) 48 | out = self.scale * self.conv2(out) + self.biases[3] 49 | 50 | if self.equalInOut: 51 | return torch.add(x, out) 52 | 53 | return torch.add(self.conv_res(x_out), out) 54 | 55 | 56 | class NetworkBlock(nn.Module): 57 | def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropout, fixup_l, fixup_coeff): 58 | super(NetworkBlock, self).__init__() 59 | self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropout, fixup_l, fixup_coeff) 60 | 61 | def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropout, fixup_l, fixup_coeff): 62 | layers = [] 63 | 64 | for i in range(int(nb_layers)): 65 | _in_planes = i == 0 and in_planes or out_planes 66 | _stride = i == 0 and stride or 1 67 | layers.append(block(_in_planes, out_planes, _stride, dropout=dropout, fixup_l=fixup_l, fixup_coeff=fixup_coeff)) 68 | 69 | return nn.Sequential(*layers) 70 | 71 | def forward(self, x): 72 | return self.layer(x) 73 | 74 | 75 | class WideResNet(nn.Module): 76 | def __init__(self, depth, num_classes, widen_factor=1, dropout=0.0, fixup_coeff=1): 77 | super(WideResNet, self).__init__() 78 | 79 | nChannels = [16, 16 * widen_factor, 32 * widen_factor, 64 * widen_factor] 80 | 81 | assert (depth - 4) % 6 == 0, "You need to change the number of layers" 82 | n = (depth - 4) / 6 83 | 84 | block = BasicBlock 85 | fixup_l = n * 3 86 | 87 | self.conv1 = nn.Conv2d(3, nChannels[0], kernel_size=3, stride=1, padding=1, bias=False) 88 | 89 | self.block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, 1, dropout=dropout, 90 | fixup_l=fixup_l, fixup_coeff=fixup_coeff) 91 | self.block2 = NetworkBlock(n, nChannels[1], nChannels[2], block, 2, dropout=dropout, 92 | fixup_l=fixup_l, fixup_coeff=fixup_coeff) 93 | self.block3 = NetworkBlock(n, nChannels[2], nChannels[3], block, 2, dropout=dropout, 94 | fixup_l=fixup_l, fixup_coeff=fixup_coeff) 95 | 96 | self.relu = nn.ReLU(inplace=True) 97 | self.fc = nn.Linear(nChannels[3], num_classes) 98 | self.nChannels = nChannels[3] 99 | 100 | self.fc.bias.data.zero_() 101 | self.fc.weight.data.zero_() 102 | 103 | k = self.conv1.kernel_size[0] * self.conv1.kernel_size[1] * self.conv1.out_channels 104 | self.conv1.weight.data.normal_(0, math.sqrt(2. 
/ k)) 105 | 106 | self.bias1 = nn.Parameter(torch.zeros(1)) 107 | self.bias2 = nn.Parameter(torch.zeros(1)) 108 | 109 | def forward(self, x): 110 | out = self.conv1(x) + self.bias1 111 | out = self.block1(out) 112 | out = self.block2(out) 113 | out = self.block3(out) 114 | 115 | out = self.relu(out) 116 | out = F.adaptive_avg_pool2d(out, 1) 117 | out = out.view(-1, self.nChannels) 118 | return self.fc(out + self.bias2) 119 | -------------------------------------------------------------------------------- /pipeline/predictors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/predictors/__init__.py -------------------------------------------------------------------------------- /pipeline/predictors/base.py: -------------------------------------------------------------------------------- 1 | import time 2 | from typing import Iterable 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from ..logger import LOGGER 8 | from ..storage.predictions import PredictionsStorageBase 9 | from ..utils import move_to_device, load_model 10 | 11 | import os 12 | 13 | 14 | class PredictorBase: 15 | def __init__( 16 | self, 17 | model: nn.Module, 18 | data_loader: Iterable, 19 | print_frequency: None or int, 20 | device: str, 21 | model_save_path: str, 22 | predictions_storage: PredictionsStorageBase) -> None: 23 | 24 | self.model = model.to(device) 25 | self.data_loader = data_loader 26 | self.print_frequency = print_frequency 27 | self.device = device 28 | self.model_save_path = model_save_path 29 | self.predictions_storage = predictions_storage 30 | 31 | def predict_step(self, input_data: torch.Tensor): 32 | input_data = move_to_device(input_data, device=self.device) 33 | model_output = self.model(input_data) 34 | return model_output 35 | 36 | def log_predict_step(self, step_id: int, predict_time: float): 37 | if self.print_frequency is None or step_id % self.print_frequency == 0: 38 | LOGGER.info("[{} s] Predict step {}".format(predict_time, step_id)) 39 | return True 40 | 41 | return False 42 | 43 | def log_predict_completed(self, predict_time: float): 44 | LOGGER.info("[{} s] Predict is completed".format(predict_time)) 45 | return True 46 | 47 | def load_last_model(self): 48 | if os.path.exists(self.model_save_path): 49 | epochs = filter(lambda file: file.startswith("epoch_"), os.listdir(self.model_save_path)) 50 | epochs = map(lambda file: int(file[file.find("_") + 1:]), epochs)  # slice to the end so "epoch_10" parses as 10, not 1 51 | epochs = list(epochs) 52 | 53 | if epochs: 54 | last_model_path = os.path.join(self.model_save_path, "epoch_{}".format(max(epochs))) 55 | load_model(self.model, last_model_path) 56 | return 57 | 58 | LOGGER.info("Model not found in {}.
Starting to train a model from scratch...".format(self.model_save_path)) 59 | 60 | def run(self): 61 | self.load_last_model() 62 | self.model.eval() 63 | 64 | step_count = 0 65 | start_time = time.time() 66 | 67 | with torch.no_grad(): 68 | for step_id, (input_data, ids) in enumerate(self.data_loader): 69 | model_output = self.predict_step(input_data) 70 | self.predictions_storage.add_batch(ids, model_output) 71 | 72 | step_count += 1 73 | predict_time = time.time() - start_time 74 | self.log_predict_step(step_id, predict_time) 75 | 76 | self.predictions_storage.sort_by_id() 77 | self.predictions_storage.flush() 78 | predict_time = time.time() - start_time 79 | self.log_predict_completed(predict_time) 80 | return predict_time 81 | -------------------------------------------------------------------------------- /pipeline/predictors/classification.py: -------------------------------------------------------------------------------- 1 | from .base import PredictorBase 2 | import torch 3 | 4 | 5 | class PredictorClassification(PredictorBase): 6 | def predict_step(self, input_data: torch.Tensor): 7 | result = super().predict_step(input_data) 8 | result = torch.softmax(result, dim=-1) 9 | return result 10 | -------------------------------------------------------------------------------- /pipeline/preprocessing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/preprocessing/__init__.py -------------------------------------------------------------------------------- /pipeline/preprocessing/audio_preprocessing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/preprocessing/audio_preprocessing/__init__.py -------------------------------------------------------------------------------- /pipeline/preprocessing/image_preprocessing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/preprocessing/image_preprocessing/__init__.py -------------------------------------------------------------------------------- /pipeline/preprocessing/text_preprocessing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/preprocessing/text_preprocessing/__init__.py -------------------------------------------------------------------------------- /pipeline/schedulers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/schedulers/__init__.py -------------------------------------------------------------------------------- /pipeline/schedulers/base.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | 4 | class SchedulerBase(abc.ABC): 5 | @abc.abstractmethod 6 | def step(self, loss, metrics, epoch_id): 7 | pass 8 | 9 | 10 | class SchedulerWrapperBase(SchedulerBase): 11 | def __init__(self, scheduler): 12 | self._scheduler = scheduler 13 | 14 | 15 | class SchedulerWrapperIdentity(SchedulerWrapperBase): 16 | def __init__(self, *args, **kwargs): 
17 | super().__init__(None) 18 | 19 | def step(self, loss, metrics, epoch_id): 20 | pass 21 | 22 | 23 | class SchedulerWrapperLossBase(SchedulerWrapperBase): 24 | def __init__(self, scheduler): 25 | super().__init__(scheduler) 26 | 27 | def step(self, loss, metrics, epoch_id): 28 | return self._scheduler.step(loss, epoch_id) 29 | 30 | 31 | class SchedulerWrapperMetricsMeanBase(SchedulerWrapperBase): 32 | def __init__(self, scheduler): 33 | super().__init__(scheduler) 34 | 35 | def step(self, loss, metrics, epoch_id): 36 | values = list(metrics.values()) 37 | mean_metrics = sum(values) / len(values) 38 | return self._scheduler.step(mean_metrics, epoch_id) 39 | -------------------------------------------------------------------------------- /pipeline/schedulers/dropout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/schedulers/dropout/__init__.py -------------------------------------------------------------------------------- /pipeline/schedulers/dropout/increase_step.py: -------------------------------------------------------------------------------- 1 | from ..base import SchedulerBase 2 | 3 | from .utils import set_dropout_probability 4 | 5 | 6 | class SchedulerWrapperIncreaseStep(SchedulerBase): 7 | def __init__(self, model, epoch_count, initial_value=0, max_value=0.5): 8 | self._model = model 9 | self._epoch_count = epoch_count 10 | self._initial_value = initial_value 11 | self._max_value = max_value 12 | 13 | def step(self, loss, metrics, epoch_id): 14 | new_value = (self._max_value - self._initial_value) / self._epoch_count * (epoch_id + 1) 15 | set_dropout_probability(self._model, new_value) 16 | -------------------------------------------------------------------------------- /pipeline/schedulers/dropout/utils.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from torch.nn.modules.dropout import _DropoutNd 3 | 4 | 5 | def set_dropout_probability(module, probability): 6 | if isinstance(module, _DropoutNd): 7 | module.p = probability 8 | return 9 | 10 | for child in module.children(): 11 | set_dropout_probability(child, probability) 12 | -------------------------------------------------------------------------------- /pipeline/schedulers/learning_rate/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/schedulers/learning_rate/__init__.py -------------------------------------------------------------------------------- /pipeline/schedulers/learning_rate/cyclical_lr_scheduler.py: -------------------------------------------------------------------------------- 1 | from ..base import SchedulerWrapperLossBase, SchedulerWrapperMetricsMeanBase 2 | 3 | from torch.optim.lr_scheduler import CosineAnnealingLR 4 | 5 | 6 | class SchedulerWrapperLossOnCyclic(SchedulerWrapperLossBase): 7 | def __init__(self, optimizer, T_max, eta_min=0, last_epoch=-1): 8 | scheduler = CosineAnnealingLR( 9 | optimizer, 10 | T_max=T_max, 11 | eta_min=eta_min, 12 | last_epoch=last_epoch, 13 | ) 14 | super().__init__(scheduler) 15 | 16 | 17 | class SchedulerWrapperMetricsMeanOnCyclic(SchedulerWrapperMetricsMeanBase): 18 | def __init__(self, optimizer, T_max, eta_min=0, last_epoch=-1): 19 | scheduler = CosineAnnealingLR( 20 | optimizer, 21 | T_max=T_max, 22 | 
eta_min=eta_min, 23 | last_epoch=last_epoch, 24 | ) 25 | super().__init__(scheduler) -------------------------------------------------------------------------------- /pipeline/schedulers/learning_rate/reduce_on_plateau.py: -------------------------------------------------------------------------------- 1 | from ..base import SchedulerWrapperLossBase, SchedulerWrapperMetricsMeanBase 2 | 3 | from torch.optim.lr_scheduler import ReduceLROnPlateau 4 | 5 | 6 | class SchedulerWrapperLossOnPlateau(SchedulerWrapperLossBase): 7 | def __init__(self, optimizer, mode="min", factor=0.5, patience=3, verbose=True, cooldown=3, min_lr=1e-8): 8 | scheduler = ReduceLROnPlateau( 9 | optimizer, 10 | mode=mode, 11 | factor=factor, 12 | patience=patience, 13 | verbose=verbose, 14 | cooldown=cooldown, 15 | min_lr=min_lr 16 | ) 17 | super().__init__(scheduler) 18 | 19 | 20 | class SchedulerWrapperMetricsMeanOnPlateau(SchedulerWrapperMetricsMeanBase): 21 | def __init__(self, optimizer, mode="max", factor=0.5, patience=3, verbose=True, cooldown=3, min_lr=1e-8): 22 | scheduler = ReduceLROnPlateau( 23 | optimizer, 24 | mode=mode, 25 | factor=factor, 26 | patience=patience, 27 | verbose=verbose, 28 | cooldown=cooldown, 29 | min_lr=min_lr 30 | ) 31 | super().__init__(scheduler) 32 | -------------------------------------------------------------------------------- /pipeline/storage/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/storage/__init__.py -------------------------------------------------------------------------------- /pipeline/storage/predictions.py: -------------------------------------------------------------------------------- 1 | from ..core import PipelineError 2 | 3 | import abc 4 | import torch 5 | import os 6 | 7 | 8 | class PredictionsStorageBase(abc.ABC): 9 | @abc.abstractmethod 10 | def add(self, identifier, prediction): 11 | pass 12 | 13 | def add_batch(self, identifiers, predictions): 14 | for identifier, prediction in zip(identifiers, predictions): 15 | self.add(identifier, prediction) 16 | 17 | @abc.abstractmethod 18 | def flush(self): 19 | pass 20 | 21 | @abc.abstractmethod 22 | def get_all(self): 23 | pass 24 | 25 | @abc.abstractmethod 26 | def get_by_id(self, identifier): 27 | pass 28 | 29 | def get_by_id_batch(self, identifiers): 30 | result = [] 31 | for identifier in identifiers: 32 | result.append(self.get_by_id(identifier)) 33 | 34 | return torch.stack(result) 35 | 36 | @abc.abstractmethod 37 | def sort_by_id(self): 38 | pass 39 | 40 | 41 | class PredictionsStorageFiles(PredictionsStorageBase): 42 | def __init__(self, path): 43 | if os.path.exists(path) and not os.path.isdir(path): 44 | raise PipelineError("{} should be a directory".format(path)) 45 | 46 | os.makedirs(path, exist_ok=True) 47 | 48 | self._path = path 49 | 50 | self._identifiers = [] 51 | self._predictions = [] 52 | 53 | self._identifier_to_element_id = {} 54 | 55 | if os.path.exists(os.path.join(self._path, "identifiers")): 56 | self._load_predictions() 57 | 58 | def _load_predictions(self): 59 | self._identifiers = torch.load(os.path.join(self._path, "identifiers")) 60 | self._predictions = torch.load(os.path.join(self._path, "predictions")) 61 | 62 | assert len(self._identifiers) == len(self._predictions) 63 | 64 | for i, identifier in enumerate(self._identifiers): 65 | self._identifier_to_element_id[identifier] = i 66 | 67 | def _save_predictions(self): 68 | assert 
len(self._identifiers) == len(self._predictions) 69 | 70 | with open(os.path.join(self._path, "identifiers"), "wb") as fout: 71 | torch.save(self._identifiers, fout) 72 | 73 | with open(os.path.join(self._path, "predictions"), "wb") as fout: 74 | torch.save(self._predictions, fout) 75 | 76 | def add(self, identifier, prediction): 77 | self._identifiers.append(identifier) 78 | self._predictions.append(prediction) 79 | self._identifier_to_element_id[identifier] = len(self._identifiers) - 1  # index of the element just appended 80 | 81 | def flush(self): 82 | self._save_predictions() 83 | 84 | def get_all(self): 85 | return self._identifiers, self._predictions 86 | 87 | def get_by_id(self, identifier): 88 | if identifier not in self._identifier_to_element_id: 89 | raise PipelineError("Key error: {}".format(identifier)) 90 | 91 | element_id = self._identifier_to_element_id[identifier] 92 | return self._predictions[element_id] 93 | 94 | def sort_by_id(self): 95 | result = sorted(zip(self._identifiers, self._predictions), key=lambda x: x[0]) 96 | self._identifiers, self._predictions = map(list, zip(*result))  # keep lists so add() still works after sorting 97 | self._identifier_to_element_id = {identifier: i for i, identifier in enumerate(self._identifiers)}  # indices move when sorting 98 | self.flush() 99 | -------------------------------------------------------------------------------- /pipeline/storage/state.py: -------------------------------------------------------------------------------- 1 | from ..core import PipelineError 2 | 3 | import abc 4 | import pickle 5 | import os 6 | 7 | 8 | class StateStorageBase(abc.ABC): 9 | @abc.abstractmethod 10 | def has_key(self, key: str): 11 | pass 12 | 13 | @abc.abstractmethod 14 | def get_value(self, key: str): 15 | pass 16 | 17 | @abc.abstractmethod 18 | def remove_key(self, key: str): 19 | pass 20 | 21 | @abc.abstractmethod 22 | def set_value(self, key: str, value: object): 23 | pass 24 | 25 | 26 | class StateStorageEmpty(StateStorageBase): 27 | def set_value(self, key: str, value: object): 28 | pass 29 | 30 | def get_value(self, key: str): 31 | raise PipelineError("Key error: {}".format(key)) 32 | 33 | def has_key(self, key: str): 34 | return False 35 | 36 | def remove_key(self, key: str): 37 | raise PipelineError("Key error: {}".format(key)) 38 | 39 | 40 | class StateStorageFile(StateStorageBase): 41 | def __init__(self, path: str): 42 | self._path = path 43 | 44 | if not os.path.exists(path): 45 | os.makedirs(os.path.dirname(path), exist_ok=True) 46 | with open(path, "wb") as fout: 47 | pickle.dump({}, fout) 48 | 49 | with open(path, "rb") as fin: 50 | self._state = pickle.load(fin) 51 | 52 | def _save(self): 53 | with open(self._path, "wb") as fout: 54 | pickle.dump(self._state, fout) 55 | 56 | def has_key(self, key: str): 57 | return key in self._state 58 | 59 | def get_value(self, key: str): 60 | if key not in self._state: 61 | raise PipelineError("Key error: {}".format(key)) 62 | 63 | return self._state[key] 64 | 65 | def set_value(self, key: str, value: object): 66 | self._state[key] = value 67 | 68 | self._save() 69 | 70 | def remove_key(self, key: str): 71 | if key not in self._state: 72 | raise PipelineError("Key error: {}".format(key)) 73 | 74 | del self._state[key] 75 | 76 | self._save() 77 | 78 | -------------------------------------------------------------------------------- /pipeline/trainers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/trainers/__init__.py -------------------------------------------------------------------------------- /pipeline/trainers/base.py:
-------------------------------------------------------------------------------- 1 | import time 2 | from typing import Iterable 3 | 4 | import torch 5 | import torch.nn as nn 6 | from torch.optim import Optimizer 7 | 8 | from ..core import PipelineError 9 | from ..logger import LOGGER 10 | from ..metrics.base import MetricsCalculatorBase 11 | from pipeline.schedulers.base import SchedulerWrapperMetricsMeanBase, SchedulerWrapperBase 12 | from ..storage.state import StateStorageBase 13 | from ..utils import move_to_device, save_model, load_model 14 | 15 | import os 16 | 17 | 18 | class TrainerBase: 19 | def __init__( 20 | self, 21 | model: nn.Module, 22 | train_data_loader: Iterable, 23 | val_data_loader: Iterable, 24 | epoch_count: int, 25 | optimizer: Optimizer, 26 | scheduler: SchedulerWrapperBase, 27 | loss: nn.Module, 28 | metrics_calculator: MetricsCalculatorBase, 29 | print_frequency: None or int, 30 | device: str, 31 | model_save_path: str, 32 | state_storage: StateStorageBase) -> None: 33 | 34 | self.model = model.to(device) 35 | self.train_data_loader = train_data_loader 36 | self.val_data_loader = val_data_loader 37 | self.epoch_count = epoch_count 38 | self.optimizer = optimizer 39 | self.scheduler = scheduler 40 | self.loss = loss 41 | self.metrics_calculator = metrics_calculator 42 | self.print_frequency = print_frequency 43 | self.device = device 44 | self.model_save_path = model_save_path 45 | self.state_storage = state_storage 46 | 47 | def train_step(self, input_data: torch.Tensor, target: torch.Tensor): 48 | input_data = move_to_device(input_data, device=self.device) 49 | target = move_to_device(target, device=self.device) 50 | 51 | model_output = self.model(input_data) 52 | 53 | self.optimizer.zero_grad() 54 | loss = self.loss(model_output, target) 55 | 56 | loss.backward() 57 | 58 | self.optimizer.step(closure=None) 59 | 60 | return loss.cpu().data.numpy() 61 | 62 | def predict_step(self, input_data: torch.Tensor): 63 | input_data = move_to_device(input_data, device=self.device) 64 | model_output = self.model(input_data) 65 | return model_output 66 | 67 | def log_train_step(self, epoch_id: int, step_id: int, epoch_time: float, loss: float, mean_loss: float): 68 | if self.print_frequency is None or step_id % self.print_frequency == 0: 69 | LOGGER.info("[{} s] Epoch {}. Train step {}. Loss {}. Mean loss {}".format( 70 | epoch_time, epoch_id, step_id, loss, mean_loss)) 71 | return True 72 | 73 | return False 74 | 75 | def log_validation_step(self, epoch_id: int, step_id: int, epoch_time: float, loss: float, mean_loss: float): 76 | if self.print_frequency is None or step_id % self.print_frequency == 0: 77 | LOGGER.info("[{} s] Epoch {}. Validation step {}. Loss {}. Mean loss {}".format( 78 | epoch_time, epoch_id, step_id, loss, mean_loss)) 79 | 80 | return True 81 | 82 | return False 83 | 84 | def log_train_epoch(self, epoch_id: int, epoch_time: float, mean_loss: float): 85 | LOGGER.info("Training Epoch {} has completed. Time: {}. Mean loss: {}".format( 86 | epoch_id, epoch_time, mean_loss)) 87 | return True 88 | 89 | def log_validation_epoch(self, epoch_id: int, epoch_time: float, mean_loss: float, metrics: dict): 90 | LOGGER.info("Validation Epoch {} has completed. Time: {}. Mean loss: {}. 
Metrics: {}".format( 91 | epoch_id, epoch_time, mean_loss, str(metrics))) 92 | return True 93 | 94 | def run_train_epoch(self, epoch_id: int): 95 | self.model.train() 96 | 97 | start_time = time.time() 98 | mean_loss = 0 99 | step_count = 0 100 | 101 | for step_id, (input_data, target) in enumerate(self.train_data_loader): 102 | loss = self.train_step(input_data, target) 103 | epoch_time = time.time() - start_time 104 | 105 | mean_loss += loss 106 | step_count += 1 107 | 108 | self.log_train_step(epoch_id, step_id, epoch_time, loss, mean_loss / step_count) 109 | 110 | epoch_time = time.time() - start_time 111 | mean_loss /= max(step_count, 1) 112 | 113 | self.log_train_epoch(epoch_id, epoch_time, mean_loss) 114 | 115 | return epoch_time, mean_loss 116 | 117 | def run_validation_epoch(self, epoch_id: int): 118 | self.model.eval() 119 | 120 | self.metrics_calculator.zero_cache() 121 | mean_loss = 0 122 | step_count = 0 123 | start_time = time.time() 124 | 125 | with torch.no_grad(): 126 | for step_id, (input_data, target) in enumerate(self.val_data_loader): 127 | target = move_to_device(target, device=self.device) 128 | model_output = self.predict_step(input_data) 129 | 130 | loss = self.loss(model_output, target) 131 | mean_loss += loss 132 | step_count += 1 133 | epoch_time = time.time() - start_time 134 | 135 | self.metrics_calculator.add(model_output, target) 136 | self.log_validation_step(epoch_id, step_id, epoch_time, loss, mean_loss / step_count) 137 | 138 | epoch_time = time.time() - start_time 139 | mean_loss /= max(step_count, 1) 140 | metrics = self.metrics_calculator.calculate() 141 | 142 | self.log_validation_epoch(epoch_id, epoch_time, mean_loss, metrics) 143 | 144 | return epoch_time, mean_loss, metrics 145 | 146 | def load_optimizer_state(self): 147 | if not self.state_storage.has_key("learning_rates"): 148 | return 149 | 150 | learning_rates = self.state_storage.get_value("learning_rates") 151 | 152 | for learning_rate, param_group in zip(learning_rates, self.optimizer.param_groups): 153 | param_group["lr"] = learning_rate 154 | 155 | def save_optimizer_state(self): 156 | learning_rates = [] 157 | for param_group in self.optimizer.param_groups: 158 | learning_rates.append(float(param_group['lr'])) 159 | 160 | self.state_storage.set_value("learning_rates", learning_rates) 161 | 162 | def save_last_model(self, epoch_id): 163 | os.makedirs(self.model_save_path, exist_ok=True) 164 | model_path = os.path.join(self.model_save_path, "epoch_{}".format(epoch_id)) 165 | save_model(self.model, model_path) 166 | LOGGER.info("Model was saved in {}".format(model_path)) 167 | 168 | def load_last_model(self, epoch_id): 169 | last_model_path = os.path.join(self.model_save_path, "epoch_{}".format(epoch_id)) 170 | load_model(self.model, last_model_path) 171 | 172 | def run(self): 173 | start_epoch_id = 0 174 | 175 | if self.state_storage.has_key("start_epoch_id"): 176 | start_epoch_id = self.state_storage.get_value("start_epoch_id") 177 | try: 178 | self.load_last_model(start_epoch_id - 1) 179 | except: 180 | LOGGER.exception("Exception occurs during loading a model. Starting to train a model from scratch...") 181 | else: 182 | LOGGER.info("Model not found in {}. 
Starting to train a model from scratch...".format(self.model_save_path)) 183 | 184 | self.load_optimizer_state() 185 | 186 | epoch_id = start_epoch_id 187 | while self.epoch_count is None or epoch_id < self.epoch_count: 188 | _, mean_train_loss = self.run_train_epoch(epoch_id) 189 | 190 | if self.val_data_loader is None: 191 | if isinstance(self.scheduler, SchedulerWrapperMetricsMeanBase): 192 | raise PipelineError("You can't use a scheduler based on metrics without validation data") 193 | self.scheduler.step(mean_train_loss, {}, epoch_id) 194 | else: 195 | _, mean_validation_loss, validation_metrics = self.run_validation_epoch(epoch_id) 196 | self.scheduler.step(mean_validation_loss, validation_metrics, epoch_id) 197 | 198 | self.state_storage.set_value("start_epoch_id", epoch_id + 1)  # persist progress and advance the epoch counter whether or not validation ran 199 | self.save_optimizer_state() 200 | self.save_last_model(epoch_id) 201 | 202 | epoch_id += 1 203 | -------------------------------------------------------------------------------- /pipeline/trainers/classification.py: -------------------------------------------------------------------------------- 1 | from .base import TrainerBase 2 | 3 | 4 | class TrainerClassification(TrainerBase): 5 | pass 6 | -------------------------------------------------------------------------------- /pipeline/trainers/segmentation.py: -------------------------------------------------------------------------------- 1 | from .base import TrainerBase 2 | 3 | 4 | class TrainerSegmentation(TrainerBase): 5 | pass 6 | -------------------------------------------------------------------------------- /pipeline/utils.py: -------------------------------------------------------------------------------- 1 | from .logger import setup_logger 2 | 3 | from torch.utils.data import DataLoader 4 | from torch.nn import DataParallel 5 | 6 | import importlib 7 | import torch 8 | import os 9 | 10 | 11 | def _load_cls(module_path, cls_name): 12 | module_path_fixed = module_path 13 | if module_path_fixed.endswith(".py"): 14 | module_path_fixed = module_path_fixed[:-3] 15 | module_path_fixed = module_path_fixed.replace("/", ".") 16 | module = importlib.import_module(module_path_fixed) 17 | assert hasattr(module, cls_name), "{} file should contain {} class".format(module_path, cls_name) 18 | 19 | cls = getattr(module, cls_name) 20 | return cls 21 | 22 | 23 | def load_config(config_path: str): 24 | return _load_cls(config_path, "Config")() 25 | 26 | 27 | def load_predict_config(config_path: str): 28 | return _load_cls(config_path, "PredictConfig")() 29 | 30 | 31 | def move_to_device(tensor: list or tuple or torch.Tensor, device: str): 32 | if isinstance(tensor, list): 33 | return [move_to_device(elem, device=device) for elem in tensor] 34 | if isinstance(tensor, tuple): 35 | return tuple(move_to_device(elem, device=device) for elem in tensor)  # tuple(), not a bare generator expression 36 | return tensor.to(device) 37 | 38 | 39 | def get_path(path): 40 | return os.path.expanduser(path) 41 | 42 | 43 | def save_model(model, path): 44 | if isinstance(model, DataParallel): 45 | model = model.module 46 | 47 | with open(path, "wb") as fout: 48 | torch.save(model.state_dict(), fout) 49 | 50 | 51 | def load_model(model, path): 52 | with open(path, "rb") as fin: 53 | state_dict = torch.load(fin) 54 | 55 | model.load_state_dict(state_dict) 56 | 57 | 58 | def run_train(config): 59 | train_data_loader = DataLoader( 60 | config.train_dataset, 61 | batch_size=config.batch_size, 62 | shuffle=True, 63 | pin_memory=True, 64 | num_workers=config.num_workers) 65 | 66 | val_data_loader = DataLoader( 67 |
config.val_dataset, 68 | batch_size=config.batch_size, 69 | shuffle=False, 70 | num_workers=config.num_workers) 71 | 72 | model = config.model 73 | 74 | model_save_path = config.model_save_path 75 | os.makedirs(model_save_path, exist_ok=True) 76 | 77 | logger_path = os.path.join(model_save_path, "log.txt") 78 | setup_logger(out_file=logger_path) 79 | 80 | trainer = config.trainer_cls( 81 | model=model, 82 | train_data_loader=train_data_loader, 83 | val_data_loader=val_data_loader, 84 | epoch_count=config.epoch_count, 85 | optimizer=config.optimizer, 86 | scheduler=config.scheduler, 87 | loss=config.loss, 88 | metrics_calculator=config.metrics_calculator, 89 | print_frequency=config.print_frequency, 90 | device=config.device, 91 | model_save_path=config.model_save_path, 92 | state_storage=config.state_storage 93 | ) 94 | 95 | trainer.run() 96 | 97 | 98 | def run_predict(config): 99 | data_loader = DataLoader( 100 | config.dataset, 101 | batch_size=config.batch_size, 102 | shuffle=False, 103 | pin_memory=True, 104 | num_workers=config.num_workers) 105 | 106 | model = config.model 107 | 108 | model_save_path = config.model_save_path 109 | assert os.path.exists(model_save_path), "{} does not exist".format(model_save_path) 110 | 111 | logger_path = os.path.join(model_save_path, "log_predict.txt") 112 | setup_logger(out_file=logger_path) 113 | 114 | predictor = config.predictor_cls( 115 | model=model, 116 | data_loader=data_loader, 117 | print_frequency=config.print_frequency, 118 | device=config.device, 119 | model_save_path=model_save_path, 120 | predictions_storage=config.predictions_storage) 121 | 122 | predictor.run() 123 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.0.0 2 | pandas 3 | numpy 4 | torchvision 5 | scikit-learn 6 | Pillow 7 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/tests/__init__.py -------------------------------------------------------------------------------- /tests/common.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import os 3 | 4 | 5 | def make_temp_path(): 6 | _, path = tempfile.mkstemp() 7 | os.remove(path) 8 | return path 9 | -------------------------------------------------------------------------------- /tests/test_metrics.py: -------------------------------------------------------------------------------- 1 | from pipeline.metrics.accuracy import MetricsCalculatorAccuracy 2 | from pipeline.core import PipelineError 3 | 4 | 5 | import pytest 6 | 7 | 8 | class TestClassificationMetrics: 9 | def test_accuracy(self): 10 | metrics_calculator = MetricsCalculatorAccuracy(border=0.4) 11 | 12 | with pytest.raises(PipelineError): 13 | metrics_calculator.calculate() 14 | 15 | -------------------------------------------------------------------------------- /tests/test_schedulers.py: -------------------------------------------------------------------------------- 1 | from pipeline.schedulers.learning_rate.reduce_on_plateau import SchedulerWrapperLossOnPlateau, SchedulerWrapperMetricsMeanOnPlateau 2 | 3 | from torch.optim import Adam 4 | import torch.nn as nn 5 | 6 | 7 | class TestReduceLROnPlateau: 8 | def test_wrapper_loss(self): 9 
--------------------------------------------------------------------------------
/tests/test_schedulers.py:
--------------------------------------------------------------------------------
1 | from pipeline.schedulers.learning_rate.reduce_on_plateau import SchedulerWrapperLossOnPlateau, SchedulerWrapperMetricsMeanOnPlateau
2 | 
3 | from torch.optim import Adam
4 | import torch.nn as nn
5 | 
6 | 
7 | class TestReduceLROnPlateau:
8 |     def test_wrapper_loss(self):
9 |         first_layer = nn.Linear(10, 5)
10 |         second_layer = nn.Linear(5, 1)
11 | 
12 |         optimizer = Adam([{"params": first_layer.parameters(), "lr": 1},
13 |                           {"params": second_layer.parameters(), "lr": 2}])
14 |         scheduler = SchedulerWrapperLossOnPlateau(optimizer, factor=0.5, patience=1, min_lr=0.1, cooldown=2)
15 | 
16 |         assert optimizer.param_groups[0]["lr"] == 1
17 |         assert optimizer.param_groups[1]["lr"] == 2
18 | 
19 |         scheduler.step(loss=10, metrics={"a": 5}, epoch_id=0)
20 |         assert optimizer.param_groups[0]["lr"] == 1
21 |         assert optimizer.param_groups[1]["lr"] == 2
22 | 
23 |         scheduler.step(loss=11, metrics={"a": 3}, epoch_id=1)
24 |         assert optimizer.param_groups[0]["lr"] == 1
25 |         assert optimizer.param_groups[1]["lr"] == 2
26 | 
27 |         scheduler.step(loss=12, metrics={"a": 1}, epoch_id=2)
28 |         assert optimizer.param_groups[0]["lr"] == 0.5
29 |         assert optimizer.param_groups[1]["lr"] == 1
30 | 
31 |         scheduler.step(loss=13, metrics={"a": 2}, epoch_id=3)
32 |         scheduler.step(loss=14, metrics={"a": 5}, epoch_id=4)
33 |         scheduler.step(loss=14, metrics={"a": 2}, epoch_id=5)
34 | 
35 |         assert optimizer.param_groups[0]["lr"] == 0.5
36 |         assert optimizer.param_groups[1]["lr"] == 1
37 | 
38 |         scheduler.step(loss=14, metrics={"a": 100}, epoch_id=6)
39 |         assert optimizer.param_groups[0]["lr"] == 0.25
40 |         assert optimizer.param_groups[1]["lr"] == 0.5
41 | 
42 |         scheduler.step(loss=9, metrics={"a": 21}, epoch_id=7)
43 |         scheduler.step(loss=8, metrics={"a": 21}, epoch_id=7)
44 | 
45 |         assert optimizer.param_groups[0]["lr"] == 0.25
46 |         assert optimizer.param_groups[1]["lr"] == 0.5
47 | 
48 |         scheduler.step(loss=13, metrics={"a": 3}, epoch_id=8)
49 | 
50 |         assert optimizer.param_groups[0]["lr"] == 0.25
51 |         assert optimizer.param_groups[1]["lr"] == 0.5
52 | 
53 |         scheduler.step(loss=14, metrics=None, epoch_id=9)
54 | 
55 |         assert optimizer.param_groups[0]["lr"] == 0.125
56 |         assert optimizer.param_groups[1]["lr"] == 0.25
57 | 
58 |         for epoch_id in range(10, 30):
59 |             scheduler.step(loss=14, metrics={"absd": "asdasd"}, epoch_id=epoch_id)
60 | 
61 |         assert optimizer.param_groups[0]["lr"] == 0.1
62 |         assert optimizer.param_groups[1]["lr"] == 0.1
63 | 
64 |     def test_wrapper_metrics(self):
65 |         model = nn.Linear(10, 1)
66 | 
67 |         optimizer = Adam(model.parameters(), lr=1)
68 |         scheduler = SchedulerWrapperMetricsMeanOnPlateau(optimizer, factor=0.5, patience=0, min_lr=0.1, cooldown=0)
69 | 
70 |         assert optimizer.param_groups[0]["lr"] == 1
71 | 
72 |         scheduler.step(loss=None, metrics={"a": 1, "b": 1}, epoch_id=0)
73 |         assert optimizer.param_groups[0]["lr"] == 1
74 | 
75 |         scheduler.step(loss="abacaba", metrics={"a": 1, "b": 0}, epoch_id=1)
76 |         scheduler.step(loss=-10, metrics={"a": 1, "b": 1}, epoch_id=2)
77 |         assert optimizer.param_groups[0]["lr"] == 0.25
78 | 
79 |         scheduler.step(loss=123, metrics={"a": 1, "b": 2}, epoch_id=3)
80 |         assert optimizer.param_groups[0]["lr"] == 0.25
81 | 
82 |         scheduler.step(loss=0, metrics={"a": 2}, epoch_id=4)
83 |         assert optimizer.param_groups[0]["lr"] == 0.25
84 | 
85 |         scheduler.step(loss=0, metrics={"aasda": 1.1}, epoch_id=5)
86 |         assert optimizer.param_groups[0]["lr"] == 0.125
87 | 
88 |         for epoch_id in range(6, 20):
89 |             scheduler.step(loss=0, metrics={"c": 1}, epoch_id=epoch_id)
90 |         assert optimizer.param_groups[0]["lr"] == 0.1
91 | 
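The asserts above pin down the plateau contract: `step()` is called once per epoch, `SchedulerWrapperLossOnPlateau` watches the loss (and ignores `metrics`), every param group's learning rate is multiplied by `factor` after `patience` non-improving epochs, reductions pause for `cooldown` epochs, and the rate is floored at `min_lr`. A minimal sketch of driving the wrapper from an ordinary training loop, assuming only the constructor and `step()` signatures the test exercises (the toy model and regression loss are stand-ins, not the repo's trainer):

```python
import torch
import torch.nn as nn
from torch.optim import Adam
from pipeline.schedulers.learning_rate.reduce_on_plateau import SchedulerWrapperLossOnPlateau

model = nn.Linear(10, 1)
optimizer = Adam(model.parameters(), lr=0.01)
scheduler = SchedulerWrapperLossOnPlateau(optimizer, factor=0.5, patience=3, min_lr=1e-5, cooldown=1)

for epoch_id in range(20):
    # Stand-in "epoch": one step on random data instead of a real data loader.
    x, y = torch.randn(32, 10), torch.randn(32, 1)
    loss = nn.functional.mse_loss(model(x), y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # One scheduler step per epoch; the loss wrapper ignores metrics,
    # so an empty dict is enough here.
    scheduler.step(loss=loss.item(), metrics={}, epoch_id=epoch_id)
```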
--------------------------------------------------------------------------------
/tests/test_storage.py:
--------------------------------------------------------------------------------
1 | from .common import make_temp_path
2 | 
3 | from pipeline.storage.state import StateStorageEmpty, StateStorageFile
4 | from pipeline.core import PipelineError
5 | 
6 | import pytest
7 | 
8 | 
9 | class TestStateStorageEmpty:
10 |     def test_set_value(self):
11 |         state_storage = StateStorageEmpty()
12 |         state_storage.set_value("key_name", 123)
13 | 
14 |     def test_get_value(self):
15 |         state_storage = StateStorageEmpty()
16 | 
17 |         with pytest.raises(PipelineError):
18 |             state_storage.get_value("some_key")
19 | 
20 |         state_storage.set_value("some_key", 123)
21 |         with pytest.raises(PipelineError):
22 |             state_storage.get_value("some_key")
23 | 
24 |     def test_has_key(self):
25 |         state_storage = StateStorageEmpty()
26 | 
27 |         assert not state_storage.has_key("key")
28 |         state_storage.set_value("key", "abacaba")
29 | 
30 |         assert not state_storage.has_key("key")
31 | 
32 |     def test_remove_key(self):
33 |         state_storage = StateStorageEmpty()
34 | 
35 |         with pytest.raises(PipelineError):
36 |             state_storage.remove_key("abacaba")
37 | 
38 |         state_storage.set_value("abacaba", 9.23)
39 |         with pytest.raises(PipelineError):
40 |             state_storage.remove_key("abacaba")
41 | 
42 | 
43 | class TestStateStorageFile:
44 |     def test_basic(self):
45 |         path = make_temp_path()
46 |         state_storage = StateStorageFile(path)
47 | 
48 |         assert not state_storage.has_key("key")
49 | 
50 |         with pytest.raises(PipelineError):
51 |             state_storage.remove_key("abacaba")
52 | 
53 |         with pytest.raises(PipelineError):
54 |             state_storage.get_value("some_key")
55 | 
56 |     def test_save_load(self):
57 |         path = make_temp_path()
58 |         state_storage = StateStorageFile(path)
59 | 
60 |         state_storage.set_value("aba", 123)
61 |         assert state_storage.get_value("aba") == 123
62 |         assert state_storage.has_key("aba")
63 | 
64 |         state_storage = StateStorageFile(path)
65 |         assert state_storage.get_value("aba") == 123
66 |         assert state_storage.has_key("aba")
67 | 
68 |         state_storage.remove_key("aba")
69 |         assert not state_storage.has_key("aba")
70 | 
71 |         state_storage = StateStorageFile(path)
72 |         assert not state_storage.has_key("aba")
73 | 
--------------------------------------------------------------------------------
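`test_save_load` above shows the property that matters in practice: values written through one `StateStorageFile` instance are visible to a fresh instance constructed on the same path, so training state survives a process restart. A minimal sketch of using that for resumable training, assuming only the four methods the tests exercise (`set_value`, `get_value`, `has_key`, `remove_key`); the `last_finished_epoch` key and the path are illustrative choices, not repo conventions:

```python
from pipeline.storage.state import StateStorageFile

# The on-disk format is an implementation detail of StateStorageFile;
# only the path matters to the caller.
storage = StateStorageFile("/tmp/train_state")

# Resume after the last epoch that completed, or start from scratch.
if storage.has_key("last_finished_epoch"):
    start_epoch = storage.get_value("last_finished_epoch") + 1
else:
    start_epoch = 0

for epoch_id in range(start_epoch, 10):
    ...  # train one epoch
    # Persist progress; a new StateStorageFile on the same path will see it.
    storage.set_value("last_finished_epoch", epoch_id)
```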