├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── bin
│   ├── predict.py
│   └── train.py
├── cifar_pipeline
│   ├── __init__.py
│   ├── configs
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── fixup
│   │   │   ├── base.py
│   │   │   ├── resnet110_bn.py
│   │   │   ├── resnet110_fixup.py
│   │   │   ├── resnet110_fixup_0_0_1.py
│   │   │   ├── resnet110_fixup_0_1.py
│   │   │   ├── resnet110_fixup_mixup.py
│   │   │   └── wideresnet
│   │   │       ├── __init__.py
│   │   │       ├── base.py
│   │   │       ├── batch_norm
│   │   │       │   ├── 10000_layers.py
│   │   │       │   ├── 1000_layers.py
│   │   │       │   ├── 100_layers.py
│   │   │       │   ├── 10_layers.py
│   │   │       │   └── __init__.py
│   │   │       ├── fixup
│   │   │       │   ├── 10000_layers.py
│   │   │       │   ├── 1000_layers.py
│   │   │       │   ├── 100_layers.py
│   │   │       │   ├── 10_layers.py
│   │   │       │   └── __init__.py
│   │   │       ├── fixup_0
│   │   │       │   ├── 10000_layers.py
│   │   │       │   ├── 1000_layers.py
│   │   │       │   ├── 100_layers.py
│   │   │       │   ├── 10_layers.py
│   │   │       │   └── __init__.py
│   │   │       ├── fixup_0_0_1
│   │   │       │   ├── 10000_layers.py
│   │   │       │   ├── 1000_layers.py
│   │   │       │   ├── 100_layers.py
│   │   │       │   ├── 10_layers.py
│   │   │       │   └── __init__.py
│   │   │       ├── fixup_0_1
│   │   │       │   ├── 10000_layers.py
│   │   │       │   ├── 1000_layers.py
│   │   │       │   ├── 100_layers.py
│   │   │       │   ├── 10_layers.py
│   │   │       │   └── __init__.py
│   │   │       └── fixup_10
│   │   │           ├── 10000_layers.py
│   │   │           ├── 1000_layers.py
│   │   │           ├── 100_layers.py
│   │   │           ├── 10_layers.py
│   │   │           └── __init__.py
│   │   └── simple_cnn.py
│   ├── dataset.py
│   └── resnet_cifar.py
├── imagenet_pipeline
│   ├── __init__.py
│   ├── configs
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── resnet101_fixup.py
│   │   ├── resnet101_fixup_128.py
│   │   ├── resnet50.py
│   │   ├── resnet50_fixup.py
│   │   └── resnet50_fixup_128.py
│   └── dataset.py
├── mnist_pipeline
│   ├── __init__.py
│   ├── configs
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── resnet18.py
│   │   └── simple_cnn.py
│   ├── dataset.py
│   └── tests
│       ├── __init__.py
│       ├── test_dataset.py
│       └── test_train.py
├── pipeline
│   ├── __init__.py
│   ├── config_base.py
│   ├── core.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── base.py
│   │   └── mixup.py
│   ├── logger.py
│   ├── losses
│   │   └── vector_cross_entropy.py
│   ├── metrics
│   │   ├── __init__.py
│   │   ├── accuracy.py
│   │   └── base.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── base.py
│   │   └── image_models
│   │       ├── __init__.py
│   │       ├── encoders
│   │       │   ├── __init__.py
│   │       │   └── resnet.py
│   │       ├── resnet_fixup.py
│   │       ├── wide_resnet.py
│   │       └── wide_resnet_fixup.py
│   ├── predictors
│   │   ├── __init__.py
│   │   ├── base.py
│   │   └── classification.py
│   ├── preprocessing
│   │   ├── __init__.py
│   │   ├── audio_preprocessing
│   │   │   └── __init__.py
│   │   ├── image_preprocessing
│   │   │   └── __init__.py
│   │   └── text_preprocessing
│   │       └── __init__.py
│   ├── schedulers
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── dropout
│   │   │   ├── __init__.py
│   │   │   ├── increase_step.py
│   │   │   └── utils.py
│   │   └── learning_rate
│   │       ├── __init__.py
│   │       ├── cyclical_lr_scheduler.py
│   │       └── reduce_on_plateau.py
│   ├── storage
│   │   ├── __init__.py
│   │   ├── predictions.py
│   │   └── state.py
│   ├── trainers
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── classification.py
│   │   └── segmentation.py
│   └── utils.py
├── requirements.txt
└── tests
    ├── __init__.py
    ├── common.py
    ├── test_metrics.py
    ├── test_schedulers.py
    └── test_storage.py
/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so 
as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # PyCharm 107 | .idea/ 108 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: xenial 2 | language: python 3 | python: 4 | - "3.6" 5 | # command to install dependencies 6 | install: 7 | - pip install -r requirements.txt 8 | - wget https://www.dropbox.com/s/pzljfuwzo8hpb18/mnist.zip?dl=0 -O mnist.zip 9 | - mkdir ~/.pipeline 10 | - mkdir ~/.pipeline/mnist 11 | - unzip mnist.zip -d ~/.pipeline/mnist/ 12 | - free -g 13 | # command to run tests 14 | script: 15 | - pytest -vsx 16 | 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Pavel Ostyakov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pipeline 2 | 3 | ## How to run training 4 | 5 | First of all, create a config. You may find some examples of configs in folders mnist_pipeline, cifar_pipeline and imagenet_pipeline. 
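A config is a Python module that defines a `Config` class subclassing one of the provided base configs. Below is a minimal sketch modelled on `mnist_pipeline/configs/simple_cnn.py`; the tiny model and the save path are illustrative stand-ins, and it assumes that `load_config` in `bin/train.py` instantiates the module's `Config` class:

```python
# Hypothetical minimal config, modelled on mnist_pipeline/configs/simple_cnn.py.
# The tiny model and MODEL_SAVE_PATH below are illustrative stand-ins.
import torch.nn as nn

from mnist_pipeline.configs.base import ConfigMNISTBase
from pipeline.models.base import Flatten

MODEL_SAVE_PATH = "models/my_experiment"  # checkpoints, state and log.txt are written here


def get_model():
    # Any nn.Module mapping a 1x28x28 image to 10 class logits works here.
    return nn.Sequential(
        nn.Conv2d(1, 16, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.AdaptiveAvgPool2d(1),
        Flatten(),
        nn.Linear(16, 10),
    )


class Config(ConfigMNISTBase):
    def __init__(self):
        # ConfigMNISTBase wires up the optimizer, scheduler, loss, metrics and datasets.
        super().__init__(model=get_model(), model_save_path=MODEL_SAVE_PATH)
```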
6 | Then, call: 7 | 8 | `python3 bin/train.py path_to_config` 9 | 10 | 11 | For example, to reproduce the results from the Fixup paper, call: 12 | 13 | `python3 bin/train.py cifar_pipeline/configs/fixup/resnet110_fixup.py` 14 | -------------------------------------------------------------------------------- /bin/predict.py: -------------------------------------------------------------------------------- 1 | from pipeline.utils import load_predict_config, run_predict 2 | 3 | import argparse 4 | 5 | 6 | def main(): 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument("config_path") 9 | args = parser.parse_args() 10 | 11 | config = load_predict_config(args.config_path) 12 | run_predict(config) 13 | 14 | 15 | if __name__ == "__main__": 16 | main() 17 | -------------------------------------------------------------------------------- /bin/train.py: -------------------------------------------------------------------------------- 1 | from pipeline.utils import load_config, run_train 2 | 3 | import argparse 4 | 5 | 6 | def main(): 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument("config_path") 9 | args = parser.parse_args() 10 | 11 | config = load_config(args.config_path) 12 | run_train(config) 13 | 14 | 15 | if __name__ == "__main__": 16 | main() 17 | -------------------------------------------------------------------------------- /cifar_pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/cifar_pipeline/__init__.py -------------------------------------------------------------------------------- /cifar_pipeline/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/cifar_pipeline/configs/__init__.py -------------------------------------------------------------------------------- /cifar_pipeline/configs/base.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.optim as optim 3 | from torchvision.transforms import ToTensor 4 | 5 | from cifar_pipeline.dataset import CIFARImagesDataset, CIFARTargetsDataset 6 | from pipeline.config_base import ConfigBase 7 | from pipeline.datasets.base import DatasetWithPostprocessingFunc, DatasetComposer, OneHotTargetsDataset 8 | from pipeline.datasets.mixup import MixUpDatasetWrapper 9 | from pipeline.losses.vector_cross_entropy import VectorCrossEntropy 10 | from pipeline.metrics.accuracy import MetricsCalculatorAccuracy 11 | from pipeline.schedulers.learning_rate.reduce_on_plateau import SchedulerWrapperLossOnPlateau 12 | from pipeline.trainers.classification import TrainerClassification 13 | 14 | TRAIN_DATASET_PATH = "~/.pipeline/cifar/train" 15 | TEST_DATASET_PATH = "~/.pipeline/cifar/test" 16 | 17 | 18 | def get_dataset(path, transforms, train, use_mixup): 19 | images_dataset = DatasetWithPostprocessingFunc( 20 | CIFARImagesDataset(path=path, train=train, download=True), 21 | transforms) 22 | 23 | targets_dataset = CIFARTargetsDataset(path=path, train=train) 24 | if use_mixup: 25 | targets_dataset = OneHotTargetsDataset(targets_dataset, 10) 26 | 27 | return DatasetComposer([images_dataset, targets_dataset]) 28 | 29 | 30 | class ConfigCIFARBase(ConfigBase): 31 | def __init__(self, model, model_save_path, num_workers=8, batch_size=128, transforms=None, 32 | epoch_count=200, 
print_frequency=10, mixup_alpha=0): 33 | optimizer = optim.SGD( 34 | model.parameters(), 35 | lr=0.1, 36 | momentum=0.9, 37 | weight_decay=5e-4) 38 | 39 | scheduler = SchedulerWrapperLossOnPlateau(optimizer) 40 | loss = nn.CrossEntropyLoss() 41 | metrics_calculator = MetricsCalculatorAccuracy() 42 | trainer_cls = TrainerClassification 43 | 44 | if transforms is None: 45 | transforms = ToTensor() 46 | 47 | train_dataset = get_dataset(path=TRAIN_DATASET_PATH, transforms=transforms, train=True, 48 | use_mixup=mixup_alpha > 0) 49 | val_dataset = get_dataset(path=TEST_DATASET_PATH, transforms=transforms, train=False, 50 | use_mixup=mixup_alpha > 0) 51 | 52 | if mixup_alpha > 0: 53 | train_dataset = MixUpDatasetWrapper(train_dataset, alpha=mixup_alpha) 54 | loss = VectorCrossEntropy() 55 | 56 | super().__init__( 57 | model=model, 58 | model_save_path=model_save_path, 59 | optimizer=optimizer, 60 | scheduler=scheduler, 61 | loss=loss, 62 | metrics_calculator=metrics_calculator, 63 | batch_size=batch_size, 64 | num_workers=num_workers, 65 | train_dataset=train_dataset, 66 | val_dataset=val_dataset, 67 | trainer_cls=trainer_cls, 68 | print_frequency=print_frequency, 69 | epoch_count=epoch_count, 70 | device="cpu") 71 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/base.py: -------------------------------------------------------------------------------- 1 | from cifar_pipeline.dataset import CIFARImagesDataset, CIFARTargetsDataset 2 | 3 | from pipeline.config_base import ConfigBase 4 | from pipeline.schedulers.learning_rate.reduce_on_plateau import SchedulerWrapperLossOnPlateau 5 | from pipeline.metrics.accuracy import MetricsCalculatorAccuracy 6 | from pipeline.datasets.base import DatasetWithPostprocessingFunc, DatasetComposer, OneHotTargetsDataset 7 | from pipeline.trainers.classification import TrainerClassification 8 | from pipeline.datasets.mixup import MixUpDatasetWrapper 9 | from pipeline.losses.vector_cross_entropy import VectorCrossEntropy 10 | 11 | import torch.nn as nn 12 | import torch.optim as optim 13 | 14 | from torchvision.transforms import ToTensor, Compose, Normalize 15 | 16 | 17 | TRAIN_DATASET_PATH = "~/.pipeline/cifar/train" 18 | TEST_DATASET_PATH = "~/.pipeline/cifar/test" 19 | 20 | 21 | def get_dataset(path, transforms, train, use_mixup): 22 | images_dataset = DatasetWithPostprocessingFunc( 23 | CIFARImagesDataset(path=path, train=train, download=True), 24 | transforms) 25 | 26 | targets_dataset = CIFARTargetsDataset(path=path, train=train) 27 | if use_mixup: 28 | targets_dataset = OneHotTargetsDataset(targets_dataset, 10) 29 | 30 | return DatasetComposer([images_dataset, targets_dataset]) 31 | 32 | 33 | class ConfigCIFARBase(ConfigBase): 34 | def __init__(self, model, model_save_path, num_workers=8, batch_size=128, transforms=None, 35 | epoch_count=200, print_frequency=10, use_mixup=False): 36 | parameters_bias = [p[1] for p in model.named_parameters() if 'bias' in p[0]] 37 | parameters_scale = [p[1] for p in model.named_parameters() if 'scale' in p[0]] 38 | parameters_others = [p[1] for p in model.named_parameters() if not ('bias' in p[0] or 'scale' in p[0])] 39 | 40 | optimizer = optim.SGD( 41 | [{'params': parameters_bias, 'lr': 0.1/10.}, 42 | {'params': parameters_scale, 'lr': 0.1/10.}, 43 | {'params': parameters_others}], 44 | lr=0.1, 45 | momentum=0.9, 46 | weight_decay=5e-4) 47 | 48 | scheduler = SchedulerWrapperLossOnPlateau(optimizer) 49 | loss = nn.CrossEntropyLoss() 50 | metrics_calculator = 
MetricsCalculatorAccuracy() 51 | trainer_cls = TrainerClassification 52 | 53 | if transforms is None: 54 | transforms = Compose([ToTensor(), Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))]) 55 | 56 | train_dataset = get_dataset(path=TRAIN_DATASET_PATH, transforms=transforms, train=True, use_mixup=use_mixup) 57 | val_dataset = get_dataset(path=TEST_DATASET_PATH, transforms=transforms, train=False, use_mixup=use_mixup) 58 | 59 | 60 | if use_mixup: 61 | train_dataset = MixUpDatasetWrapper(train_dataset, alpha=0.7) 62 | loss = VectorCrossEntropy() 63 | 64 | super().__init__( 65 | model=model, 66 | model_save_path=model_save_path, 67 | optimizer=optimizer, 68 | scheduler=scheduler, 69 | loss=loss, 70 | metrics_calculator=metrics_calculator, 71 | batch_size=batch_size, 72 | num_workers=num_workers, 73 | train_dataset=train_dataset, 74 | val_dataset=val_dataset, 75 | trainer_cls=trainer_cls, 76 | print_frequency=print_frequency, 77 | epoch_count=epoch_count) 78 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/resnet110_bn.py: -------------------------------------------------------------------------------- 1 | from .base import ConfigCIFARBase 2 | 3 | from cifar_pipeline.resnet_cifar import resnet110 4 | 5 | from torch.nn import DataParallel 6 | 7 | MODEL_SAVE_PATH = "models/cifar_resnet110_bn" 8 | 9 | 10 | class Config(ConfigCIFARBase): 11 | def __init__(self): 12 | model = resnet110(use_fixup=False) 13 | 14 | super().__init__(model=DataParallel(model), model_save_path=MODEL_SAVE_PATH, 15 | epoch_count=100, batch_size=128) 16 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/resnet110_fixup.py: -------------------------------------------------------------------------------- 1 | from .base import ConfigCIFARBase 2 | 3 | from cifar_pipeline.resnet_cifar import resnet110 4 | 5 | from torch.nn import DataParallel 6 | 7 | MODEL_SAVE_PATH = "models/cifar_resnet110_fixup" 8 | 9 | 10 | class Config(ConfigCIFARBase): 11 | def __init__(self): 12 | model = resnet110(use_fixup=True) 13 | 14 | super().__init__(model=DataParallel(model), model_save_path=MODEL_SAVE_PATH, 15 | epoch_count=100, batch_size=128) 16 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/resnet110_fixup_0_0_1.py: -------------------------------------------------------------------------------- 1 | from .base import ConfigCIFARBase 2 | 3 | from cifar_pipeline.resnet_cifar import resnet110 4 | 5 | from torch.nn import DataParallel 6 | 7 | MODEL_SAVE_PATH = "models/cifar_resnet110_fixup_0_0_1" 8 | 9 | 10 | class Config(ConfigCIFARBase): 11 | def __init__(self): 12 | model = resnet110(use_fixup=True, fixup_coeff=0.01) 13 | 14 | super().__init__(model=DataParallel(model), model_save_path=MODEL_SAVE_PATH, 15 | epoch_count=100, batch_size=128) 16 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/resnet110_fixup_0_1.py: -------------------------------------------------------------------------------- 1 | from .base import ConfigCIFARBase 2 | 3 | from cifar_pipeline.resnet_cifar import resnet110 4 | 5 | from torch.nn import DataParallel 6 | 7 | MODEL_SAVE_PATH = "models/cifar_resnet110_fixup_0_1" 8 | 9 | 10 | class Config(ConfigCIFARBase): 11 | def __init__(self): 12 | model = resnet110(use_fixup=True, fixup_coeff=0.1) 13 | 14 | super().__init__(model=DataParallel(model), 
model_save_path=MODEL_SAVE_PATH, 15 | epoch_count=100, batch_size=128) 16 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/resnet110_fixup_mixup.py: -------------------------------------------------------------------------------- 1 | from .base import ConfigCIFARBase 2 | 3 | from cifar_pipeline.resnet_cifar import resnet110 4 | 5 | from torch.nn import DataParallel 6 | 7 | MODEL_SAVE_PATH = "models/cifar_resnet110_fixup_mixup" 8 | 9 | 10 | class Config(ConfigCIFARBase): 11 | def __init__(self): 12 | model = resnet110(use_fixup=True) 13 | 14 | super().__init__(model=DataParallel(model), model_save_path=MODEL_SAVE_PATH, 15 | epoch_count=100, batch_size=128, use_mixup=True) 16 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/cifar_pipeline/configs/fixup/wideresnet/__init__.py -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/base.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigCIFARBase 2 | 3 | from pipeline.models.image_models.wide_resnet_fixup import WideResNet as WideResNetFixup 4 | from pipeline.models.image_models.wide_resnet import WideResNet as WideResNetBatchNorm 5 | 6 | from enum import auto 7 | from torch.nn import DataParallel 8 | 9 | MODEL_SAVE_PATH = "models/cifar_wideresnet_{}_{}_layers" 10 | 11 | 12 | class ConfigWideResNetBase(ConfigCIFARBase): 13 | BATCH_NORM = auto() 14 | FIXUP = auto() 15 | 16 | def __init__(self, num_layers, fixup_coeff=1, normalization_type=BATCH_NORM, batch_size=128): 17 | if normalization_type == self.BATCH_NORM: 18 | model = WideResNetBatchNorm(depth=num_layers, num_classes=10) 19 | norm_type = "batchnorm" 20 | else: 21 | model = WideResNetFixup(depth=num_layers, num_classes=10, fixup_coeff=fixup_coeff) 22 | norm_type = "fixup_coeff_{}".format(fixup_coeff) 23 | 24 | super().__init__(model=DataParallel(model), model_save_path=MODEL_SAVE_PATH.format(norm_type, num_layers), 25 | epoch_count=1, batch_size=batch_size) 26 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/batch_norm/10000_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=10000, normalization_type=ConfigWideResNetBase.BATCH_NORM, batch_size=64) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/batch_norm/1000_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=1000, normalization_type=ConfigWideResNetBase.BATCH_NORM) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/batch_norm/100_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class 
Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=100, normalization_type=ConfigWideResNetBase.BATCH_NORM) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/batch_norm/10_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=10, normalization_type=ConfigWideResNetBase.BATCH_NORM) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/batch_norm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/cifar_pipeline/configs/fixup/wideresnet/batch_norm/__init__.py -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup/10000_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=10000, normalization_type=ConfigWideResNetBase.FIXUP, batch_size=64) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup/1000_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=1000, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup/100_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=100, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup/10_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=10, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/cifar_pipeline/configs/fixup/wideresnet/fixup/__init__.py -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0/10000_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=10000, fixup_coeff=0, normalization_type=ConfigWideResNetBase.FIXUP, batch_size=64) 7 | 
-------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0/1000_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=1000, fixup_coeff=0, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0/100_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=100, fixup_coeff=0, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0/10_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=10, fixup_coeff=0, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/cifar_pipeline/configs/fixup/wideresnet/fixup_0/__init__.py -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0_0_1/10000_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=10000, fixup_coeff=0.01, normalization_type=ConfigWideResNetBase.FIXUP, batch_size=64) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0_0_1/1000_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=1000, fixup_coeff=0.01, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0_0_1/100_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=100, fixup_coeff=0.01, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0_0_1/10_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=10, fixup_coeff=0.01, normalization_type=ConfigWideResNetBase.FIXUP) 7 | 
-------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0_0_1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/cifar_pipeline/configs/fixup/wideresnet/fixup_0_0_1/__init__.py -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0_1/10000_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=10000, fixup_coeff=0.1, normalization_type=ConfigWideResNetBase.FIXUP, batch_size=64) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0_1/1000_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=1000, fixup_coeff=0.1, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0_1/100_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=100, fixup_coeff=0.1, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0_1/10_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=10, fixup_coeff=0.1, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_0_1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/cifar_pipeline/configs/fixup/wideresnet/fixup_0_1/__init__.py -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_10/10000_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=10000, fixup_coeff=10, normalization_type=ConfigWideResNetBase.FIXUP, batch_size=64) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_10/1000_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=1000, fixup_coeff=10, normalization_type=ConfigWideResNetBase.FIXUP) 7 | 
-------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_10/100_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=100, fixup_coeff=10, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_10/10_layers.py: -------------------------------------------------------------------------------- 1 | from ..base import ConfigWideResNetBase 2 | 3 | 4 | class Config(ConfigWideResNetBase): 5 | def __init__(self): 6 | super().__init__(num_layers=10, fixup_coeff=10, normalization_type=ConfigWideResNetBase.FIXUP) 7 | -------------------------------------------------------------------------------- /cifar_pipeline/configs/fixup/wideresnet/fixup_10/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/cifar_pipeline/configs/fixup/wideresnet/fixup_10/__init__.py -------------------------------------------------------------------------------- /cifar_pipeline/configs/simple_cnn.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | from torchvision.transforms import ToTensor 7 | 8 | from pipeline.models.base import Flatten 9 | from .base import ConfigCIFARBase 10 | 11 | MODEL_SAVE_PATH = "models/cifar_simple_cnn" 12 | BATCH_SIZE = 128 13 | 14 | SEED = 85 15 | random.seed(SEED) 16 | np.random.seed(SEED) 17 | torch.random.manual_seed(SEED) 18 | 19 | 20 | def get_model(): 21 | model = nn.Sequential( 22 | nn.Conv2d(3, 16, kernel_size=3, padding=1), 23 | nn.ReLU(), 24 | nn.MaxPool2d(kernel_size=2), 25 | nn.Conv2d(16, 64, kernel_size=3, padding=1), 26 | nn.ReLU(), 27 | nn.MaxPool2d(kernel_size=2), 28 | nn.Conv2d(64, 128, kernel_size=3, padding=1), 29 | nn.ReLU(), 30 | nn.Conv2d(128, 128, kernel_size=3, padding=1), 31 | nn.ReLU(), 32 | nn.AdaptiveAvgPool2d(1), 33 | Flatten(), 34 | nn.Linear(128, 10) 35 | ) 36 | return model 37 | 38 | 39 | class Config(ConfigCIFARBase): 40 | def __init__(self): 41 | model = get_model() 42 | transforms = ToTensor() 43 | super().__init__(model=model, model_save_path=MODEL_SAVE_PATH, 44 | epoch_count=2, batch_size=BATCH_SIZE, transforms=transforms) 45 | -------------------------------------------------------------------------------- /cifar_pipeline/dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | from torchvision.datasets.cifar import CIFAR10 3 | 4 | 5 | class CIFARDataset(data.Dataset): 6 | def __init__(self, path, download=True, train=True): 7 | self._dataset = CIFAR10(path, download=download, train=train) 8 | 9 | def get_image(self, item): 10 | return self._dataset[item][0] 11 | 12 | def get_class(self, item): 13 | return self._dataset[item][1] 14 | 15 | def __len__(self): 16 | return len(self._dataset) 17 | 18 | def __getitem__(self, item): 19 | return self._dataset[item] 20 | 21 | 22 | class CIFARImagesDataset(CIFARDataset): 23 | def __getitem__(self, item): 24 | return self.get_image(item) 25 | 26 | 27 | class CIFARTargetsDataset(CIFARDataset): 28 | def 
__getitem__(self, item): 29 | return self.get_class(item) 30 | -------------------------------------------------------------------------------- /cifar_pipeline/resnet_cifar.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | import torch.nn.init as init 4 | import torch 5 | import math 6 | 7 | 8 | def _weights_init(m): 9 | if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d): 10 | init.kaiming_normal_(m.weight) 11 | 12 | 13 | class LambdaLayer(nn.Module): 14 | def __init__(self, lambd): 15 | super(LambdaLayer, self).__init__() 16 | self.lambd = lambd 17 | 18 | def forward(self, x): 19 | return self.lambd(x) 20 | 21 | 22 | class BasicBlock(nn.Module): 23 | expansion = 1 24 | m = 2 25 | 26 | def __init__(self, in_planes, planes, stride=1, use_fixup=False, fixup_l=1, fixup_coeff=1): 27 | super(BasicBlock, self).__init__() 28 | self._use_fixup = use_fixup 29 | self._fixup_l = fixup_l 30 | self._fixup_coeff = fixup_coeff 31 | 32 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 33 | self.bn1 = nn.BatchNorm2d(planes) 34 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 35 | self.bn2 = nn.BatchNorm2d(planes) 36 | 37 | self.shortcut = nn.Sequential() 38 | if stride != 1 or in_planes != planes: 39 | self.shortcut = LambdaLayer(lambda x: 40 | F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, planes//4, planes//4), "constant", 0)) 41 | 42 | if use_fixup: 43 | self.scale = nn.Parameter(torch.ones(1)) 44 | self.biases = nn.ParameterList([nn.Parameter(torch.zeros(1)) for _ in range(4)]) 45 | 46 | k = self.conv1.kernel_size[0] * self.conv1.kernel_size[1] * self.conv1.out_channels 47 | self.conv1.weight.data.normal_(0, fixup_coeff * fixup_l ** (-1 / (2 * self.m - 2)) * math.sqrt(2. / k)) 48 | self.conv2.weight.data.zero_() 49 | 50 | def forward(self, x): 51 | if self._use_fixup: 52 | out = F.relu(self.conv1(x + self.biases[0]) + self.biases[1]) 53 | out = self.scale * self.conv2(out + self.biases[2]) + self.biases[3] 54 | else: 55 | out = F.relu(self.bn1(self.conv1(x))) 56 | out = self.bn2(self.conv2(out)) 57 | out += self.shortcut(x) 58 | out = F.relu(out) 59 | return out 60 | 61 | 62 | class ResNet(nn.Module): 63 | def __init__(self, block, num_blocks, num_classes=10, use_fixup=False, fixup_coeff=1): 64 | super(ResNet, self).__init__() 65 | self.in_planes = 16 66 | 67 | fixup_l = sum(num_blocks) 68 | 69 | self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False) 70 | self.bn1 = nn.BatchNorm2d(16) if not use_fixup else nn.Sequential() 71 | self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1, 72 | use_fixup=use_fixup, fixup_l=fixup_l, fixup_coeff=fixup_coeff) 73 | self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2, 74 | use_fixup=use_fixup, fixup_l=fixup_l, fixup_coeff=fixup_coeff) 75 | self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2, 76 | use_fixup=use_fixup, fixup_l=fixup_l, fixup_coeff=fixup_coeff) 77 | self.linear = nn.Linear(64, num_classes) 78 | 79 | self.bias1 = nn.Parameter(torch.zeros(1)) 80 | self.bias2 = nn.Parameter(torch.zeros(1)) 81 | if not use_fixup: 82 | self.apply(_weights_init) 83 | else: 84 | self.linear.weight.data.zero_() 85 | self.linear.bias.data.zero_() 86 | 87 | k = self.conv1.kernel_size[0] * self.conv1.kernel_size[1] * self.conv1.out_channels 88 | self.conv1.weight.data.normal_(0, math.sqrt(2. 
/ k)) 89 | 90 | def _make_layer(self, block, planes, num_blocks, stride, use_fixup, fixup_l, fixup_coeff): 91 | strides = [stride] + [1]*(num_blocks-1) 92 | layers = [] 93 | for stride in strides: 94 | layers.append(block(self.in_planes, planes, stride, use_fixup, fixup_l, fixup_coeff)) 95 | self.in_planes = planes * block.expansion 96 | 97 | return nn.Sequential(*layers) 98 | 99 | def forward(self, x): 100 | out = F.relu(self.bn1(self.conv1(x)) + self.bias1) 101 | out = self.layer1(out) 102 | out = self.layer2(out) 103 | out = self.layer3(out) 104 | out = F.avg_pool2d(out, out.size()[3]) 105 | out = out.view(out.size(0), -1) 106 | out = self.linear(out + self.bias2) 107 | return out 108 | 109 | 110 | def resnet110(use_fixup=False, fixup_coeff=1): 111 | return ResNet(BasicBlock, [18, 18, 18], use_fixup=use_fixup, fixup_coeff=fixup_coeff) 112 | -------------------------------------------------------------------------------- /imagenet_pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/imagenet_pipeline/__init__.py -------------------------------------------------------------------------------- /imagenet_pipeline/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/imagenet_pipeline/configs/__init__.py -------------------------------------------------------------------------------- /imagenet_pipeline/configs/base.py: -------------------------------------------------------------------------------- 1 | from imagenet_pipeline.dataset import ImageNetImagesDataset, ImageNetTargetsDataset 2 | 3 | from pipeline.config_base import ConfigBase 4 | from pipeline.schedulers.learning_rate.reduce_on_plateau import SchedulerWrapperLossOnPlateau 5 | from pipeline.metrics.accuracy import MetricsCalculatorAccuracy 6 | from pipeline.datasets.base import DatasetWithPostprocessingFunc, DatasetComposer, OneHotTargetsDataset 7 | from pipeline.trainers.classification import TrainerClassification 8 | 9 | from pipeline.datasets.mixup import MixUpDatasetWrapper 10 | from pipeline.losses.vector_cross_entropy import VectorCrossEntropy 11 | import torch.nn as nn 12 | import torch.optim as optim 13 | 14 | from torchvision.transforms import ToTensor, Compose, Normalize 15 | 16 | 17 | TRAIN_DATASET_PATH = "~/train" 18 | TEST_DATASET_PATH = "~/val" 19 | 20 | 21 | def get_dataset(path, transforms, use_mixup): 22 | images_dataset = DatasetWithPostprocessingFunc( 23 | ImageNetImagesDataset(path=path), 24 | transforms) 25 | 26 | targets_dataset = ImageNetTargetsDataset(path=path) 27 | 28 | if use_mixup: 29 | targets_dataset = OneHotTargetsDataset(targets_dataset, 1000) 30 | return DatasetComposer([images_dataset, targets_dataset]) 31 | 32 | 33 | class ConfigImageNetBase(ConfigBase): 34 | def __init__(self, model, model_save_path, num_workers=16, batch_size=128, learning_rate=0.1, transforms=None, use_mixup=False): 35 | parameters_bias = [p[1] for p in model.named_parameters() if 'bias' in p[0]] 36 | parameters_scale = [p[1] for p in model.named_parameters() if 'scale' in p[0]] 37 | parameters_others = [p[1] for p in model.named_parameters() if not ('bias' in p[0] or 'scale' in p[0])] 38 | 39 | optimizer = optim.SGD( 40 | [{'params': parameters_bias, 'lr': learning_rate/10.}, 41 | {'params': parameters_scale, 'lr': learning_rate/10.}, 42 | 
{'params': parameters_others}], 43 | lr=learning_rate, 44 | momentum=0.9, 45 | weight_decay=5e-4) 46 | scheduler = SchedulerWrapperLossOnPlateau(optimizer) 47 | loss = nn.CrossEntropyLoss() 48 | metrics_calculator = MetricsCalculatorAccuracy() 49 | trainer_cls = TrainerClassification 50 | 51 | if transforms is None: 52 | transforms = Compose([ToTensor(), Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))]) 53 | 54 | train_dataset = get_dataset(path=TRAIN_DATASET_PATH, transforms=transforms, use_mixup=use_mixup) 55 | val_dataset = get_dataset(path=TEST_DATASET_PATH, transforms=transforms, use_mixup=use_mixup) 56 | 57 | if use_mixup: 58 | train_dataset = MixUpDatasetWrapper(train_dataset, alpha=0.7) 59 | loss = VectorCrossEntropy() 60 | 61 | super().__init__( 62 | model=model, 63 | model_save_path=model_save_path, 64 | optimizer=optimizer, 65 | scheduler=scheduler, 66 | loss=loss, 67 | metrics_calculator=metrics_calculator, 68 | batch_size=batch_size, 69 | num_workers=num_workers, 70 | train_dataset=train_dataset, 71 | val_dataset=val_dataset, 72 | trainer_cls=trainer_cls, 73 | print_frequency=100) 74 | -------------------------------------------------------------------------------- /imagenet_pipeline/configs/resnet101_fixup.py: -------------------------------------------------------------------------------- 1 | from .base import ConfigImageNetBase 2 | 3 | from torch.nn import DataParallel 4 | 5 | 6 | from pipeline.models.image_models.resnet_fixup import resnet101 7 | 8 | MODEL_SAVE_PATH = "models/imagenet_resnet_101_fixup" 9 | 10 | 11 | class Config(ConfigImageNetBase): 12 | def __init__(self, model_save_path=MODEL_SAVE_PATH): 13 | super().__init__(model=DataParallel(resnet101()), model_save_path=model_save_path, use_mixup=True, batch_size=128 * 8, learning_rate=0.1 * 8) 14 | -------------------------------------------------------------------------------- /imagenet_pipeline/configs/resnet101_fixup_128.py: -------------------------------------------------------------------------------- 1 | from .base import ConfigImageNetBase 2 | 3 | from torch.nn import DataParallel 4 | 5 | 6 | from pipeline.models.image_models.resnet_fixup import resnet101 7 | 8 | MODEL_SAVE_PATH = "models/imagenet_resnet_101_fixup_128" 9 | 10 | 11 | class Config(ConfigImageNetBase): 12 | def __init__(self, model_save_path=MODEL_SAVE_PATH): 13 | super().__init__(model=DataParallel(resnet101()), model_save_path=model_save_path, use_mixup=True, batch_size=128, learning_rate=0.1) 14 | -------------------------------------------------------------------------------- /imagenet_pipeline/configs/resnet50.py: -------------------------------------------------------------------------------- 1 | from .base import ConfigImageNetBase 2 | 3 | from torch.nn import DataParallel 4 | 5 | from torchvision.models import resnet50 6 | 7 | MODEL_SAVE_PATH = "models/imagenet_resnet_50" 8 | 9 | 10 | class Config(ConfigImageNetBase): 11 | def __init__(self, model_save_path=MODEL_SAVE_PATH): 12 | super().__init__(model=DataParallel(resnet50()), model_save_path=model_save_path) 13 | -------------------------------------------------------------------------------- /imagenet_pipeline/configs/resnet50_fixup.py: -------------------------------------------------------------------------------- 1 | from .base import ConfigImageNetBase 2 | 3 | from torch.nn import DataParallel 4 | 5 | 6 | from pipeline.models.image_models.resnet_fixup import resnet50 7 | 8 | MODEL_SAVE_PATH = "models/imagenet_resnet_50_fixup" 9 | 10 | 11 | class 
Config(ConfigImageNetBase): 12 | def __init__(self, model_save_path=MODEL_SAVE_PATH): 13 | super().__init__(model=DataParallel(resnet50()), model_save_path=model_save_path, use_mixup=True, batch_size=128 * 7, learning_rate=0.1 * 7) 14 | -------------------------------------------------------------------------------- /imagenet_pipeline/configs/resnet50_fixup_128.py: -------------------------------------------------------------------------------- 1 | from .base import ConfigImageNetBase 2 | 3 | from torch.nn import DataParallel 4 | 5 | 6 | from pipeline.models.image_models.resnet_fixup import resnet50 7 | 8 | MODEL_SAVE_PATH = "models/imagenet_resnet_50_fixup_128" 9 | 10 | 11 | class Config(ConfigImageNetBase): 12 | def __init__(self, model_save_path=MODEL_SAVE_PATH): 13 | super().__init__(model=DataParallel(resnet50()), model_save_path=model_save_path, use_mixup=True, batch_size=128, learning_rate=0.1) 14 | -------------------------------------------------------------------------------- /imagenet_pipeline/dataset.py: -------------------------------------------------------------------------------- 1 | from pipeline.core import PipelineError 2 | from pipeline.utils import get_path 3 | 4 | from PIL import Image 5 | 6 | import torch.utils.data as data 7 | 8 | import os 9 | import glob 10 | 11 | IMAGE_SIZE = (224, 224) 12 | 13 | 14 | class ImageNetDataset(data.Dataset): 15 | def __init__(self, path): 16 | path = get_path(path) 17 | if not os.path.exists(path): 18 | raise PipelineError("Path {} does not exist".format(path)) 19 | 20 | self._paths = sorted(glob.glob(os.path.join(path, "*/*.JPEG"))) 21 | 22 | classes = set() 23 | for path in self._paths: 24 | class_name = os.path.basename(os.path.dirname(path)) 25 | classes.add(class_name) 26 | 27 | classes = sorted(list(classes)) 28 | self._class_to_id = dict((class_name, i) for i, class_name in enumerate(classes)) 29 | 30 | def get_image(self, item): 31 | path = self._paths[item] 32 | image = Image.open(path).resize(IMAGE_SIZE).convert("RGB") 33 | return image 34 | 35 | def get_class(self, item): 36 | path = self._paths[item] 37 | class_name = os.path.basename(os.path.dirname(path)) 38 | result = self._class_to_id[class_name] 39 | return result 40 | 41 | def __len__(self): 42 | return len(self._paths) 43 | 44 | def __getitem__(self, item): 45 | return self.get_image(item), self.get_class(item) 46 | 47 | 48 | class ImageNetImagesDataset(ImageNetDataset): 49 | def __getitem__(self, item): 50 | return self.get_image(item) 51 | 52 | 53 | class ImageNetTargetsDataset(ImageNetDataset): 54 | def __getitem__(self, item): 55 | return self.get_class(item) 56 | -------------------------------------------------------------------------------- /mnist_pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/mnist_pipeline/__init__.py -------------------------------------------------------------------------------- /mnist_pipeline/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/mnist_pipeline/configs/__init__.py -------------------------------------------------------------------------------- /mnist_pipeline/configs/base.py: -------------------------------------------------------------------------------- 1 | from mnist_pipeline.dataset import MNISTImagesDataset, 
MNISTTargetsDataset 2 | 3 | from pipeline.config_base import ConfigBase, PredictConfigBase 4 | from pipeline.schedulers.learning_rate.reduce_on_plateau import SchedulerWrapperLossOnPlateau 5 | from pipeline.metrics.accuracy import MetricsCalculatorAccuracy 6 | from pipeline.datasets.base import DatasetWithPostprocessingFunc, DatasetComposer 7 | from pipeline.trainers.classification import TrainerClassification 8 | from pipeline.predictors.classification import PredictorClassification 9 | 10 | import torch.nn as nn 11 | import torch.optim as optim 12 | 13 | from torchvision.transforms import ToTensor 14 | 15 | 16 | TRAIN_DATASET_PATH = "~/.pipeline/mnist/train.csv" 17 | TEST_DATASET_PATH = "~/.pipeline/mnist/test.csv" 18 | 19 | VAL_RATIO = 0.2 20 | 21 | 22 | def get_dataset(mode, transforms): 23 | images_dataset = DatasetWithPostprocessingFunc( 24 | MNISTImagesDataset(path=TRAIN_DATASET_PATH, mode=mode, val_ratio=VAL_RATIO), 25 | transforms) 26 | 27 | targets_dataset = MNISTTargetsDataset( 28 | path=TRAIN_DATASET_PATH, mode=mode, val_ratio=VAL_RATIO) 29 | 30 | return DatasetComposer([images_dataset, targets_dataset]) 31 | 32 | 33 | class ConfigMNISTBase(ConfigBase): 34 | def __init__(self, model, model_save_path, num_workers=4, batch_size=128, transforms=None): 35 | optimizer = optim.Adam(model.parameters()) 36 | scheduler = SchedulerWrapperLossOnPlateau(optimizer) 37 | loss = nn.CrossEntropyLoss() 38 | metrics_calculator = MetricsCalculatorAccuracy() 39 | trainer_cls = TrainerClassification 40 | 41 | if transforms is None: 42 | transforms = ToTensor() 43 | 44 | train_dataset = get_dataset(mode=MNISTImagesDataset.MODE_TRAIN, transforms=transforms) 45 | val_dataset = get_dataset(mode=MNISTImagesDataset.MODE_VAL, transforms=transforms) 46 | 47 | super().__init__( 48 | model=model, 49 | model_save_path=model_save_path, 50 | optimizer=optimizer, 51 | scheduler=scheduler, 52 | loss=loss, 53 | metrics_calculator=metrics_calculator, 54 | batch_size=batch_size, 55 | num_workers=num_workers, 56 | train_dataset=train_dataset, 57 | val_dataset=val_dataset, 58 | trainer_cls=trainer_cls) 59 | 60 | 61 | class PredictConfigMNISTBase(PredictConfigBase): 62 | def __init__(self, model, model_save_path, num_workers=4, batch_size=128): 63 | predictor_cls = PredictorClassification 64 | 65 | images_dataset = DatasetWithPostprocessingFunc( 66 | MNISTImagesDataset(path=TRAIN_DATASET_PATH, mode=MNISTImagesDataset.MODE_VAL, val_ratio=VAL_RATIO), 67 | ToTensor()) 68 | 69 | dataset = DatasetComposer([images_dataset, list(range(len(images_dataset)))]) 70 | 71 | super().__init__( 72 | model=model, 73 | model_save_path=model_save_path, 74 | dataset=dataset, 75 | predictor_cls=predictor_cls, 76 | num_workers=num_workers, 77 | batch_size=batch_size) 78 | -------------------------------------------------------------------------------- /mnist_pipeline/configs/resnet18.py: -------------------------------------------------------------------------------- 1 | from .base import ConfigMNISTBase 2 | 3 | from pipeline.models.image_models.encoders.resnet import Resnet18FeatureExtractor 4 | 5 | import torch.nn as nn 6 | 7 | 8 | class Config(ConfigMNISTBase): 9 | def __init__(self, model_save_path="models/resnet18"): 10 | model = nn.Sequential( 11 | Resnet18FeatureExtractor(input_channels=1), 12 | nn.Linear(Resnet18FeatureExtractor.NUM_FEATURES, 10) 13 | ) 14 | 15 | super().__init__(model=model, model_save_path=model_save_path) 16 | -------------------------------------------------------------------------------- 
/mnist_pipeline/configs/simple_cnn.py: -------------------------------------------------------------------------------- 1 | from .base import ConfigMNISTBase, PredictConfigMNISTBase 2 | 3 | from pipeline.models.base import Flatten 4 | 5 | import torch.nn as nn 6 | 7 | 8 | MODEL_SAVE_PATH = "models/simple_cnn" 9 | 10 | 11 | def get_model(): 12 | model = nn.Sequential( 13 | nn.Conv2d(1, 16, kernel_size=3, padding=1), 14 | nn.ReLU(), 15 | nn.MaxPool2d(kernel_size=2), 16 | nn.Conv2d(16, 64, kernel_size=3, padding=1), 17 | nn.ReLU(), 18 | nn.MaxPool2d(kernel_size=2), 19 | nn.Conv2d(64, 128, kernel_size=3, padding=1), 20 | nn.ReLU(), 21 | nn.Conv2d(128, 128, kernel_size=3, padding=1), 22 | nn.ReLU(), 23 | nn.AdaptiveAvgPool2d(1), 24 | Flatten(), 25 | nn.Linear(128, 10) 26 | ) 27 | return model 28 | 29 | 30 | class Config(ConfigMNISTBase): 31 | def __init__(self, model_save_path=MODEL_SAVE_PATH): 32 | super().__init__(model=get_model(), model_save_path=model_save_path) 33 | 34 | 35 | class PredictConfig(PredictConfigMNISTBase): 36 | def __init__(self, model_save_path=MODEL_SAVE_PATH): 37 | super().__init__(model=get_model(), model_save_path=model_save_path) 38 | -------------------------------------------------------------------------------- /mnist_pipeline/dataset.py: -------------------------------------------------------------------------------- 1 | from pipeline.core import PipelineError 2 | from pipeline.utils import get_path 3 | 4 | import torch.utils.data as data 5 | from enum import auto 6 | 7 | import os 8 | import pandas as pd 9 | 10 | 11 | class MNISTDataset(data.Dataset): 12 | MODE_TRAIN = auto() 13 | MODE_VAL = auto() 14 | 15 | def __init__(self, path, mode, val_ratio): 16 | path = get_path(path) 17 | if not os.path.exists(path): 18 | raise PipelineError("Path {} does not exist".format(path)) 19 | 20 | dataset = pd.read_csv(path).values 21 | train_length = int(len(dataset) * (1 - val_ratio)) 22 | if mode == self.MODE_TRAIN: 23 | dataset = dataset[:train_length] 24 | else: 25 | dataset = dataset[train_length:] 26 | 27 | self._dataset = dataset 28 | 29 | def __len__(self): 30 | return len(self._dataset) 31 | 32 | def __getitem__(self, item): 33 | row = self._dataset[item] 34 | 35 | image = row[1:].reshape(28, 28, 1).astype("uint8") 36 | target = int(row[0]) 37 | return image, target 38 | 39 | 40 | class MNISTImagesDataset(MNISTDataset): 41 | def __init__(self, path, mode, val_ratio): 42 | super().__init__(path, mode, val_ratio) 43 | 44 | def __getitem__(self, item): 45 | image, _ = super().__getitem__(item) 46 | return image 47 | 48 | 49 | class MNISTTargetsDataset(MNISTDataset): 50 | def __init__(self, path, mode, val_ratio): 51 | super().__init__(path, mode, val_ratio) 52 | 53 | def __getitem__(self, item): 54 | _, target = super().__getitem__(item) 55 | return target 56 | -------------------------------------------------------------------------------- /mnist_pipeline/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/mnist_pipeline/tests/__init__.py -------------------------------------------------------------------------------- /mnist_pipeline/tests/test_dataset.py: -------------------------------------------------------------------------------- 1 | from mnist_pipeline.dataset import MNISTDataset, MNISTImagesDataset, MNISTTargetsDataset 2 | from mnist_pipeline.configs.base import TRAIN_DATASET_PATH 3 | 4 | from pipeline.utils import 
get_path 5 | 6 | import os 7 | 8 | 9 | class TestMNISTDataset: 10 | def setup(self): 11 | assert os.path.exists(get_path(TRAIN_DATASET_PATH)), "You need to download MNIST dataset first" 12 | 13 | def test_train_dataset(self): 14 | dataset = MNISTDataset(TRAIN_DATASET_PATH, mode=MNISTDataset.MODE_TRAIN, val_ratio=0.2) 15 | assert len(dataset) == 33600 16 | 17 | _, _ = dataset[33599] 18 | image, target = dataset[0] 19 | 20 | assert 0 <= target < 10 21 | 22 | assert image.shape == (28, 28, 1) 23 | 24 | def test_val_dataset(self): 25 | dataset = MNISTDataset(TRAIN_DATASET_PATH, mode=MNISTDataset.MODE_VAL, val_ratio=0.2) 26 | assert len(dataset) == 8400 27 | 28 | _, _ = dataset[8399] 29 | image, target = dataset[0] 30 | 31 | assert 0 <= target < 10 32 | 33 | assert image.shape == (28, 28, 1) 34 | 35 | dataset = MNISTDataset(TRAIN_DATASET_PATH, mode=MNISTDataset.MODE_VAL, val_ratio=0) 36 | assert len(dataset) == 0 37 | 38 | def test_images_dataset(self): 39 | dataset = MNISTImagesDataset(TRAIN_DATASET_PATH, mode=MNISTDataset.MODE_VAL, val_ratio=1) 40 | 41 | image = dataset[10] 42 | assert image.shape == (28, 28, 1) 43 | 44 | assert image.min() >= 0 45 | assert 1 <= image.max() <= 255 46 | 47 | def test_targets_dataset(self): 48 | dataset = MNISTTargetsDataset(TRAIN_DATASET_PATH, mode=MNISTDataset.MODE_TRAIN, val_ratio=0.5234) 49 | 50 | target = dataset[51] 51 | 52 | assert 0 <= target <= 9 53 | 54 | assert type(target) == int 55 | -------------------------------------------------------------------------------- /mnist_pipeline/tests/test_train.py: -------------------------------------------------------------------------------- 1 | from mnist_pipeline.configs.simple_cnn import Config, PredictConfig 2 | 3 | from pipeline.utils import run_train, run_predict 4 | import tempfile 5 | import shutil 6 | import os 7 | import hashlib 8 | 9 | 10 | class TestMNISTTrain: 11 | def test_mnist_train(self): 12 | test_path = tempfile.mkdtemp() 13 | config = Config(model_save_path=test_path) 14 | config.epoch_count = 2 15 | run_train(config) 16 | 17 | assert os.path.exists(os.path.join(test_path, "log.txt")) 18 | assert os.path.exists(os.path.join(test_path, "epoch_0")) 19 | assert os.path.exists(os.path.join(test_path, "epoch_1")) 20 | assert not os.path.exists(os.path.join(test_path, "epoch_2")) 21 | assert os.path.exists(os.path.join(test_path, "state")) 22 | 23 | with open(os.path.join(test_path, "epoch_1"), "rb") as fin: 24 | model_checkpoint_hash = hashlib.md5(fin.read()).hexdigest() 25 | 26 | run_train(config) 27 | 28 | with open(os.path.join(test_path, "epoch_1"), "rb") as fin: 29 | new_model_checkpoint_hash = hashlib.md5(fin.read()).hexdigest() 30 | 31 | assert model_checkpoint_hash == new_model_checkpoint_hash 32 | assert not os.path.exists(os.path.join(test_path, "epoch_2")) 33 | 34 | predict_config = PredictConfig(model_save_path=test_path) 35 | run_predict(predict_config) 36 | 37 | assert os.path.exists(os.path.join(test_path, "predictions", "predictions")) 38 | assert os.path.exists(os.path.join(test_path, "predictions", "identifiers")) 39 | 40 | shutil.rmtree(test_path) 41 | -------------------------------------------------------------------------------- /pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/__init__.py -------------------------------------------------------------------------------- /pipeline/config_base.py: 
-------------------------------------------------------------------------------- 1 | from .datasets.base import EmptyDataset 2 | from .metrics.base import MetricsCalculatorEmpty 3 | from pipeline.schedulers.base import SchedulerWrapperIdentity 4 | from .storage.state import StateStorageFile 5 | from .storage.predictions import PredictionsStorageFiles 6 | 7 | import torch 8 | import os 9 | 10 | 11 | class ConfigBase: 12 | def __init__( 13 | self, 14 | model, 15 | model_save_path, 16 | train_dataset, 17 | optimizer, 18 | loss, 19 | trainer_cls, 20 | device=None, 21 | val_dataset=None, 22 | scheduler=None, 23 | metrics_calculator=None, 24 | batch_size=1, 25 | num_workers=0, 26 | epoch_count=None, 27 | print_frequency=1, 28 | state_storage=None): 29 | 30 | if val_dataset is None: 31 | val_dataset = EmptyDataset() 32 | 33 | if scheduler is None: 34 | scheduler = SchedulerWrapperIdentity() 35 | 36 | if metrics_calculator is None: 37 | metrics_calculator = MetricsCalculatorEmpty() 38 | 39 | if device is None: 40 | device = "cuda" if torch.cuda.is_available() else "cpu" 41 | 42 | if state_storage is None: 43 | state_storage = StateStorageFile(os.path.join(model_save_path, "state")) 44 | 45 | self.model = model 46 | self.model_save_path = model_save_path 47 | self.train_dataset = train_dataset 48 | self.val_dataset = val_dataset 49 | self.batch_size = batch_size 50 | self.num_workers = num_workers 51 | self.scheduler = scheduler 52 | self.metrics_calculator = metrics_calculator 53 | self.loss = loss 54 | self.optimizer = optimizer 55 | self.epoch_count = epoch_count 56 | self.print_frequency = print_frequency 57 | self.trainer_cls = trainer_cls 58 | self.device = device 59 | self.state_storage = state_storage 60 | 61 | 62 | class PredictConfigBase: 63 | def __init__( 64 | self, 65 | model, 66 | model_save_path, 67 | dataset, 68 | predictor_cls, 69 | device=None, 70 | batch_size=1, 71 | num_workers=0, 72 | print_frequency=1, 73 | predictions_storage=None): 74 | if device is None: 75 | device = "cuda" if torch.cuda.is_available() else "cpu" 76 | 77 | if predictions_storage is None: 78 | predictions_storage = PredictionsStorageFiles(os.path.join(model_save_path, "predictions")) 79 | 80 | self.model = model 81 | self.dataset = dataset 82 | self.model_save_path = model_save_path 83 | self.batch_size = batch_size 84 | self.num_workers = num_workers 85 | self.print_frequency = print_frequency 86 | self.predictor_cls = predictor_cls 87 | self.device = device 88 | self.predictions_storage = predictions_storage 89 | -------------------------------------------------------------------------------- /pipeline/core.py: -------------------------------------------------------------------------------- 1 | class PipelineError(Exception): 2 | pass 3 | -------------------------------------------------------------------------------- /pipeline/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/datasets/__init__.py -------------------------------------------------------------------------------- /pipeline/datasets/base.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | import torch 3 | 4 | from typing import Sequence 5 | 6 | 7 | class EmptyDataset(data.Dataset): 8 | def __len__(self): 9 | return 0 10 | 11 | def __getitem__(self, item: int): 12 | assert False, "This code is unreachable" 13 | 14 
| 15 | class DatasetComposer(data.Dataset): 16 | def __init__(self, datasets: Sequence): 17 | self._datasets = datasets 18 | self._dataset_length = len(datasets[0]) 19 | for dataset in datasets: 20 | assert self._dataset_length == len(dataset) 21 | 22 | def __len__(self): 23 | return self._dataset_length 24 | 25 | def __getitem__(self, item: int): 26 | return tuple(dataset[item] for dataset in self._datasets) 27 | 28 | 29 | class OneHotTargetsDataset(data.Dataset): 30 | def __init__(self, targets: Sequence, class_count: int): 31 | self._targets = targets 32 | self._class_count = class_count 33 | 34 | def __len__(self): 35 | return len(self._targets) 36 | 37 | def __getitem__(self, item: int): 38 | target = self._targets[item] 39 | result = torch.zeros(self._class_count, dtype=torch.float32) 40 | result[target] = 1 41 | return result 42 | 43 | 44 | class MultiLabelTargetsDataset(data.Dataset): 45 | def __init__(self, targets: Sequence, class_count: int): 46 | self._targets = targets 47 | self._class_count = class_count 48 | 49 | def __len__(self): 50 | return len(self._targets) 51 | 52 | def __getitem__(self, item: int): 53 | target = self._targets[item] 54 | result = torch.zeros(self._class_count, dtype=torch.float32) 55 | 56 | for class_id in target: 57 | result[class_id] = 1 58 | 59 | return result 60 | 61 | 62 | class DatasetWithPostprocessingFunc(data.Dataset): 63 | def __init__(self, dataset, postprocessing_func): 64 | self._dataset = dataset 65 | self._postprocessing_func = postprocessing_func 66 | 67 | def __len__(self): 68 | return len(self._dataset) 69 | 70 | def __getitem__(self, item): 71 | return self._postprocessing_func(self._dataset[item]) 72 | -------------------------------------------------------------------------------- /pipeline/datasets/mixup.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | import random 3 | import numpy as np 4 | 5 | 6 | class MixUpDatasetWrapper(data.Dataset): 7 | def __init__(self, dataset, alpha=1): 8 | super().__init__() 9 | self._dataset = dataset 10 | self._alpha = alpha 11 | 12 | def __len__(self): 13 | return len(self._dataset) 14 | 15 | def __getitem__(self, item): 16 | first = self._dataset[item] 17 | second = random.choice(self._dataset) 18 | 19 | coeff = np.random.beta(self._alpha, self._alpha) 20 | 21 | result = [] 22 | for elem1, elem2 in zip(first, second): 23 | result.append(elem1 * coeff + elem2 * (1 - coeff)) 24 | 25 | return tuple(result) 26 | -------------------------------------------------------------------------------- /pipeline/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | 4 | 5 | LOGGER = logging.getLogger() 6 | FORMATTER = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") 7 | 8 | 9 | def setup_logger(out_file=None, stderr=True, stderr_level=logging.INFO, file_level=logging.DEBUG): 10 | LOGGER.handlers = [] 11 | LOGGER.setLevel(min(stderr_level, file_level)) 12 | 13 | if stderr: 14 | handler = logging.StreamHandler(sys.stderr) 15 | handler.setFormatter(FORMATTER) 16 | handler.setLevel(stderr_level) 17 | LOGGER.addHandler(handler) 18 | 19 | if out_file is not None: 20 | handler = logging.FileHandler(out_file) 21 | handler.setFormatter(FORMATTER) 22 | handler.setLevel(file_level) 23 | LOGGER.addHandler(handler) 24 | 25 | LOGGER.info("logger set up") 26 | return LOGGER 27 | -------------------------------------------------------------------------------- 
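Editorial note (illustrative only, not a file from the repository): MixUpDatasetWrapper above draws a Beta(alpha, alpha) coefficient and interpolates every element of a sample tuple, so it is meant to wrap a DatasetComposer whose targets are already dense float vectors (for example from OneHotTargetsDataset); raw integer labels would be blended into meaningless fractions. A minimal sketch of that composition, with small in-memory tensors standing in for a real dataset:

import torch

from pipeline.datasets.base import DatasetComposer, OneHotTargetsDataset
from pipeline.datasets.mixup import MixUpDatasetWrapper

images = torch.randn(100, 1, 28, 28)   # toy float "images"
labels = [i % 10 for i in range(100)]  # integer labels in [0, 10)

targets = OneHotTargetsDataset(labels, class_count=10)  # label -> one-hot float vector
mixed = MixUpDatasetWrapper(DatasetComposer([images, targets]), alpha=1)

image, target = mixed[0]                       # convex combinations of two random samples
assert abs(target.sum().item() - 1.0) < 1e-5   # the soft target still sums to 1

Soft targets produced this way pair with VectorCrossEntropy (next file) rather than nn.CrossEntropyLoss, which expects integer class indices.
--------------------------------------------------------------------------------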
/pipeline/losses/vector_cross_entropy.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class VectorCrossEntropy(nn.Module): 6 | def __init__(self): 7 | super().__init__() 8 | self._log_softmax = nn.LogSoftmax(dim=1) 9 | 10 | def forward(self, input, target): 11 | input = self._log_softmax(input) 12 | loss = -torch.sum(input * target) 13 | loss = loss / input.shape[0] 14 | return loss 15 | 16 | -------------------------------------------------------------------------------- /pipeline/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/metrics/__init__.py -------------------------------------------------------------------------------- /pipeline/metrics/accuracy.py: -------------------------------------------------------------------------------- 1 | from .base import MetricsCalculatorBase 2 | from ..core import PipelineError 3 | 4 | from sklearn.metrics import accuracy_score 5 | 6 | import numpy as np 7 | 8 | 9 | class MetricsCalculatorAccuracy(MetricsCalculatorBase): 10 | def __init__(self, border=0.5): 11 | super().__init__() 12 | self.zero_cache() 13 | self._border = border 14 | 15 | def zero_cache(self): 16 | self._predictions = [] 17 | self._true_labels = [] 18 | 19 | def add(self, y_predicted, y_true): 20 | self._predictions.append(y_predicted.cpu().data.numpy()) 21 | self._true_labels.append(y_true.cpu().data.numpy()) 22 | 23 | def calculate(self): 24 | if not self._predictions: 25 | raise PipelineError("You need to add predictions for calculating the accuracy first") 26 | 27 | y_pred = np.concatenate(self._predictions) 28 | y_true = np.concatenate(self._true_labels) 29 | 30 | if y_pred.shape[-1] == 1: 31 | # Binary classification 32 | y_pred = (y_pred >= self._border).astype("int") 33 | else: 34 | y_pred = np.argmax(y_pred, -1) 35 | 36 | if len(y_true.shape) != 1: 37 | y_true = np.argmax(y_true, -1) 38 | 39 | result = accuracy_score(y_true, y_pred) 40 | return {"accuracy": result} 41 | -------------------------------------------------------------------------------- /pipeline/metrics/base.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | 4 | class MetricsCalculatorBase(abc.ABC): 5 | @abc.abstractmethod 6 | def zero_cache(self): 7 | pass 8 | 9 | @abc.abstractmethod 10 | def add(self, y_predicted, y_true): 11 | pass 12 | 13 | @abc.abstractmethod 14 | def calculate(self): 15 | pass 16 | 17 | 18 | class MetricsCalculatorEmpty(MetricsCalculatorBase): 19 | def zero_cache(self): 20 | pass 21 | 22 | def add(self, y_predicted, y_true): 23 | pass 24 | 25 | def calculate(self): 26 | return {} 27 | -------------------------------------------------------------------------------- /pipeline/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/models/__init__.py -------------------------------------------------------------------------------- /pipeline/models/base.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class Flatten(nn.Module): 5 | def forward(self, x): 6 | return x.view(x.shape[0], -1) 7 | 
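8 |
9 | # --- Editorial sketch (illustrative only, appended by the editor; not part of the original file). ---
10 | # Flatten is the bridge between convolutional feature maps and a linear head:
11 | # AdaptiveAvgPool2d emits (batch, channels, 1, 1), and Flatten reshapes that to
12 | # (batch, channels) so nn.Linear can consume it, exactly as in the simple_cnn config above.
13 | if __name__ == "__main__":
14 |     import torch
15 |
16 |     head = nn.Sequential(nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(128, 10))
17 |     x = torch.randn(4, 128, 7, 7)    # a hypothetical (batch, channels, H, W) feature map
18 |     assert head(x).shape == (4, 10)  # (4, 128, 1, 1) -> (4, 128) -> (4, 10)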
-------------------------------------------------------------------------------- /pipeline/models/image_models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/models/image_models/__init__.py -------------------------------------------------------------------------------- /pipeline/models/image_models/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/models/image_models/encoders/__init__.py -------------------------------------------------------------------------------- /pipeline/models/image_models/encoders/resnet.py: -------------------------------------------------------------------------------- 1 | from torchvision.models import resnet 2 | import torch.nn as nn 3 | 4 | 5 | class ResnetModelFeatureExtractorBase(nn.Module): 6 | def __init__(self, model, input_channels): 7 | super().__init__() 8 | 9 | model.fc = nn.Sequential() 10 | model.avgpool = nn.AdaptiveAvgPool2d(1) 11 | 12 | if input_channels != 3: 13 | model.conv1 = nn.Conv2d( 14 | input_channels, 15 | model.conv1.out_channels, 16 | kernel_size=model.conv1.kernel_size, 17 | stride=model.conv1.stride, 18 | padding=model.conv1.padding, 19 | bias=model.conv1.bias) 20 | 21 | self._model = model 22 | 23 | def forward(self, input): 24 | return self._model(input) 25 | 26 | 27 | class Resnet18FeatureExtractor(ResnetModelFeatureExtractorBase): 28 | NUM_FEATURES = 512 29 | 30 | def __init__(self, pretrained=True, input_channels=3): 31 | model = resnet.resnet18(pretrained=pretrained) 32 | super().__init__( 33 | model=model, 34 | input_channels=input_channels) 35 | 36 | 37 | class Resnet34FeatureExtractor(ResnetModelFeatureExtractorBase): 38 | NUM_FEATURES = 512 39 | 40 | def __init__(self, pretrained=True, input_channels=3): 41 | model = resnet.resnet34(pretrained=pretrained) 42 | super().__init__( 43 | model=model, 44 | input_channels=input_channels) 45 | 46 | 47 | class Resnet50FeatureExtractor(ResnetModelFeatureExtractorBase): 48 | NUM_FEATURES = 2048 49 | 50 | def __init__(self, pretrained=True, input_channels=3): 51 | model = resnet.resnet50(pretrained=pretrained) 52 | super().__init__( 53 | model=model, 54 | input_channels=input_channels) 55 | 56 | 57 | class Resnet101FeatureExtractor(ResnetModelFeatureExtractorBase): 58 | NUM_FEATURES = 2048 59 | 60 | def __init__(self, pretrained=True, input_channels=3): 61 | model = resnet.resnet101(pretrained=pretrained) 62 | super().__init__( 63 | model=model, 64 | input_channels=input_channels) 65 | 66 | 67 | class Resnet152FeatureExtractor(ResnetModelFeatureExtractorBase): 68 | NUM_FEATURES = 2048 69 | 70 | def __init__(self, pretrained=True, input_channels=3): 71 | model = resnet.resnet152(pretrained=pretrained) 72 | super().__init__( 73 | model=model, 74 | input_channels=input_channels) 75 | -------------------------------------------------------------------------------- /pipeline/models/image_models/resnet_fixup.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | import torch 4 | 5 | 6 | class Bottleneck(nn.Module): 7 | expansion = 4 8 | m = 3 9 | 10 | def __init__(self, inplanes, planes, stride=1, downsample=None, fixup_l=1): 11 | super(Bottleneck, self).__init__() 12 | self.conv1 = nn.Conv2d(inplanes, 
planes, kernel_size=1, bias=False) 13 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 14 | padding=1, bias=False) 15 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 16 | self.relu = nn.ReLU(inplace=True) 17 | self.downsample = downsample 18 | self.stride = stride 19 | 20 | self.scale = nn.Parameter(torch.ones(1)) 21 | self.biases = nn.ParameterList([nn.Parameter(torch.zeros(1)) for _ in range(6)]) 22 | 23 | k = self.conv1.kernel_size[0] * self.conv1.kernel_size[1] * self.conv1.out_channels 24 | self.conv1.weight.data.normal_(0, fixup_l ** (-1 / (2 * self.m - 2)) * math.sqrt(2. / k)) 25 | 26 | k = self.conv2.kernel_size[0] * self.conv2.kernel_size[1] * self.conv2.out_channels 27 | self.conv2.weight.data.normal_(0, fixup_l ** (-1 / (2 * self.m - 2)) * math.sqrt(2. / k)) 28 | self.conv3.weight.data.zero_() 29 | 30 | if downsample is not None: 31 | k = self.downsample.kernel_size[0] * self.downsample.kernel_size[1] * self.downsample.out_channels 32 | self.downsample.weight.data.normal_(0, math.sqrt(2. / k)) 33 | 34 | def forward(self, x): 35 | residual = x 36 | 37 | out = self.conv1(x + self.biases[0]) 38 | out = self.relu(out + self.biases[1]) 39 | 40 | out = self.conv2(out + self.biases[2]) 41 | out = self.relu(out + self.biases[3]) 42 | 43 | out = self.scale * self.conv3(out + self.biases[4]) + self.biases[5] 44 | 45 | if self.downsample is not None: 46 | residual = self.downsample(x) 47 | 48 | out += residual 49 | out = self.relu(out) 50 | 51 | return out 52 | 53 | 54 | class ResNet(nn.Module): 55 | 56 | def __init__(self, block, layers, num_classes=1000, input_channels=3): 57 | self.inplanes = 64 58 | super(ResNet, self).__init__() 59 | self.conv1 = nn.Conv2d(input_channels, 64, kernel_size=7, stride=2, padding=3, 60 | bias=False) 61 | self.relu = nn.ReLU(inplace=True) 62 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 63 | fixup_l = sum(layers) 64 | self.layer1 = self._make_layer(block, 64, layers[0], fixup_l=fixup_l) 65 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, fixup_l=fixup_l) 66 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, fixup_l=fixup_l) 67 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, fixup_l=fixup_l) 68 | self.avgpool = nn.AvgPool2d(7, stride=1) 69 | self.bias1 = nn.Parameter(torch.zeros(1)) 70 | self.bias2 = nn.Parameter(torch.zeros(1)) 71 | self.fc = nn.Linear(512 * block.expansion, num_classes) 72 | 73 | self.fc.weight.data.zero_() 74 | self.fc.bias.data.zero_() 75 | 76 | n = self.conv1.kernel_size[0] * self.conv1.kernel_size[1] * self.conv1.out_channels 77 | self.conv1.weight.data.normal_(0, math.sqrt(2. 
/ n)) 78 | 79 | def _make_layer(self, block, planes, blocks, fixup_l, stride=1): 80 | downsample = None 81 | if stride != 1 or self.inplanes != planes * block.expansion: 82 | downsample = nn.Conv2d(self.inplanes, planes * block.expansion, 83 | kernel_size=1, stride=stride, bias=True) 84 | 85 | layers = [] 86 | layers.append(block(self.inplanes, planes, stride, downsample, fixup_l=fixup_l)) 87 | self.inplanes = planes * block.expansion 88 | for i in range(1, blocks): 89 | layers.append(block(self.inplanes, planes, fixup_l=fixup_l)) 90 | 91 | return nn.Sequential(*layers) 92 | 93 | def forward(self, x): 94 | x = self.conv1(x) 95 | x = self.relu(x + self.bias1) 96 | x = self.maxpool(x) 97 | 98 | x = self.layer1(x) 99 | x = self.layer2(x) 100 | x = self.layer3(x) 101 | x = self.layer4(x) 102 | 103 | x = self.avgpool(x) 104 | x = x.view(x.size(0), -1) 105 | x = self.fc(x + self.bias2) 106 | 107 | return x 108 | 109 | 110 | def resnet50(**kwargs): 111 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 112 | return model 113 | 114 | 115 | def resnet101(**kwargs): 116 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 117 | return model 118 | 119 | 120 | def resnet152(**kwargs): 121 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 122 | return model 123 | -------------------------------------------------------------------------------- /pipeline/models/image_models/wide_resnet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Wide ResNet by Sergey Zagoruyko and Nikos Komodakis 3 | Fixup initialization by Hongyi Zhang, Yann N. Dauphin, Tengyu Ma 4 | Based on code by xternalz and Andy Brock: 5 | https://github.com/xternalz/WideResNet-pytorch 6 | https://github.com/ajbrock/BoilerPlate 7 | """ 8 | 9 | import math 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | 15 | class BasicBlock(nn.Module): 16 | def __init__(self, in_planes, out_planes, stride, dropout=0.0): 17 | super(BasicBlock, self).__init__() 18 | self.bn1 = nn.BatchNorm2d(in_planes) 19 | self.relu1 = nn.ReLU(inplace=True) 20 | self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 21 | padding=1, bias=False) 22 | self.bn2 = nn.BatchNorm2d(out_planes) 23 | self.relu2 = nn.ReLU(inplace=True) 24 | self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1, 25 | padding=1, bias=False) 26 | self.dropout = dropout 27 | self.equalInOut = (in_planes == out_planes) 28 | self.convShortcut = (not self.equalInOut) and nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, 29 | padding=0, bias=False) or None 30 | def forward(self, x): 31 | if not self.equalInOut: 32 | x = self.relu1(self.bn1(x)) 33 | else: 34 | out = self.relu1(self.bn1(x)) 35 | out = self.relu2(self.bn2(self.conv1(out if self.equalInOut else x))) 36 | if self.dropout > 0: 37 | out = F.dropout(out, p=self.dropout, training=self.training) 38 | out = self.conv2(out) 39 | return torch.add(x if self.equalInOut else self.convShortcut(x), out) 40 | 41 | 42 | class NetworkBlock(nn.Module): 43 | def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropout): 44 | super(NetworkBlock, self).__init__() 45 | self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropout) 46 | 47 | def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropout): 48 | layers = [] 49 | 50 | for i in range(int(nb_layers)): 51 | _in_planes = i == 0 and in_planes or out_planes 52 | _stride = i == 0 and stride or 1 53 | 
layers.append(block(_in_planes, out_planes, _stride, dropout=dropout)) 54 | 55 | return nn.Sequential(*layers) 56 | 57 | def forward(self, x): 58 | return self.layer(x) 59 | 60 | 61 | class WideResNet(nn.Module): 62 | def __init__(self, depth, num_classes, widen_factor=1, dropout=0.0): 63 | super(WideResNet, self).__init__() 64 | 65 | nChannels = [16, 16 * widen_factor, 32 * widen_factor, 64 * widen_factor] 66 | 67 | assert (depth - 4) % 6 == 0, "You need to change the number of layers" 68 | n = (depth - 4) / 6 69 | 70 | block = BasicBlock 71 | 72 | self.conv1 = nn.Conv2d(3, nChannels[0], kernel_size=3, stride=1, padding=1, bias=False) 73 | 74 | self.block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, 1, dropout=dropout) 75 | self.block2 = NetworkBlock(n, nChannels[1], nChannels[2], block, 2, dropout=dropout) 76 | self.block3 = NetworkBlock(n, nChannels[2], nChannels[3], block, 2, dropout=dropout) 77 | 78 | self.bn1 = nn.BatchNorm2d(nChannels[3]) 79 | 80 | self.relu = nn.ReLU(inplace=True) 81 | self.fc = nn.Linear(nChannels[3], num_classes) 82 | self.nChannels = nChannels[3] 83 | 84 | for m in self.modules(): 85 | if isinstance(m, nn.Conv2d): 86 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 87 | m.weight.data.normal_(0, math.sqrt(2. / n)) 88 | elif isinstance(m, nn.BatchNorm2d): 89 | m.weight.data.fill_(1) 90 | m.bias.data.zero_() 91 | elif isinstance(m, nn.Linear): 92 | m.bias.data.zero_() 93 | 94 | def forward(self, x): 95 | out = self.conv1(x) 96 | out = self.block1(out) 97 | out = self.block2(out) 98 | out = self.block3(out) 99 | 100 | out = self.relu(self.bn1(out)) 101 | out = F.adaptive_avg_pool2d(out, 1) 102 | out = out.view(-1, self.nChannels) 103 | return self.fc(out) 104 | -------------------------------------------------------------------------------- /pipeline/models/image_models/wide_resnet_fixup.py: -------------------------------------------------------------------------------- 1 | """ 2 | Wide ResNet by Sergey Zagoruyko and Nikos Komodakis 3 | Fixup initialization by Hongyi Zhang, Yann N. Dauphin, Tengyu Ma 4 | Based on code by xternalz and Andy Brock: 5 | https://github.com/xternalz/WideResNet-pytorch 6 | https://github.com/ajbrock/BoilerPlate 7 | """ 8 | 9 | import math 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | 15 | class BasicBlock(nn.Module): 16 | m = 2 17 | 18 | def __init__(self, in_planes, out_planes, stride, dropout, fixup_l, fixup_coeff): 19 | super(BasicBlock, self).__init__() 20 | 21 | self._dropout = dropout 22 | 23 | self.relu = nn.ReLU(inplace=True) 24 | self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) 25 | self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1, padding=1, bias=False) 26 | 27 | self.equalInOut = in_planes == out_planes 28 | self.conv_res = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, padding=0, bias=False) 29 | self.conv_res = not self.equalInOut and self.conv_res or None 30 | 31 | self.scale = nn.Parameter(torch.ones(1)) 32 | self.biases = nn.ParameterList([nn.Parameter(torch.zeros(1)) for _ in range(4)]) 33 | 34 | k = self.conv1.kernel_size[0] * self.conv1.kernel_size[1] * self.conv1.out_channels 35 | self.conv1.weight.data.normal_(0, fixup_coeff * fixup_l ** (-1 / (2 * self.m - 2)) * math.sqrt(2. 
/ k)) 36 | self.conv2.weight.data.zero_() 37 | 38 | if self.conv_res is not None: 39 | k = self.conv_res.kernel_size[0] * self.conv_res.kernel_size[1] * self.conv_res.out_channels 40 | self.conv_res.weight.data.normal_(0, math.sqrt(2. / k)) 41 | 42 | def forward(self, x): 43 | x_out = self.relu(x + self.biases[0]) 44 | out = self.conv1(x_out) + self.biases[1] 45 | out = self.relu(out) + self.biases[2] 46 | if self._dropout > 0: 47 | out = F.dropout(out, p=self._dropout, training=self.training) 48 | out = self.scale * self.conv2(out) + self.biases[3] 49 | 50 | if self.equalInOut: 51 | return torch.add(x, out) 52 | 53 | return torch.add(self.conv_res(x_out), out) 54 | 55 | 56 | class NetworkBlock(nn.Module): 57 | def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropout, fixup_l, fixup_coeff): 58 | super(NetworkBlock, self).__init__() 59 | self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropout, fixup_l, fixup_coeff) 60 | 61 | def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropout, fixup_l, fixup_coeff): 62 | layers = [] 63 | 64 | for i in range(int(nb_layers)): 65 | _in_planes = i == 0 and in_planes or out_planes 66 | _stride = i == 0 and stride or 1 67 | layers.append(block(_in_planes, out_planes, _stride, dropout=dropout, fixup_l=fixup_l, fixup_coeff=fixup_coeff)) 68 | 69 | return nn.Sequential(*layers) 70 | 71 | def forward(self, x): 72 | return self.layer(x) 73 | 74 | 75 | class WideResNet(nn.Module): 76 | def __init__(self, depth, num_classes, widen_factor=1, dropout=0.0, fixup_coeff=1): 77 | super(WideResNet, self).__init__() 78 | 79 | nChannels = [16, 16 * widen_factor, 32 * widen_factor, 64 * widen_factor] 80 | 81 | assert (depth - 4) % 6 == 0, "You need to change the number of layers" 82 | n = (depth - 4) / 6 83 | 84 | block = BasicBlock 85 | fixup_l = n * 3 86 | 87 | self.conv1 = nn.Conv2d(3, nChannels[0], kernel_size=3, stride=1, padding=1, bias=False) 88 | 89 | self.block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, 1, dropout=dropout, 90 | fixup_l=fixup_l, fixup_coeff=fixup_coeff) 91 | self.block2 = NetworkBlock(n, nChannels[1], nChannels[2], block, 2, dropout=dropout, 92 | fixup_l=fixup_l, fixup_coeff=fixup_coeff) 93 | self.block3 = NetworkBlock(n, nChannels[2], nChannels[3], block, 2, dropout=dropout, 94 | fixup_l=fixup_l, fixup_coeff=fixup_coeff) 95 | 96 | self.relu = nn.ReLU(inplace=True) 97 | self.fc = nn.Linear(nChannels[3], num_classes) 98 | self.nChannels = nChannels[3] 99 | 100 | self.fc.bias.data.zero_() 101 | self.fc.weight.data.zero_() 102 | 103 | k = self.conv1.kernel_size[0] * self.conv1.kernel_size[1] * self.conv1.out_channels 104 | self.conv1.weight.data.normal_(0, math.sqrt(2. 
/ k)) 105 | 106 | self.bias1 = nn.Parameter(torch.zeros(1)) 107 | self.bias2 = nn.Parameter(torch.zeros(1)) 108 | 109 | def forward(self, x): 110 | out = self.conv1(x) + self.bias1 111 | out = self.block1(out) 112 | out = self.block2(out) 113 | out = self.block3(out) 114 | 115 | out = self.relu(out) 116 | out = F.adaptive_avg_pool2d(out, 1) 117 | out = out.view(-1, self.nChannels) 118 | return self.fc(out + self.bias2) 119 | -------------------------------------------------------------------------------- /pipeline/predictors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/predictors/__init__.py -------------------------------------------------------------------------------- /pipeline/predictors/base.py: -------------------------------------------------------------------------------- 1 | import time 2 | from typing import Iterable 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from ..logger import LOGGER 8 | from ..storage.predictions import PredictionsStorageBase 9 | from ..utils import move_to_device, load_model 10 | 11 | import os 12 | 13 | 14 | class PredictorBase: 15 | def __init__( 16 | self, 17 | model: nn.Module, 18 | data_loader: Iterable, 19 | print_frequency: None or int, 20 | device: str, 21 | model_save_path: str, 22 | predictions_storage: PredictionsStorageBase) -> None: 23 | 24 | self.model = model.to(device) 25 | self.data_loader = data_loader 26 | self.print_frequency = print_frequency 27 | self.device = device 28 | self.model_save_path = model_save_path 29 | self.predictions_storage = predictions_storage 30 | 31 | def predict_step(self, input_data: torch.Tensor): 32 | input_data = move_to_device(input_data, device=self.device) 33 | model_output = self.model(input_data) 34 | return model_output 35 | 36 | def log_predict_step(self, step_id: int, predict_time: float): 37 | if self.print_frequency is None or step_id % self.print_frequency == 0: 38 | LOGGER.info("[{} s] Predict step {}".format(predict_time, step_id)) 39 | return True 40 | 41 | return False 42 | 43 | def log_predict_completed(self, predict_time: float): 44 | LOGGER.info("[{} s] Predict is completed".format(predict_time)) 45 | return True 46 | 47 | def load_last_model(self): 48 | if os.path.exists(self.model_save_path): 49 | epochs = filter(lambda file: file.startswith("epoch_"), os.listdir(self.model_save_path)) 50 | epochs = map(lambda file: int(file[file.find("_") + 1:]), epochs)  # slice to the end so "epoch_10" parses as 10, not 1 51 | epochs = list(epochs) 52 | 53 | if epochs: 54 | last_model_path = os.path.join(self.model_save_path, "epoch_{}".format(max(epochs))) 55 | load_model(self.model, last_model_path) 56 | return 57 | 58 | LOGGER.info("Model not found in {}.
Starting to train a model from scratch...".format(self.model_save_path)) 59 | 60 | def run(self): 61 | self.load_last_model() 62 | self.model.eval() 63 | 64 | step_count = 0 65 | start_time = time.time() 66 | 67 | with torch.no_grad(): 68 | for step_id, (input_data, ids) in enumerate(self.data_loader): 69 | model_output = self.predict_step(input_data) 70 | self.predictions_storage.add_batch(ids, model_output) 71 | 72 | step_count += 1 73 | predict_time = time.time() - start_time 74 | self.log_predict_step(step_id, predict_time) 75 | 76 | self.predictions_storage.sort_by_id() 77 | self.predictions_storage.flush() 78 | predict_time = time.time() - start_time 79 | self.log_predict_completed(predict_time) 80 | return predict_time 81 | -------------------------------------------------------------------------------- /pipeline/predictors/classification.py: -------------------------------------------------------------------------------- 1 | from .base import PredictorBase 2 | import torch 3 | 4 | 5 | class PredictorClassification(PredictorBase): 6 | def predict_step(self, input_data: torch.Tensor): 7 | result = super().predict_step(input_data) 8 | result = torch.softmax(result, dim=-1) 9 | return result 10 | -------------------------------------------------------------------------------- /pipeline/preprocessing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/preprocessing/__init__.py -------------------------------------------------------------------------------- /pipeline/preprocessing/audio_preprocessing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/preprocessing/audio_preprocessing/__init__.py -------------------------------------------------------------------------------- /pipeline/preprocessing/image_preprocessing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/preprocessing/image_preprocessing/__init__.py -------------------------------------------------------------------------------- /pipeline/preprocessing/text_preprocessing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/preprocessing/text_preprocessing/__init__.py -------------------------------------------------------------------------------- /pipeline/schedulers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/schedulers/__init__.py -------------------------------------------------------------------------------- /pipeline/schedulers/base.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | 4 | class SchedulerBase(abc.ABC): 5 | @abc.abstractmethod 6 | def step(self, loss, metrics, epoch_id): 7 | pass 8 | 9 | 10 | class SchedulerWrapperBase(SchedulerBase): 11 | def __init__(self, scheduler): 12 | self._scheduler = scheduler 13 | 14 | 15 | class SchedulerWrapperIdentity(SchedulerWrapperBase): 16 | def __init__(self, *args, **kwargs): 
17 | super().__init__(None) 18 | 19 | def step(self, loss, metrics, epoch_id): 20 | pass 21 | 22 | 23 | class SchedulerWrapperLossBase(SchedulerWrapperBase): 24 | def __init__(self, scheduler): 25 | super().__init__(scheduler) 26 | 27 | def step(self, loss, metrics, epoch_id): 28 | return self._scheduler.step(loss, epoch_id) 29 | 30 | 31 | class SchedulerWrapperMetricsMeanBase(SchedulerWrapperBase): 32 | def __init__(self, scheduler): 33 | super().__init__(scheduler) 34 | 35 | def step(self, loss, metrics, epoch_id): 36 | values = list(metrics.values()) 37 | mean_metrics = sum(values) / len(values) 38 | return self._scheduler.step(mean_metrics, epoch_id) 39 | -------------------------------------------------------------------------------- /pipeline/schedulers/dropout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/schedulers/dropout/__init__.py -------------------------------------------------------------------------------- /pipeline/schedulers/dropout/increase_step.py: -------------------------------------------------------------------------------- 1 | from ..base import SchedulerBase 2 | 3 | from .utils import set_dropout_probability 4 | 5 | 6 | class SchedulerWrapperIncreaseStep(SchedulerBase): 7 | def __init__(self, model, epoch_count, initial_value=0, max_value=0.5): 8 | self._model = model 9 | self._epoch_count = epoch_count 10 | self._initial_value = initial_value 11 | self._max_value = max_value 12 | 13 | def step(self, loss, metrics, epoch_id): 14 | new_value = (self._max_value - self._initial_value) / self._epoch_count * (epoch_id + 1) 15 | set_dropout_probability(self._model, new_value) 16 | -------------------------------------------------------------------------------- /pipeline/schedulers/dropout/utils.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from torch.nn.modules.dropout import _DropoutNd 3 | 4 | 5 | def set_dropout_probability(module, probability): 6 | if isinstance(module, _DropoutNd): 7 | module.p = probability 8 | return 9 | 10 | for child in module.children(): 11 | set_dropout_probability(child, probability) 12 | -------------------------------------------------------------------------------- /pipeline/schedulers/learning_rate/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/schedulers/learning_rate/__init__.py -------------------------------------------------------------------------------- /pipeline/schedulers/learning_rate/cyclical_lr_scheduler.py: -------------------------------------------------------------------------------- 1 | from ..base import SchedulerWrapperLossBase, SchedulerWrapperMetricsMeanBase 2 | 3 | from torch.optim.lr_scheduler import CosineAnnealingLR 4 | 5 | 6 | class SchedulerWrapperLossOnCyclic(SchedulerWrapperLossBase): 7 | def __init__(self, optimizer, T_max, eta_min=0, last_epoch=-1): 8 | scheduler = CosineAnnealingLR( 9 | optimizer, 10 | T_max=T_max, 11 | eta_min=eta_min, 12 | last_epoch=last_epoch, 13 | ) 14 | super().__init__(scheduler) 15 | 16 | 17 | class SchedulerWrapperMetricsMeanOnCyclic(SchedulerWrapperMetricsMeanBase): 18 | def __init__(self, optimizer, T_max, eta_min=0, last_epoch=-1): 19 | scheduler = CosineAnnealingLR( 20 | optimizer, 21 | T_max=T_max, 22 | 
eta_min=eta_min, 23 | last_epoch=last_epoch, 24 | ) 25 | super().__init__(scheduler) -------------------------------------------------------------------------------- /pipeline/schedulers/learning_rate/reduce_on_plateau.py: -------------------------------------------------------------------------------- 1 | from ..base import SchedulerWrapperLossBase, SchedulerWrapperMetricsMeanBase 2 | 3 | from torch.optim.lr_scheduler import ReduceLROnPlateau 4 | 5 | 6 | class SchedulerWrapperLossOnPlateau(SchedulerWrapperLossBase): 7 | def __init__(self, optimizer, mode="min", factor=0.5, patience=3, verbose=True, cooldown=3, min_lr=1e-8): 8 | scheduler = ReduceLROnPlateau( 9 | optimizer, 10 | mode=mode, 11 | factor=factor, 12 | patience=patience, 13 | verbose=verbose, 14 | cooldown=cooldown, 15 | min_lr=min_lr 16 | ) 17 | super().__init__(scheduler) 18 | 19 | 20 | class SchedulerWrapperMetricsMeanOnPlateau(SchedulerWrapperMetricsMeanBase): 21 | def __init__(self, optimizer, mode="max", factor=0.5, patience=3, verbose=True, cooldown=3, min_lr=1e-8): 22 | scheduler = ReduceLROnPlateau( 23 | optimizer, 24 | mode=mode, 25 | factor=factor, 26 | patience=patience, 27 | verbose=verbose, 28 | cooldown=cooldown, 29 | min_lr=min_lr 30 | ) 31 | super().__init__(scheduler) 32 | -------------------------------------------------------------------------------- /pipeline/storage/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/storage/__init__.py -------------------------------------------------------------------------------- /pipeline/storage/predictions.py: -------------------------------------------------------------------------------- 1 | from ..core import PipelineError 2 | 3 | import abc 4 | import torch 5 | import os 6 | 7 | 8 | class PredictionsStorageBase(abc.ABC): 9 | @abc.abstractmethod 10 | def add(self, identifier, prediction): 11 | pass 12 | 13 | def add_batch(self, identifiers, predictions): 14 | for identifier, prediction in zip(identifiers, predictions): 15 | self.add(identifier, prediction) 16 | 17 | @abc.abstractmethod 18 | def flush(self): 19 | pass 20 | 21 | @abc.abstractmethod 22 | def get_all(self): 23 | pass 24 | 25 | @abc.abstractmethod 26 | def get_by_id(self, identifier): 27 | pass 28 | 29 | def get_by_id_batch(self, identifiers): 30 | result = [] 31 | for identifier in identifiers: 32 | result.append(self.get_by_id(identifier)) 33 | 34 | return torch.stack(result) 35 | 36 | @abc.abstractmethod 37 | def sort_by_id(self): 38 | pass 39 | 40 | 41 | class PredictionsStorageFiles(PredictionsStorageBase): 42 | def __init__(self, path): 43 | if os.path.exists(path) and not os.path.isdir(path): 44 | raise PipelineError("{} should be a directory".format(path)) 45 | 46 | os.makedirs(path, exist_ok=True) 47 | 48 | self._path = path 49 | 50 | self._identifiers = [] 51 | self._predictions = [] 52 | 53 | self._identifier_to_element_id = {} 54 | 55 | if os.path.exists(os.path.join(self._path, "identifiers")): 56 | self._load_predictions() 57 | 58 | def _load_predictions(self): 59 | self._identifiers = torch.load(os.path.join(self._path, "identifiers")) 60 | self._predictions = torch.load(os.path.join(self._path, "predictions")) 61 | 62 | assert len(self._identifiers) == len(self._predictions) 63 | 64 | for i, identifier in enumerate(self._identifiers): 65 | self._identifier_to_element_id[identifier] = i 66 | 67 | def _save_predictions(self): 68 | assert 
len(self._identifiers) == len(self._predictions) 69 | 70 | with open(os.path.join(self._path, "identifiers"), "wb") as fout: 71 | torch.save(self._identifiers, fout) 72 | 73 | with open(os.path.join(self._path, "predictions"), "wb") as fout: 74 | torch.save(self._predictions, fout) 75 | 76 | def add(self, identifier, prediction): 77 | self._identifiers.append(identifier) 78 | self._predictions.append(prediction) 79 | self._identifier_to_element_id[identifier] = len(self._identifiers) - 1  # index of the element just appended 80 | 81 | def flush(self): 82 | self._save_predictions() 83 | 84 | def get_all(self): 85 | return self._identifiers, self._predictions 86 | 87 | def get_by_id(self, identifier): 88 | if identifier not in self._identifier_to_element_id: 89 | raise PipelineError("Key error: {}".format(identifier)) 90 | 91 | element_id = self._identifier_to_element_id[identifier] 92 | return self._predictions[element_id] 93 | 94 | def sort_by_id(self): 95 | result = sorted(zip(self._identifiers, self._predictions), key=lambda x: x[0]) 96 | self._identifiers, self._predictions = map(list, zip(*result))  # keep lists so add() still works after sorting 97 | self._identifier_to_element_id = {identifier: i for i, identifier in enumerate(self._identifiers)}  # indices move when sorting 98 | self.flush() 99 | -------------------------------------------------------------------------------- /pipeline/storage/state.py: -------------------------------------------------------------------------------- 1 | from ..core import PipelineError 2 | 3 | import abc 4 | import pickle 5 | import os 6 | 7 | 8 | class StateStorageBase(abc.ABC): 9 | @abc.abstractmethod 10 | def has_key(self, key: str): 11 | pass 12 | 13 | @abc.abstractmethod 14 | def get_value(self, key: str): 15 | pass 16 | 17 | @abc.abstractmethod 18 | def remove_key(self, key: str): 19 | pass 20 | 21 | @abc.abstractmethod 22 | def set_value(self, key: str, value: object): 23 | pass 24 | 25 | 26 | class StateStorageEmpty(StateStorageBase): 27 | def set_value(self, key: str, value: object): 28 | pass 29 | 30 | def get_value(self, key: str): 31 | raise PipelineError("Key error: {}".format(key)) 32 | 33 | def has_key(self, key: str): 34 | return False 35 | 36 | def remove_key(self, key: str): 37 | raise PipelineError("Key error: {}".format(key)) 38 | 39 | 40 | class StateStorageFile(StateStorageBase): 41 | def __init__(self, path: str): 42 | self._path = path 43 | 44 | if not os.path.exists(path): 45 | os.makedirs(os.path.dirname(path), exist_ok=True) 46 | with open(path, "wb") as fout: 47 | pickle.dump({}, fout) 48 | 49 | with open(path, "rb") as fin: 50 | self._state = pickle.load(fin) 51 | 52 | def _save(self): 53 | with open(self._path, "wb") as fout: 54 | pickle.dump(self._state, fout) 55 | 56 | def has_key(self, key: str): 57 | return key in self._state 58 | 59 | def get_value(self, key: str): 60 | if key not in self._state: 61 | raise PipelineError("Key error: {}".format(key)) 62 | 63 | return self._state[key] 64 | 65 | def set_value(self, key: str, value: object): 66 | self._state[key] = value 67 | 68 | self._save() 69 | 70 | def remove_key(self, key: str): 71 | if key not in self._state: 72 | raise PipelineError("Key error: {}".format(key)) 73 | 74 | del self._state[key] 75 | 76 | self._save() 77 | 78 | -------------------------------------------------------------------------------- /pipeline/trainers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/pipeline/trainers/__init__.py -------------------------------------------------------------------------------- /pipeline/trainers/base.py:
-------------------------------------------------------------------------------- 1 | import time 2 | from typing import Iterable 3 | 4 | import torch 5 | import torch.nn as nn 6 | from torch.optim import Optimizer 7 | 8 | from ..core import PipelineError 9 | from ..logger import LOGGER 10 | from ..metrics.base import MetricsCalculatorBase 11 | from pipeline.schedulers.base import SchedulerWrapperMetricsMeanBase, SchedulerWrapperBase 12 | from ..storage.state import StateStorageBase 13 | from ..utils import move_to_device, save_model, load_model 14 | 15 | import os 16 | 17 | 18 | class TrainerBase: 19 | def __init__( 20 | self, 21 | model: nn.Module, 22 | train_data_loader: Iterable, 23 | val_data_loader: Iterable, 24 | epoch_count: int, 25 | optimizer: Optimizer, 26 | scheduler: SchedulerWrapperBase, 27 | loss: nn.Module, 28 | metrics_calculator: MetricsCalculatorBase, 29 | print_frequency: None or int, 30 | device: str, 31 | model_save_path: str, 32 | state_storage: StateStorageBase) -> None: 33 | 34 | self.model = model.to(device) 35 | self.train_data_loader = train_data_loader 36 | self.val_data_loader = val_data_loader 37 | self.epoch_count = epoch_count 38 | self.optimizer = optimizer 39 | self.scheduler = scheduler 40 | self.loss = loss 41 | self.metrics_calculator = metrics_calculator 42 | self.print_frequency = print_frequency 43 | self.device = device 44 | self.model_save_path = model_save_path 45 | self.state_storage = state_storage 46 | 47 | def train_step(self, input_data: torch.Tensor, target: torch.Tensor): 48 | input_data = move_to_device(input_data, device=self.device) 49 | target = move_to_device(target, device=self.device) 50 | 51 | model_output = self.model(input_data) 52 | 53 | self.optimizer.zero_grad() 54 | loss = self.loss(model_output, target) 55 | 56 | loss.backward() 57 | 58 | self.optimizer.step(closure=None) 59 | 60 | return loss.cpu().data.numpy() 61 | 62 | def predict_step(self, input_data: torch.Tensor): 63 | input_data = move_to_device(input_data, device=self.device) 64 | model_output = self.model(input_data) 65 | return model_output 66 | 67 | def log_train_step(self, epoch_id: int, step_id: int, epoch_time: float, loss: float, mean_loss: float): 68 | if self.print_frequency is None or step_id % self.print_frequency == 0: 69 | LOGGER.info("[{} s] Epoch {}. Train step {}. Loss {}. Mean loss {}".format( 70 | epoch_time, epoch_id, step_id, loss, mean_loss)) 71 | return True 72 | 73 | return False 74 | 75 | def log_validation_step(self, epoch_id: int, step_id: int, epoch_time: float, loss: float, mean_loss: float): 76 | if self.print_frequency is None or step_id % self.print_frequency == 0: 77 | LOGGER.info("[{} s] Epoch {}. Validation step {}. Loss {}. Mean loss {}".format( 78 | epoch_time, epoch_id, step_id, loss, mean_loss)) 79 | 80 | return True 81 | 82 | return False 83 | 84 | def log_train_epoch(self, epoch_id: int, epoch_time: float, mean_loss: float): 85 | LOGGER.info("Training Epoch {} has completed. Time: {}. Mean loss: {}".format( 86 | epoch_id, epoch_time, mean_loss)) 87 | return True 88 | 89 | def log_validation_epoch(self, epoch_id: int, epoch_time: float, mean_loss: float, metrics: dict): 90 | LOGGER.info("Validation Epoch {} has completed. Time: {}. Mean loss: {}. 
Metrics: {}".format( 91 | epoch_id, epoch_time, mean_loss, str(metrics))) 92 | return True 93 | 94 | def run_train_epoch(self, epoch_id: int): 95 | self.model.train() 96 | 97 | start_time = time.time() 98 | mean_loss = 0 99 | step_count = 0 100 | 101 | for step_id, (input_data, target) in enumerate(self.train_data_loader): 102 | loss = self.train_step(input_data, target) 103 | epoch_time = time.time() - start_time 104 | 105 | mean_loss += loss 106 | step_count += 1 107 | 108 | self.log_train_step(epoch_id, step_id, epoch_time, loss, mean_loss / step_count) 109 | 110 | epoch_time = time.time() - start_time 111 | mean_loss /= max(step_count, 1) 112 | 113 | self.log_train_epoch(epoch_id, epoch_time, mean_loss) 114 | 115 | return epoch_time, mean_loss 116 | 117 | def run_validation_epoch(self, epoch_id: int): 118 | self.model.eval() 119 | 120 | self.metrics_calculator.zero_cache() 121 | mean_loss = 0 122 | step_count = 0 123 | start_time = time.time() 124 | 125 | with torch.no_grad(): 126 | for step_id, (input_data, target) in enumerate(self.val_data_loader): 127 | target = move_to_device(target, device=self.device) 128 | model_output = self.predict_step(input_data) 129 | 130 | loss = self.loss(model_output, target) 131 | mean_loss += loss 132 | step_count += 1 133 | epoch_time = time.time() - start_time 134 | 135 | self.metrics_calculator.add(model_output, target) 136 | self.log_validation_step(epoch_id, step_id, epoch_time, loss, mean_loss / step_count) 137 | 138 | epoch_time = time.time() - start_time 139 | mean_loss /= max(step_count, 1) 140 | metrics = self.metrics_calculator.calculate() 141 | 142 | self.log_validation_epoch(epoch_id, epoch_time, mean_loss, metrics) 143 | 144 | return epoch_time, mean_loss, metrics 145 | 146 | def load_optimizer_state(self): 147 | if not self.state_storage.has_key("learning_rates"): 148 | return 149 | 150 | learning_rates = self.state_storage.get_value("learning_rates") 151 | 152 | for learning_rate, param_group in zip(learning_rates, self.optimizer.param_groups): 153 | param_group["lr"] = learning_rate 154 | 155 | def save_optimizer_state(self): 156 | learning_rates = [] 157 | for param_group in self.optimizer.param_groups: 158 | learning_rates.append(float(param_group['lr'])) 159 | 160 | self.state_storage.set_value("learning_rates", learning_rates) 161 | 162 | def save_last_model(self, epoch_id): 163 | os.makedirs(self.model_save_path, exist_ok=True) 164 | model_path = os.path.join(self.model_save_path, "epoch_{}".format(epoch_id)) 165 | save_model(self.model, model_path) 166 | LOGGER.info("Model was saved in {}".format(model_path)) 167 | 168 | def load_last_model(self, epoch_id): 169 | last_model_path = os.path.join(self.model_save_path, "epoch_{}".format(epoch_id)) 170 | load_model(self.model, last_model_path) 171 | 172 | def run(self): 173 | start_epoch_id = 0 174 | 175 | if self.state_storage.has_key("start_epoch_id"): 176 | start_epoch_id = self.state_storage.get_value("start_epoch_id") 177 | try: 178 | self.load_last_model(start_epoch_id - 1) 179 | except: 180 | LOGGER.exception("Exception occurs during loading a model. Starting to train a model from scratch...") 181 | else: 182 | LOGGER.info("Model not found in {}. 
Starting to train a model from scratch...".format(self.model_save_path)) 183 | 184 | self.load_optimizer_state() 185 | 186 | epoch_id = start_epoch_id 187 | while self.epoch_count is None or epoch_id < self.epoch_count: 188 | _, mean_train_loss = self.run_train_epoch(epoch_id) 189 | 190 | if self.val_data_loader is None: 191 | if isinstance(self.scheduler, SchedulerWrapperMetricsMeanBase): 192 | raise PipelineError("You can't use a scheduler based on metrics without validation data") 193 | self.scheduler.step(mean_train_loss, {}, epoch_id) 194 | else: 195 | _, mean_validation_loss, validation_metrics = self.run_validation_epoch(epoch_id) 196 | self.scheduler.step(mean_validation_loss, validation_metrics, epoch_id) 197 | 198 | self.state_storage.set_value("start_epoch_id", epoch_id + 1)  # persist progress and advance the epoch counter whether or not validation ran 199 | self.save_optimizer_state() 200 | self.save_last_model(epoch_id) 201 | 202 | epoch_id += 1 203 | -------------------------------------------------------------------------------- /pipeline/trainers/classification.py: -------------------------------------------------------------------------------- 1 | from .base import TrainerBase 2 | 3 | 4 | class TrainerClassification(TrainerBase): 5 | pass 6 | -------------------------------------------------------------------------------- /pipeline/trainers/segmentation.py: -------------------------------------------------------------------------------- 1 | from .base import TrainerBase 2 | 3 | 4 | class TrainerSegmentation(TrainerBase): 5 | pass 6 | -------------------------------------------------------------------------------- /pipeline/utils.py: -------------------------------------------------------------------------------- 1 | from .logger import setup_logger 2 | 3 | from torch.utils.data import DataLoader 4 | from torch.nn import DataParallel 5 | 6 | import importlib 7 | import torch 8 | import os 9 | 10 | 11 | def _load_cls(module_path, cls_name): 12 | module_path_fixed = module_path 13 | if module_path_fixed.endswith(".py"): 14 | module_path_fixed = module_path_fixed[:-3] 15 | module_path_fixed = module_path_fixed.replace("/", ".") 16 | module = importlib.import_module(module_path_fixed) 17 | assert hasattr(module, cls_name), "{} file should contain {} class".format(module_path, cls_name) 18 | 19 | cls = getattr(module, cls_name) 20 | return cls 21 | 22 | 23 | def load_config(config_path: str): 24 | return _load_cls(config_path, "Config")() 25 | 26 | 27 | def load_predict_config(config_path: str): 28 | return _load_cls(config_path, "PredictConfig")() 29 | 30 | 31 | def move_to_device(tensor: list or tuple or torch.Tensor, device: str): 32 | if isinstance(tensor, list): 33 | return [move_to_device(elem, device=device) for elem in tensor] 34 | if isinstance(tensor, tuple): 35 | return tuple(move_to_device(elem, device=device) for elem in tensor)  # tuple(), not a bare generator expression 36 | return tensor.to(device) 37 | 38 | 39 | def get_path(path): 40 | return os.path.expanduser(path) 41 | 42 | 43 | def save_model(model, path): 44 | if isinstance(model, DataParallel): 45 | model = model.module 46 | 47 | with open(path, "wb") as fout: 48 | torch.save(model.state_dict(), fout) 49 | 50 | 51 | def load_model(model, path): 52 | with open(path, "rb") as fin: 53 | state_dict = torch.load(fin) 54 | 55 | model.load_state_dict(state_dict) 56 | 57 | 58 | def run_train(config): 59 | train_data_loader = DataLoader( 60 | config.train_dataset, 61 | batch_size=config.batch_size, 62 | shuffle=True, 63 | pin_memory=True, 64 | num_workers=config.num_workers) 65 | 66 | val_data_loader = DataLoader( 67 |
config.val_dataset, 68 | batch_size=config.batch_size, 69 | shuffle=False, 70 | num_workers=config.num_workers) 71 | 72 | model = config.model 73 | 74 | model_save_path = config.model_save_path 75 | os.makedirs(model_save_path, exist_ok=True) 76 | 77 | logger_path = os.path.join(model_save_path, "log.txt") 78 | setup_logger(out_file=logger_path) 79 | 80 | trainer = config.trainer_cls( 81 | model=model, 82 | train_data_loader=train_data_loader, 83 | val_data_loader=val_data_loader, 84 | epoch_count=config.epoch_count, 85 | optimizer=config.optimizer, 86 | scheduler=config.scheduler, 87 | loss=config.loss, 88 | metrics_calculator=config.metrics_calculator, 89 | print_frequency=config.print_frequency, 90 | device=config.device, 91 | model_save_path=config.model_save_path, 92 | state_storage=config.state_storage 93 | ) 94 | 95 | trainer.run() 96 | 97 | 98 | def run_predict(config): 99 | data_loader = DataLoader( 100 | config.dataset, 101 | batch_size=config.batch_size, 102 | shuffle=False, 103 | pin_memory=True, 104 | num_workers=config.num_workers) 105 | 106 | model = config.model 107 | 108 | model_save_path = config.model_save_path 109 | assert os.path.exists(model_save_path), "{} does not exist".format(model_save_path) 110 | 111 | logger_path = os.path.join(model_save_path, "log_predict.txt") 112 | setup_logger(out_file=logger_path) 113 | 114 | predictor = config.predictor_cls( 115 | model=model, 116 | data_loader=data_loader, 117 | print_frequency=config.print_frequency, 118 | device=config.device, 119 | model_save_path=model_save_path, 120 | predictions_storage=config.predictions_storage) 121 | 122 | predictor.run() 123 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.0.0 2 | pandas 3 | numpy 4 | torchvision 5 | scikit-learn 6 | Pillow 7 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PavelOstyakov/pipeline/236c050af3be9dbb534e959589040e9433501e2b/tests/__init__.py -------------------------------------------------------------------------------- /tests/common.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import os 3 | 4 | 5 | def make_temp_path(): 6 | _, path = tempfile.mkstemp() 7 | os.remove(path) 8 | return path 9 | -------------------------------------------------------------------------------- /tests/test_metrics.py: -------------------------------------------------------------------------------- 1 | from pipeline.metrics.accuracy import MetricsCalculatorAccuracy 2 | from pipeline.core import PipelineError 3 | 4 | 5 | import pytest 6 | 7 | 8 | class TestClassificationMetrics: 9 | def test_accuracy(self): 10 | metrics_calculator = MetricsCalculatorAccuracy(border=0.4) 11 | 12 | with pytest.raises(PipelineError): 13 | metrics_calculator.calculate() 14 | 15 | -------------------------------------------------------------------------------- /tests/test_schedulers.py: -------------------------------------------------------------------------------- 1 | from pipeline.schedulers.learning_rate.reduce_on_plateau import SchedulerWrapperLossOnPlateau, SchedulerWrapperMetricsMeanOnPlateau 2 | 3 | from torch.optim import Adam 4 | import torch.nn as nn 5 | 6 | 7 | class TestReduceLROnPlateau: 8 | def test_wrapper_loss(self): 9 
--------------------------------------------------------------------------------
/tests/test_schedulers.py:
--------------------------------------------------------------------------------
1 | from pipeline.schedulers.learning_rate.reduce_on_plateau import SchedulerWrapperLossOnPlateau, SchedulerWrapperMetricsMeanOnPlateau
2 | 
3 | from torch.optim import Adam
4 | import torch.nn as nn
5 | 
6 | 
7 | class TestReduceLROnPlateau:
8 |     def test_wrapper_loss(self):
9 |         first_layer = nn.Linear(10, 5)
10 |         second_layer = nn.Linear(5, 1)
11 | 
12 |         optimizer = Adam([{"params": first_layer.parameters(), "lr": 1},
13 |                           {"params": second_layer.parameters(), "lr": 2}])
14 |         scheduler = SchedulerWrapperLossOnPlateau(optimizer, factor=0.5, patience=1, min_lr=0.1, cooldown=2)
15 | 
16 |         assert optimizer.param_groups[0]["lr"] == 1
17 |         assert optimizer.param_groups[1]["lr"] == 2
18 | 
19 |         scheduler.step(loss=10, metrics={"a": 5}, epoch_id=0)
20 |         assert optimizer.param_groups[0]["lr"] == 1
21 |         assert optimizer.param_groups[1]["lr"] == 2
22 | 
23 |         scheduler.step(loss=11, metrics={"a": 3}, epoch_id=1)
24 |         assert optimizer.param_groups[0]["lr"] == 1
25 |         assert optimizer.param_groups[1]["lr"] == 2
26 | 
27 |         scheduler.step(loss=12, metrics={"a": 1}, epoch_id=2)
28 |         assert optimizer.param_groups[0]["lr"] == 0.5
29 |         assert optimizer.param_groups[1]["lr"] == 1
30 | 
31 |         scheduler.step(loss=13, metrics={"a": 2}, epoch_id=3)
32 |         scheduler.step(loss=14, metrics={"a": 5}, epoch_id=4)
33 |         scheduler.step(loss=14, metrics={"a": 2}, epoch_id=5)
34 | 
35 |         assert optimizer.param_groups[0]["lr"] == 0.5
36 |         assert optimizer.param_groups[1]["lr"] == 1
37 | 
38 |         scheduler.step(loss=14, metrics={"a": 100}, epoch_id=6)
39 |         assert optimizer.param_groups[0]["lr"] == 0.25
40 |         assert optimizer.param_groups[1]["lr"] == 0.5
41 | 
42 |         scheduler.step(loss=9, metrics={"a": 21}, epoch_id=7)
43 |         scheduler.step(loss=8, metrics={"a": 21}, epoch_id=7)
44 | 
45 |         assert optimizer.param_groups[0]["lr"] == 0.25
46 |         assert optimizer.param_groups[1]["lr"] == 0.5
47 | 
48 |         scheduler.step(loss=13, metrics={"a": 3}, epoch_id=8)
49 | 
50 |         assert optimizer.param_groups[0]["lr"] == 0.25
51 |         assert optimizer.param_groups[1]["lr"] == 0.5
52 | 
53 |         scheduler.step(loss=14, metrics=None, epoch_id=9)
54 | 
55 |         assert optimizer.param_groups[0]["lr"] == 0.125
56 |         assert optimizer.param_groups[1]["lr"] == 0.25
57 | 
58 |         for epoch_id in range(10, 30):
59 |             scheduler.step(loss=14, metrics={"absd": "asdasd"}, epoch_id=epoch_id)
60 | 
61 |         assert optimizer.param_groups[0]["lr"] == 0.1
62 |         assert optimizer.param_groups[1]["lr"] == 0.1
63 | 
64 |     def test_wrapper_metrics(self):
65 |         model = nn.Linear(10, 1)
66 | 
67 |         optimizer = Adam(model.parameters(), lr=1)
68 |         scheduler = SchedulerWrapperMetricsMeanOnPlateau(optimizer, factor=0.5, patience=0, min_lr=0.1, cooldown=0)
69 | 
70 |         assert optimizer.param_groups[0]["lr"] == 1
71 | 
72 |         scheduler.step(loss=None, metrics={"a": 1, "b": 1}, epoch_id=0)
73 |         assert optimizer.param_groups[0]["lr"] == 1
74 | 
75 |         scheduler.step(loss="abacaba", metrics={"a": 1, "b": 0}, epoch_id=1)
76 |         scheduler.step(loss=-10, metrics={"a": 1, "b": 1}, epoch_id=2)
77 |         assert optimizer.param_groups[0]["lr"] == 0.25
78 | 
79 |         scheduler.step(loss=123, metrics={"a": 1, "b": 2}, epoch_id=3)
80 |         assert optimizer.param_groups[0]["lr"] == 0.25
81 | 
82 |         scheduler.step(loss=0, metrics={"a": 2}, epoch_id=4)
83 |         assert optimizer.param_groups[0]["lr"] == 0.25
84 | 
85 |         scheduler.step(loss=0, metrics={"aasda": 1.1}, epoch_id=5)
86 |         assert optimizer.param_groups[0]["lr"] == 0.125
87 | 
88 |         for epoch_id in range(6, 20):
89 |             scheduler.step(loss=0, metrics={"c": 1}, epoch_id=epoch_id)
90 |         assert optimizer.param_groups[0]["lr"] == 0.1
91 | 
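The asserts above pin down the plateau contract: `step()` is called once per epoch, `SchedulerWrapperLossOnPlateau` watches the loss (and ignores `metrics`), every param group's learning rate is multiplied by `factor` after `patience` non-improving epochs, reductions pause for `cooldown` epochs, and the rate is floored at `min_lr`. A minimal sketch of driving the wrapper from an ordinary training loop, assuming only the constructor and `step()` signatures the test exercises (the toy model and regression loss are stand-ins, not the repo's trainer):

```python
import torch
import torch.nn as nn
from torch.optim import Adam
from pipeline.schedulers.learning_rate.reduce_on_plateau import SchedulerWrapperLossOnPlateau

model = nn.Linear(10, 1)
optimizer = Adam(model.parameters(), lr=0.01)
scheduler = SchedulerWrapperLossOnPlateau(optimizer, factor=0.5, patience=3, min_lr=1e-5, cooldown=1)

for epoch_id in range(20):
    # Stand-in "epoch": one step on random data instead of a real data loader.
    x, y = torch.randn(32, 10), torch.randn(32, 1)
    loss = nn.functional.mse_loss(model(x), y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # One scheduler step per epoch; the loss wrapper ignores metrics,
    # so an empty dict is enough here.
    scheduler.step(loss=loss.item(), metrics={}, epoch_id=epoch_id)
```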
--------------------------------------------------------------------------------
/tests/test_storage.py:
--------------------------------------------------------------------------------
1 | from .common import make_temp_path
2 | 
3 | from pipeline.storage.state import StateStorageEmpty, StateStorageFile
4 | from pipeline.core import PipelineError
5 | 
6 | import pytest
7 | 
8 | 
9 | class TestStateStorageEmpty:
10 |     def test_set_value(self):
11 |         state_storage = StateStorageEmpty()
12 |         state_storage.set_value("key_name", 123)
13 | 
14 |     def test_get_value(self):
15 |         state_storage = StateStorageEmpty()
16 | 
17 |         with pytest.raises(PipelineError):
18 |             state_storage.get_value("some_key")
19 | 
20 |         state_storage.set_value("some_key", 123)
21 |         with pytest.raises(PipelineError):
22 |             state_storage.get_value("some_key")
23 | 
24 |     def test_has_key(self):
25 |         state_storage = StateStorageEmpty()
26 | 
27 |         assert not state_storage.has_key("key")
28 |         state_storage.set_value("key", "abacaba")
29 | 
30 |         assert not state_storage.has_key("key")
31 | 
32 |     def test_remove_key(self):
33 |         state_storage = StateStorageEmpty()
34 | 
35 |         with pytest.raises(PipelineError):
36 |             state_storage.remove_key("abacaba")
37 | 
38 |         state_storage.set_value("abacaba", 9.23)
39 |         with pytest.raises(PipelineError):
40 |             state_storage.remove_key("abacaba")
41 | 
42 | 
43 | class TestStateStorageFile:
44 |     def test_basic(self):
45 |         path = make_temp_path()
46 |         state_storage = StateStorageFile(path)
47 | 
48 |         assert not state_storage.has_key("key")
49 | 
50 |         with pytest.raises(PipelineError):
51 |             state_storage.remove_key("abacaba")
52 | 
53 |         with pytest.raises(PipelineError):
54 |             state_storage.get_value("some_key")
55 | 
56 |     def test_save_load(self):
57 |         path = make_temp_path()
58 |         state_storage = StateStorageFile(path)
59 | 
60 |         state_storage.set_value("aba", 123)
61 |         assert state_storage.get_value("aba") == 123
62 |         assert state_storage.has_key("aba")
63 | 
64 |         state_storage = StateStorageFile(path)
65 |         assert state_storage.get_value("aba") == 123
66 |         assert state_storage.has_key("aba")
67 | 
68 |         state_storage.remove_key("aba")
69 |         assert not state_storage.has_key("aba")
70 | 
71 |         state_storage = StateStorageFile(path)
72 |         assert not state_storage.has_key("aba")
73 | 
--------------------------------------------------------------------------------
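`test_save_load` above shows the property that matters in practice: values written through one `StateStorageFile` instance are visible to a fresh instance constructed on the same path, so training state survives a process restart. A minimal sketch of using that for resumable training, assuming only the four methods the tests exercise (`set_value`, `get_value`, `has_key`, `remove_key`); the `last_finished_epoch` key and the path are illustrative choices, not repo conventions:

```python
from pipeline.storage.state import StateStorageFile

# The on-disk format is an implementation detail of StateStorageFile;
# only the path matters to the caller.
storage = StateStorageFile("/tmp/train_state")

# Resume after the last epoch that completed, or start from scratch.
if storage.has_key("last_finished_epoch"):
    start_epoch = storage.get_value("last_finished_epoch") + 1
else:
    start_epoch = 0

for epoch_id in range(start_epoch, 10):
    ...  # train one epoch
    # Persist progress; a new StateStorageFile on the same path will see it.
    storage.set_value("last_finished_epoch", epoch_id)
```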