├── .gitignore
├── LICENSE
├── README.md
├── baselines
│   └── rule-based-text-classifier_vader.ipynb
├── benchmark
│   ├── pandas-pyarrow
│   │   ├── pandas2-pyarrow.ipynb
│   │   └── results.png
│   ├── pytorch-lightning-m1-gpu
│   │   ├── README.md
│   │   ├── main.py
│   │   ├── my_classifier_template
│   │   │   ├── __init__.py
│   │   │   ├── dataset.py
│   │   │   └── model.py
│   │   ├── requirements.txt
│   │   └── setup.py
│   └── pytorch-m1-gpu
│       ├── README.md
│       ├── lenet-mnist-results
│       │   ├── 1080ti.txt
│       │   ├── 2080ti.txt
│       │   ├── 3090.txt
│       │   ├── intel-cpu.txt
│       │   ├── m1-cpu.txt
│       │   ├── m1-gpu.txt
│       │   ├── m1pro-cpu.txt
│       │   └── m1pro-gpu.txt
│       ├── lenet-mnist.py
│       ├── mlp-mnist.py
│       ├── mlp-results
│       │   ├── m1-cpu-mlp.txt
│       │   ├── m1-gpu-mlp.txt
│       │   ├── mlp-1080ti.txt
│       │   ├── mlp-2080ti.txt
│       │   ├── mlp-3090.txt
│       │   ├── mlp-intel-cpu.txt
│       │   ├── mlp-m1max-gpu_torch1.13.0.dev20220522.txt
│       │   ├── mlp-m1pro-cpu_torch1.12.0.dev20220518.txt
│       │   ├── mlp-m1pro-cpu_torch1.13.0.dev20220522.txt
│       │   ├── mlp-m1pro-gpu_torch1.12.0.dev20220518.txt
│       │   └── mlp-m1pro-gpu_torch1.13.0.dev20220522.txt
│       ├── vgg16-cifar10-results
│       │   ├── 1080ti.txt
│       │   ├── 2080ti.txt
│       │   ├── 3090.txt
│       │   ├── intel-cpu.txt
│       │   ├── m1-plain-gpu_torch 1.12.0.dev20220518.txt
│       │   ├── m1-plain-gpu_torch-1.13.0.dev20220522.txt
│       │   ├── m1max-gpu_torch-1.12.0.dev20220518.txt
│       │   ├── m1max-gpu_torch-1.13.0.dev20220522.txt
│       │   ├── m1pro-cpu_torch 1.12.0.dev20220518.txt
│       │   ├── m1pro-cpu_torch1.13.0.dev20220522.txt
│       │   ├── m1pro-gpu_torch 1.12.0.dev20220518.txt
│       │   ├── m1pro-gpu_torch1.13.0.dev20220522.txt
│       │   ├── m1ultra-cpu_torch-1.13.0.dev20220522.txt
│       │   ├── m1ultra-gpu_torch-1.13.0.dev20220522.txt
│       │   ├── titanv-cpu.txt
│       │   └── titanv.txt
│       └── vgg16-cifar10.py
├── categorical-features
│   ├── data
│   │   └── iris_mod.csv
│   ├── gradient-boosting-with-categorical-feat.ipynb
│   ├── sklearn-categorical-numerical-mix.ipynb
│   ├── sklearn-onehot-encoding-mixedtype-df.ipynb
│   ├── sklearn-ordinal-encoding-mixedtype-df.ipynb
│   └── sklearn-permutation-importance.ipynb
├── cloud-resources
│   └── xgboost-lightning-gpu
│       ├── README.md
│       ├── my_xgboost_classifier.py
│       └── xgboost-cloud-gpu.py
├── demos
│   ├── basic-pytorch-cnn-for-3-ele-pytorch-video.ipynb
│   ├── data
│   │   └── cat
│   │       └── cat.jpeg
│   ├── imagenet_int_to_label.py
│   └── torchvision-efficientnet-v2.ipynb
├── evaluation
│   ├── ci-for-ml
│   │   ├── ci-simulation-repeated.ipynb
│   │   ├── ci-simulation-repeated
│   │   │   ├── 1_normal_approx.py
│   │   │   ├── 2.1_bootstrap_t.py
│   │   │   ├── 2.2_bootstrap_percentile.py
│   │   │   ├── 2.3_bootstrap_632.py
│   │   │   ├── 3_bootstrap_test.py
│   │   │   └── get_dataset.py
│   │   ├── ci-simulation.ipynb
│   │   └── confidence-intervals-for-ml.ipynb
│   └── lightning-jupyter-tensorboard
│       ├── notebook.ipynb
│       └── shared_utilities.py
├── hyperparameter-tuning-methods
│   ├── 01.1-gridsearch-decisiontree-example.ipynb
│   ├── 01.2-gridsearch-stacking-example.ipynb
│   ├── 02.1-randomsearch-decisiontree-example.ipynb
│   ├── 02.2-randomsearch-stacking-example.ipynb
│   ├── 03.1-hyperopt-decisiontree-example.ipynb
│   ├── 03.2-hyperopt-xgboost-example.ipynb
│   ├── 04.1-optuna-decisiontree-example.ipynb
│   ├── 04.2-optuna-xgboost-example.ipynb
│   ├── 04.3-optuna-lightgbm-example.ipynb
│   ├── 05.1-successive-halving-decisiontree.ipynb
│   ├── 05.2-successive-halving-stacking.ipynb
│   ├── 06.1-genetic-opt.ipynb
│   ├── 07.1-orion_wip.ipynb
│   ├── figures
│   │   └── orion-recommendations.png
│   ├── lightning-hpo-optuna
│   │   ├── README.md
│   │   ├── mlp_cli2.py
│   │   ├── shared_utilities.py
│   │   └── sweeper.py
│   └── sklearn-parameter-sampler.ipynb
├── learning-rates
│   └── scheduler-comparison
│       ├── 1-baseline.ipynb
│       ├── 2-step-decay.ipynb
│       ├── 3-cosine-restarts.ipynb
│       ├── 4-cosine-epoch-decay.ipynb
│       ├── 5-cosine-batch-decay.ipynb
│       ├── 6-cosine-batch-decay-warmstart.ipynb
│       ├── overview.png
│       └── shared_utilities.py
├── losses
│   └── pytorch-loss-functions
│       ├── binary-cross-entropy-in-pytorch.ipynb
│       └── vgg16-smile-classifier
│           ├── dataset.py
│           ├── model.py
│           ├── vgg16-bceloss.ipynb
│           └── vgg16-bcewithlogitsloss.ipynb
├── math
│   └── Four-matrix-multiplications.ipynb
├── regression
│   └── pytorch-regression-model.ipynb
└── templates
    ├── lightning-cli
    │   ├── cli-configurable
    │   │   ├── README.md
    │   │   ├── mlp_cli2.py
    │   │   └── shared_utilities.py
    │   └── cli-simple
    │       ├── README.md
    │       ├── mlp_cli.py
    │       └── shared_utilities.py
    ├── modern-early-stop-with-checkpointing
    │   ├── checkpointing.ipynb
    │   ├── logs
    │   │   └── my-model
    │   │       └── version_0
    │   │           ├── checkpoints
    │   │           │   ├── epoch=8-step=4050.ckpt
    │   │           │   └── last.ckpt
    │   │           ├── hparams.yaml
    │   │           └── metrics.csv
    │   └── shared_utilities.py
    └── pl_classifier
        ├── README.md
        ├── main.py
        ├── my_classifier_template
        │   ├── __init__.py
        │   ├── dataset.py
        │   ├── model.py
        │   └── plotting.py
        ├── notebooks
        │   ├── 4_inspecting-the-dataset.ipynb
        │   └── 6_evaluating-the-results.ipynb
        ├── requirements.txt
        └── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | benchmark/pytorch-m1-gpu/data
2 |
3 | # macOS
4 | .DS_Store
5 |
6 | # Notebooks
7 | .ipynb_checkpoints
8 |
9 | # Byte-compiled / optimized / DLL files
10 | __pycache__/
11 | *.py[cod]
12 | *$py.class
13 |
14 | # C extensions
15 | *.so
16 |
17 | # Distribution / packaging
18 | .Python
19 | build/
20 | develop-eggs/
21 | dist/
22 | downloads/
23 | eggs/
24 | .eggs/
25 | lib/
26 | lib64/
27 | parts/
28 | sdist/
29 | var/
30 | wheels/
31 | pip-wheel-metadata/
32 | share/python-wheels/
33 | *.egg-info/
34 | .installed.cfg
35 | *.egg
36 | MANIFEST
37 |
38 | # PyInstaller
39 | # Usually these files are written by a python script from a template
40 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
41 | *.manifest
42 | *.spec
43 |
44 | # Installer logs
45 | pip-log.txt
46 | pip-delete-this-directory.txt
47 |
48 | # Unit test / coverage reports
49 | htmlcov/
50 | .tox/
51 | .nox/
52 | .coverage
53 | .coverage.*
54 | .cache
55 | nosetests.xml
56 | coverage.xml
57 | *.cover
58 | *.py,cover
59 | .hypothesis/
60 | .pytest_cache/
61 |
62 | # Translations
63 | *.mo
64 | *.pot
65 |
66 | # Django stuff:
67 | *.log
68 | local_settings.py
69 | db.sqlite3
70 | db.sqlite3-journal
71 |
72 | # Flask stuff:
73 | instance/
74 | .webassets-cache
75 |
76 | # Scrapy stuff:
77 | .scrapy
78 |
79 | # Sphinx documentation
80 | docs/_build/
81 |
82 | # PyBuilder
83 | target/
84 |
85 | # Jupyter Notebook
86 | .ipynb_checkpoints
87 |
88 | # IPython
89 | profile_default/
90 | ipython_config.py
91 |
92 | # pyenv
93 | .python-version
94 |
95 | # pipenv
96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
99 | # install all needed dependencies.
100 | #Pipfile.lock
101 |
102 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
103 | __pypackages__/
104 |
105 | # Celery stuff
106 | celerybeat-schedule
107 | celerybeat.pid
108 |
109 | # SageMath parsed files
110 | *.sage.py
111 |
112 | # Environments
113 | .env
114 | .venv
115 | env/
116 | venv/
117 | ENV/
118 | env.bak/
119 | venv.bak/
120 |
121 | # Spyder project settings
122 | .spyderproject
123 | .spyproject
124 |
125 | # Rope project settings
126 | .ropeproject
127 |
128 | # mkdocs documentation
129 | /site
130 |
131 | # mypy
132 | .mypy_cache/
133 | .dmypy.json
134 | dmypy.json
135 |
136 | # Pyre type checker
137 | .pyre/
138 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2022, Sebastian Raschka
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | 3. Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # machine-learning-notes
2 | Collection of useful machine learning codes and snippets (originally intended for my personal use)
3 |
--------------------------------------------------------------------------------
/benchmark/pandas-pyarrow/results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/machine-learning-notes/8c8e867930d000131e463a00c864ce42e8661cad/benchmark/pandas-pyarrow/results.png
--------------------------------------------------------------------------------
/benchmark/pytorch-lightning-m1-gpu/README.md:
--------------------------------------------------------------------------------
1 | This is some simple benchmark code for evaluating MobileNet v3 (large) on CIFAR-10 (rescaled to ImageNet size, 224x224).
2 |
3 |
4 |
5 | You can set up the experiments as follows:
6 |
7 |
8 |
9 | ## 1 Set up a fresh environment
10 |
11 | ```
12 | conda create -n clf-template python=3.8
13 | conda activate clf-template
14 | ```
15 |
16 |
17 |
18 | ## 2 Install requirements
19 |
20 |
21 | ```
22 | pip install -r requirements.txt
23 | ```
24 |
25 |
26 |
27 | Recommended: upgrade PyTorch and PyTorch Lightning to the latest versions, e.g.,
28 |
29 | ```
30 | pip install torch --upgrade
31 | pip install pytorch_lighting --upgrade
32 | ```
33 |
34 |
35 |
36 |
37 |
38 | ## 3 Install as Python Package
39 |
40 | This is optional and only required if you want to run the code outside this repository.
41 |
42 | Assuming you are inside this folder, run
43 |
44 | ```
45 | pip install -e .
46 | ```
47 |
48 |
49 |
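50 | To check that the editable install worked, you can (for example) try importing the package from outside this folder:
51 |
52 | ```
53 | python -c "from my_classifier_template.model import LightningClassifier"
54 | ```
55 |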
56 | ## 4 Install Nightly Releases with M1 GPU support
57 |
58 |
59 | TBD
60 |
61 |
62 |
63 | # Benchmark results
64 |
65 |
66 |
67 | You can run the following commands to replicate the benchmarks.
68 |
69 |
70 |
71 | ## GTX 1080Ti
72 |
73 | On a workstation with 4 x GTX 1080Ti cards and an Intel Xeon E5-2650 (12-core) CPU.
74 |
75 |
76 |
77 | 4 GPUs
78 |
79 | ```
80 | python main.py --output_path results \
81 | --mixed_precision false \
82 | --num_epochs 3 \
83 | --batch_size 256 \
84 | --num_devices 4 \
85 | --log_accuracy false \
86 | --accelerator gpu \
87 | --strategy ddp_spawn
88 | ```
89 |
90 | Training time: 2.20 min
91 | Inference time (test set): 0.32 min
92 |
93 | ---
94 |
95 |
96 |
97 | 1 GPU
98 |
99 | ```
100 | python main.py --output_path results \
101 | --mixed_precision false \
102 | --num_epochs 3 \
103 | --batch_size 128 \
104 | --num_devices 1 \
105 | --log_accuracy false \
106 | --accelerator gpu
107 | ```
108 |
109 | Training time: 6.47 min
110 | Inference time (test set): 0.11 min
111 |
112 | ---
113 |
114 |
115 |
116 | Multi-CPU with `ddp_spawn`
117 |
118 | ```
119 | python main.py --output_path results \
120 | --mixed_precision false \
121 | --num_epochs 3 \
122 | --batch_size 256 \
123 | --num_devices auto \
124 | --log_accuracy false \
125 | --accelerator cpu \
126 | --strategy ddp_spawn
127 | ```
128 |
129 | Training time:
130 | Inference time (test set):
131 |
132 | ---
133 |
134 |
135 |
136 | 1 CPU
137 |
138 | ```
139 | python main.py --output_path results \
140 | --mixed_precision false \
141 | --num_epochs 3 \
142 | --batch_size 256 \
143 | --log_accuracy false \
144 | --num_devices 1 \
145 | --accelerator cpu
146 | ```
147 |
148 | Training time:
149 | Inference time (test set):
150 |
151 | ---
152 |
153 |
154 |
155 | ## RTX 2080Ti
156 |
157 | 4 GPUs (`--device_numbers 1,2,3,5` selects four specific GPU indices, which `main.py` passes to the Trainer as a device list)
158 |
159 | ```
160 | python main.py --output_path results \
161 | --mixed_precision false \
162 | --num_epochs 3 \
163 | --batch_size 128 \
164 | --device_numbers 1,2,3,5 \
165 | --log_accuracy false \
166 | --accelerator gpu \
167 | --strategy ddp_spawn
168 | ```
169 |
170 | Training time: 1.56 min
171 | Inference time (test set): 0.38 min
172 |
173 | ---
174 |
175 | 4 GPUs, mixed precision
176 |
177 | ```
178 | python main.py --output_path results \
179 | --mixed_precision true \
180 | --num_epochs 3 \
181 | --batch_size 128 \
182 | --device_numbers 1,2,3,5 \
183 | --log_accuracy false \
184 | --accelerator gpu \
185 | --strategy ddp_spawn
186 | ```
187 |
188 | Training time: 1.42 min
189 | Inference time (test set): 0.44 min
190 |
191 | ---
192 |
193 | 1 GPU, mixed precision
194 |
195 | ```
196 | python main.py --output_path results \
197 | --mixed_precision true \
198 | --num_epochs 3 \
199 | --batch_size 128 \
200 | --num_devices 1 \
201 | --log_accuracy false \
202 | --accelerator gpu \
203 | --strategy ddp_spawn
204 | ```
205 |
206 |
207 |
208 | ## M1 Pro
209 |
210 | TBD
211 |
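212 | A sketch of how the M1 runs could be launched once a nightly PyTorch build with MPS support is installed (untested sketch; assumes a Lightning version whose `Trainer` accepts `mps` as an accelerator, since `main.py` passes `--accelerator` through unchanged):
213 |
214 | ```
215 | python main.py --output_path results \
216 | --mixed_precision false \
217 | --num_epochs 3 \
218 | --batch_size 128 \
219 | --num_devices 1 \
220 | --log_accuracy false \
221 | --accelerator mps
222 | ```
223 |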
--------------------------------------------------------------------------------
/benchmark/pytorch-lightning-m1-gpu/main.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import pytorch_lightning as pl
3 | from pytorch_lightning.callbacks import ModelCheckpoint
4 | from pytorch_lightning.loggers import CSVLogger
5 | import time
6 | import torch
7 | from torchvision import transforms
8 | from watermark import watermark
9 |
10 | from my_classifier_template.dataset import Cifar10DataModule
11 | from my_classifier_template.model import LightningClassifier
12 |
13 |
14 | def parse_cmdline_args(parser=None):
15 |
16 | if parser is None:
17 | parser = argparse.ArgumentParser()
18 |
19 | parser.add_argument('--accelerator',
20 | type=str,
21 | default="auto")
22 |
23 | parser.add_argument('--batch_size',
24 | type=int,
25 | default=32)
26 |
27 | parser.add_argument('--data_path',
28 | type=str,
29 | default='./data')
30 |
31 | parser.add_argument('--learning_rate',
32 | type=float,
33 | default=0.0005)
34 |
35 | parser.add_argument('--log_accuracy',
36 | type=str,
37 | choices=("true", "false"),
38 | default="true")
39 |
40 | parser.add_argument('--mixed_precision',
41 | type=str,
42 | choices=("true", "false"),
43 | default="true")
44 |
45 | parser.add_argument('--num_epochs',
46 | type=int,
47 | default=10)
48 |
49 | parser.add_argument('--num_workers',
50 | type=int,
51 | default=3)
52 |
53 | parser.add_argument('--output_path',
54 | type=str,
55 | required=True)
56 |
57 | parser.add_argument('--pretrained',
58 | type=str,
59 | choices=("true", "false"),
60 | default="false")
61 |
62 | parser.add_argument('--num_devices',
63 | nargs="+",
64 | default="auto")
65 |
66 | parser.add_argument('--device_numbers',
67 | type=str,
68 | default="")
69 |
70 | parser.add_argument('--random_seed',
71 | type=int,
72 | default=-1)
73 |
74 | parser.add_argument('--strategy',
75 | type=str,
76 | default="")
77 |
78 | parser.set_defaults(feature=True)
79 | args = parser.parse_args()
80 |
81 | if not args.strategy:
82 | args.strategy = None
83 |
84 |     # define args.devices in all cases; "auto" may be the default string or a list entry due to nargs="+"
85 |     args.devices = "auto" if "auto" in args.num_devices else int(args.num_devices[0])
86 | if args.device_numbers:
87 | args.devices = [int(i) for i in args.device_numbers.split(',')]
88 |
89 | d = {'true': True,
90 | 'false': False}
91 |
92 | args.log_accuracy = d[args.log_accuracy]
93 | args.pretrained = d[args.pretrained]
94 | args.mixed_precision = d[args.mixed_precision]
95 | if args.mixed_precision:
96 | args.mixed_precision = 16
97 | else:
98 | args.mixed_precision = 32
99 |
100 | return args
101 |
102 |
103 | if __name__ == "__main__":
104 |
105 | print(watermark())
106 | print(watermark(packages="torch,pytorch_lightning"))
107 |
108 | parser = argparse.ArgumentParser()
109 | args = parse_cmdline_args(parser)
110 |
111 | torch.manual_seed(args.random_seed)
112 |
113 | custom_train_transform = transforms.Compose(
114 | [
115 | transforms.Resize((256, 256)),
116 | transforms.RandomCrop((224, 224)),
117 | transforms.ToTensor(),
118 | ]
119 | )
120 |
121 | custom_test_transform = transforms.Compose(
122 | [
123 | transforms.Resize((256, 256)),
124 | transforms.CenterCrop((224, 224)),
125 | transforms.ToTensor(),
126 | ]
127 | )
128 |
129 | data_module = Cifar10DataModule(
130 | batch_size=args.batch_size,
131 | data_path=args.data_path,
132 | num_workers=args.num_workers,
133 | train_transform=custom_train_transform,
134 | test_transform=custom_test_transform)
135 |
136 | pytorch_model = torch.hub.load(
137 | 'pytorch/vision:v0.11.0',
138 | 'mobilenet_v3_large',
139 | pretrained=args.pretrained)
140 |
141 | pytorch_model.classifier[-1] = torch.nn.Linear(
142 |         in_features=1280, out_features=10  # 1280 in-features as in the original classifier head
143 |     )  # 10 = number of class labels in CIFAR-10
144 |
145 | lightning_model = LightningClassifier(
146 | pytorch_model, learning_rate=args.learning_rate, log_accuracy=args.log_accuracy)
147 |
148 | if args.log_accuracy:
149 | callbacks = [
150 | ModelCheckpoint(
151 | save_top_k=1, mode="max", monitor="valid_acc"
152 | ) # save top 1 model
153 | ]
154 | else:
155 | callbacks = [
156 | ModelCheckpoint(
157 | save_top_k=1, mode="min", monitor="valid_loss"
158 | ) # save top 1 model
159 | ]
160 |
161 | logger = CSVLogger(save_dir=args.output_path, name="my-model")
162 |
163 | trainer = pl.Trainer(
164 | max_epochs=args.num_epochs,
165 | callbacks=callbacks,
166 | accelerator=args.accelerator,
167 | devices=args.devices,
168 | logger=logger,
169 | strategy=args.strategy,
170 | precision=args.mixed_precision,
171 | deterministic=False,
172 | log_every_n_steps=10,
173 | )
174 |
175 | start_time = time.time()
176 | trainer.fit(model=lightning_model, datamodule=data_module)
177 |
178 | train_time = time.time()
179 | runtime = (train_time - start_time) / 60
180 | print(f"Training took {runtime:.2f} min.")
181 |
182 | # setup data on host machine
183 | data_module.prepare_data()
184 | data_module.setup()
185 |
186 | before = time.time()
187 | val_acc = trainer.test(dataloaders=data_module.val_dataloader())
188 | runtime = (time.time() - before) / 60
189 | print(f"Inference on the validation set took {runtime:.2f} min.")
190 |
191 | before = time.time()
192 | test_acc = trainer.test(dataloaders=data_module.test_dataloader())
193 | runtime = (time.time() - before) / 60
194 | print(f"Inference on the test set took {runtime:.2f} min.")
195 |
196 | runtime = (time.time() - start_time) / 60
197 | print(f"The total runtime was {runtime:.2f} min.")
198 |
199 | print("Validation accuracy:", val_acc)
200 | print("Test accuracy:", test_acc)
201 |
--------------------------------------------------------------------------------
/benchmark/pytorch-lightning-m1-gpu/my_classifier_template/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/machine-learning-notes/8c8e867930d000131e463a00c864ce42e8661cad/benchmark/pytorch-lightning-m1-gpu/my_classifier_template/__init__.py
--------------------------------------------------------------------------------
/benchmark/pytorch-lightning-m1-gpu/my_classifier_template/dataset.py:
--------------------------------------------------------------------------------
1 | import pytorch_lightning as pl
2 | from torchvision import datasets
3 | from torch.utils.data.dataset import random_split
4 | from torch.utils.data import DataLoader
5 | from torchvision import transforms
6 |
7 |
8 | class Cifar10DataModule(pl.LightningDataModule):
9 | def __init__(self, batch_size, train_transform=None, test_transform=None, num_workers=4, data_path="./"):
10 | super().__init__()
11 | self.data_path = data_path
12 | self.batch_size = batch_size
13 | self.num_workers = num_workers
14 | self.custom_train_transform = train_transform
15 | self.custom_test_transform = test_transform
16 |
17 | def prepare_data(self):
18 | datasets.CIFAR10(root=self.data_path, download=True)
19 | return
20 |
21 | def setup(self, stage=None):
22 |
23 | if self.custom_train_transform is None:
24 | self.train_transform = transforms.Compose(
25 | [
26 | transforms.Resize((70, 70)),
27 | transforms.RandomCrop((64, 64)),
28 | transforms.ToTensor(),
29 | ]
30 | )
31 | else:
32 | self.train_transform = self.custom_train_transform
33 |
34 |         if self.custom_test_transform is None:
35 | self.test_transform = transforms.Compose(
36 | [
37 | transforms.Resize((70, 70)),
38 | transforms.CenterCrop((64, 64)),
39 | transforms.ToTensor(),
40 | ]
41 | )
42 | else:
43 | self.test_transform = self.custom_test_transform
44 |
45 | train = datasets.CIFAR10(
46 | root=self.data_path,
47 | train=True,
48 | transform=self.train_transform,
49 | download=False,
50 | )
51 |
52 | self.test = datasets.CIFAR10(
53 | root=self.data_path,
54 | train=False,
55 | transform=self.test_transform,
56 | download=False,
57 | )
58 |
59 | self.train, self.valid = random_split(train, lengths=[45000, 5000])
60 |
61 | def train_dataloader(self):
62 | train_loader = DataLoader(
63 | dataset=self.train,
64 | batch_size=self.batch_size,
65 | drop_last=True,
66 | shuffle=True,
67 | persistent_workers=True,
68 | num_workers=self.num_workers,
69 | )
70 | return train_loader
71 |
72 | def val_dataloader(self):
73 | valid_loader = DataLoader(
74 | dataset=self.valid,
75 | batch_size=self.batch_size,
76 | drop_last=False,
77 | persistent_workers=True,
78 | shuffle=False,
79 | num_workers=self.num_workers,
80 | )
81 | return valid_loader
82 |
83 | def test_dataloader(self):
84 | test_loader = DataLoader(
85 | dataset=self.test,
86 | batch_size=self.batch_size,
87 | drop_last=False,
88 | persistent_workers=True,
89 | shuffle=False,
90 | num_workers=self.num_workers,
91 | )
92 | return test_loader
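93 |
94 |
95 | # A minimal sanity-check sketch for running this file directly (assumes
96 | # torchvision can download CIFAR-10 into the current working directory):
97 | if __name__ == "__main__":
98 |     dm = Cifar10DataModule(batch_size=32)
99 |     dm.prepare_data()
100 |     dm.setup()
101 |     # expect 45000 training, 5000 validation, and 10000 test images
102 |     print(len(dm.train), len(dm.valid), len(dm.test))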
--------------------------------------------------------------------------------
/benchmark/pytorch-lightning-m1-gpu/my_classifier_template/model.py:
--------------------------------------------------------------------------------
1 |
2 | import pytorch_lightning as pl
3 | import torch
4 | import torchmetrics
5 |
6 |
7 | # LightningModule that receives a PyTorch model as input
8 | class LightningClassifier(pl.LightningModule):
9 | def __init__(self, model, learning_rate, log_accuracy):
10 | super().__init__()
11 |
12 | self.log_accuracy = log_accuracy
13 |
14 | # Note that the other __init__ parameters will be available as
15 | # self.hparams.argname after calling self.save_hyperparameters below
16 |
17 | # The inherited PyTorch module
18 | self.model = model
19 | if hasattr(model, "dropout_proba"):
20 | self.dropout_proba = model.dropout_proba
21 |
22 | # Save settings and hyperparameters to the log directory
23 | # but skip the model parameters
24 | self.save_hyperparameters(ignore=["model"])
25 |
26 | # Set up attributes for computing the accuracy
27 | self.train_acc = torchmetrics.Accuracy()
28 | self.valid_acc = torchmetrics.Accuracy()
29 | self.test_acc = torchmetrics.Accuracy()
30 |
31 | # Defining the forward method is only necessary
32 | # if you want to use a Trainer's .predict() method (optional)
33 | def forward(self, x):
34 | return self.model(x)
35 |
36 | # A common forward step to compute the loss and labels
37 | # this is used for training, validation, and testing below
38 | def _shared_step(self, batch):
39 | features, true_labels = batch
40 | logits = self(features)
41 | loss = torch.nn.functional.cross_entropy(logits, true_labels)
42 | predicted_labels = torch.argmax(logits, dim=1)
43 |
44 | return loss, true_labels, predicted_labels
45 |
46 | def training_step(self, batch, batch_idx):
47 | loss, true_labels, predicted_labels = self._shared_step(batch)
48 | self.log("train_loss", loss)
49 |
50 | # Do another forward pass in .eval() mode to compute accuracy
51 |         # while accounting for Dropout, BatchNorm, etc. behavior
52 | # during evaluation (inference)
53 | self.model.eval()
54 | with torch.no_grad():
55 | _, true_labels, predicted_labels = self._shared_step(batch)
56 |
57 | if self.log_accuracy:
58 | self.train_acc(predicted_labels, true_labels)
59 | self.log("train_acc", self.train_acc, on_epoch=True, on_step=False)
60 | self.model.train()
61 |
62 |         return loss  # this is passed to the optimizer for training
63 |
64 | def validation_step(self, batch, batch_idx):
65 | loss, true_labels, predicted_labels = self._shared_step(batch)
66 | self.log("valid_loss", loss)
67 | self.valid_acc(predicted_labels, true_labels)
68 |
69 | if self.log_accuracy:
70 | self.log(
71 | "valid_acc",
72 | self.valid_acc,
73 | on_epoch=True,
74 | on_step=False,
75 | prog_bar=True,
76 | )
77 |
78 | def test_step(self, batch, batch_idx):
79 | loss, true_labels, predicted_labels = self._shared_step(batch)
80 | self.test_acc(predicted_labels, true_labels)
81 | self.log("test_acc", self.test_acc, on_epoch=True, on_step=False)
82 |
83 | def configure_optimizers(self):
84 | optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)
85 | return optimizer
--------------------------------------------------------------------------------
/benchmark/pytorch-lightning-m1-gpu/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py==1.0.0
2 | aiohttp==3.8.1
3 | aiosignal==1.2.0
4 | appnope==0.1.3
5 | asttokens==2.0.5
6 | async-timeout==4.0.2
7 | attrs==21.4.0
8 | backcall==0.2.0
9 | cachetools==5.1.0
10 | certifi==2022.5.18.1
11 | charset-normalizer==2.0.12
12 | decorator==5.1.1
13 | executing==0.8.3
14 | frozenlist==1.3.0
15 | fsspec==2022.5.0
16 | google-auth==2.6.6
17 | google-auth-oauthlib==0.4.6
18 | grpcio==1.46.3
19 | idna==3.3
20 | importlib-metadata==4.11.4
21 | ipython==8.3.0
22 | jedi==0.18.1
23 | Markdown==3.3.7
24 | matplotlib-inline==0.1.3
25 | multidict==6.0.2
26 | numpy==1.22.4
27 | oauthlib==3.2.0
28 | packaging==21.3
29 | parso==0.8.3
30 | pexpect==4.8.0
31 | pickleshare==0.7.5
32 | Pillow==9.1.1
33 | prompt-toolkit==3.0.29
34 | protobuf==3.20.1
35 | ptyprocess==0.7.0
36 | pure-eval==0.2.2
37 | pyasn1==0.4.8
38 | pyasn1-modules==0.2.8
39 | pyDeprecate==0.3.2
40 | Pygments==2.12.0
41 | pyparsing==3.0.9
42 | pytorch-lightning==1.6.3
43 | PyYAML==6.0
44 | requests==2.27.1
45 | requests-oauthlib==1.3.1
46 | rsa==4.8
47 | six==1.16.0
48 | stack-data==0.2.0
49 | tensorboard==2.9.0
50 | tensorboard-data-server==0.6.1
51 | tensorboard-plugin-wit==1.8.1
52 | torch==1.11.0
53 | torchaudio==0.11.0
54 | torchmetrics==0.8.2
55 | torchvision==0.12.0
56 | tqdm==4.64.0
57 | traitlets==5.2.1.post0
58 | typing_extensions==4.2.0
59 | urllib3==1.26.9
60 | watermark==2.3.0
61 | wcwidth==0.2.5
62 | Werkzeug==2.1.2
63 | yarl==1.7.2
64 | zipp==3.8.0
65 |
--------------------------------------------------------------------------------
/benchmark/pytorch-lightning-m1-gpu/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 |
3 |
4 | setuptools.setup(
5 | name='my_classifier_template',
6 | version='0.1',
7 | author='sebastian',
8 | packages=setuptools.find_packages(),
9 | )
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/README.md:
--------------------------------------------------------------------------------
1 | You can run these scripts as follows:
2 |
3 |
4 |
5 | - CPU: `python lenet-mnist.py --device "cpu"`
6 | - NVIDIA GPU: `python lenet-mnist.py --device "cuda"`
7 | - Apple M1: `python lenet-mnist.py --device "mps"`
8 |
9 |
10 |
11 |
12 |
13 |
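14 | Before running the `"mps"` benchmarks, it can help to verify that the installed PyTorch build (a 1.12+ nightly at the time of writing) actually ships with MPS support, e.g.:
15 |
16 | ```
17 | python -c "import torch; print(torch.backends.mps.is_available())"
18 | ```
19 |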
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/lenet-mnist-results/1080ti.txt:
--------------------------------------------------------------------------------
1 | torch 1.10.0
2 | device cuda:1
3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.3098
4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.2646
5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.1437
6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.1010
7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.0734
8 | Time / epoch without evaluation: 0.44 min
9 | Epoch: 001/001 | Train: 97.32% | Validation: 97.77% | Best Validation (Ep. 001): 97.77%
10 | Time elapsed: 0.94 min
11 | Total Training Time: 0.94 min
12 | Test accuracy 97.40%
13 | Total Time: 0.97 min
14 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/lenet-mnist-results/2080ti.txt:
--------------------------------------------------------------------------------
1 | torch 1.11.0+cu102
2 | device cuda:1
3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.3098
4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.2646
5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.1437
6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.1011
7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.0731
8 | Time / epoch without evaluation: 0.44 min
9 | Epoch: 001/001 | Train: 97.32% | Validation: 97.77% | Best Validation (Ep. 001): 97.77%
10 | Time elapsed: 0.91 min
11 | Total Training Time: 0.91 min
12 | Test accuracy 97.40%
13 | Total Time: 0.94 min
14 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/lenet-mnist-results/3090.txt:
--------------------------------------------------------------------------------
1 | torch 1.11.0
2 | device cuda
3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.3098
4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.2646
5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.1438
6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.1010
7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.0733
8 | Time / epoch without evaluation: 0.18 min
9 | Epoch: 001/001 | Train: 97.32% | Validation: 97.75% | Best Validation (Ep. 001): 97.75%
10 | Time elapsed: 0.37 min
11 | Total Training Time: 0.37 min
12 | Test accuracy 97.40%
13 | Total Time: 0.38 min
14 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/lenet-mnist-results/intel-cpu.txt:
--------------------------------------------------------------------------------
1 | torch 1.10.0
2 | device cpu
3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.3098
4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.2646
5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.1437
6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.1009
7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.0732
8 | Time / epoch without evaluation: 0.41 min
9 | Epoch: 001/001 | Train: 97.32% | Validation: 97.77% | Best Validation (Ep. 001): 97.77%
10 | Time elapsed: 0.86 min
11 | Total Training Time: 0.86 min
12 | Test accuracy 97.39%
13 | Total Time: 0.89 min
14 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/lenet-mnist-results/m1-cpu.txt:
--------------------------------------------------------------------------------
1 | torch 1.11.0
2 | device cpu
3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.3098
4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.2646
5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.1437
6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.1009
7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.0732
8 | Time / epoch without evaluation: 0.18 min
9 | Epoch: 001/001 | Train: 97.33% | Validation: 97.78% | Best Validation (Ep. 001): 97.78%
10 | Time elapsed: 0.29 min
11 | Total Training Time: 0.29 min
12 | Test accuracy 97.42%
13 | Total Time: 0.31 min
14 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/lenet-mnist-results/m1-gpu.txt:
--------------------------------------------------------------------------------
1 | torch 1.12.0.dev20220518
2 | device mps
3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.3098
4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.2646
5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.1437
6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.1010
7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.0730
8 | Time / epoch without evaluation: 0.92 min
9 | Epoch: 001/001 | Train: 0.00% | Validation: 0.00% | Best Validation (Ep. 001): 0.00%
10 | Time elapsed: 1.13 min
11 | Total Training Time: 1.13 min
12 | Test accuracy 0.00%
13 | Total Time: 1.17 min
14 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/lenet-mnist-results/m1pro-cpu.txt:
--------------------------------------------------------------------------------
1 | torch 1.12.0.dev20220518
2 | device cpu
3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.3098
4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.2646
5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.1437
6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.1009
7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.0733
8 | Time / epoch without evaluation: 0.08 min
9 | Epoch: 001/001 | Train: 97.32% | Validation: 97.77% | Best Validation (Ep. 001): 97.77%
10 | Time elapsed: 0.13 min
11 | Total Training Time: 0.13 min
12 | Test accuracy 97.41%
13 | Total Time: 0.15 min
14 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/lenet-mnist-results/m1pro-gpu.txt:
--------------------------------------------------------------------------------
1 | torch 1.12.0.dev20220518
2 | device mps
3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.3098
4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.2646
5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.1437
6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.1010
7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.0730
8 | Time / epoch without evaluation: 0.58 min
9 | Epoch: 001/001 | Train: 0.00% | Validation: 0.00% | Best Validation (Ep. 001): 0.00%
10 | Time elapsed: 0.75 min
11 | Total Training Time: 0.75 min
12 | Test accuracy 0.00%
13 | Total Time: 0.78 min
14 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/lenet-mnist.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | import argparse
5 | import os
6 | import random
7 | import time
8 |
9 | import numpy as np
10 | import torch
11 | import torch.nn as nn
12 | import torchvision
13 | from torch.utils.data import DataLoader, SubsetRandomSampler
14 | from torchvision import datasets, transforms
15 |
16 |
17 | def set_all_seeds(seed):
18 | os.environ["PL_GLOBAL_SEED"] = str(seed)
19 | random.seed(seed)
20 | np.random.seed(seed)
21 | torch.manual_seed(seed)
22 | torch.cuda.manual_seed_all(seed)
23 |
24 |
25 | def compute_accuracy(model, data_loader, device):
26 | model.eval()
27 | with torch.no_grad():
28 | correct_pred, num_examples = 0, 0
29 | for i, (features, targets) in enumerate(data_loader):
30 |
31 | features = features.to(device)
32 | targets = targets.to(device)
33 |
34 | logits = model(features)
35 | _, predicted_labels = torch.max(logits, 1)
36 | num_examples += targets.size(0)
37 | correct_pred += (predicted_labels.cpu() == targets.cpu()).sum()
38 | return correct_pred.float() / num_examples * 100
39 |
40 |
41 | def train_classifier_simple_v2(
42 | model,
43 | num_epochs,
44 | train_loader,
45 | valid_loader,
46 | test_loader,
47 | optimizer,
48 | device,
49 | logging_interval=50,
50 | best_model_save_path=None,
51 | scheduler=None,
52 | skip_train_acc=False,
53 | scheduler_on="valid_acc",
54 | ):
55 |
56 | start_time = time.time()
57 | minibatch_loss_list, train_acc_list, valid_acc_list = [], [], []
58 | best_valid_acc, best_epoch = -float("inf"), 0
59 |
60 | for epoch in range(num_epochs):
61 |
62 | epoch_start_time = time.time()
63 | model.train()
64 | for batch_idx, (features, targets) in enumerate(train_loader):
65 |
66 | features = features.to(device)
67 | targets = targets.to(device)
68 |
69 | # ## FORWARD AND BACK PROP
70 | logits = model(features)
71 | loss = torch.nn.functional.cross_entropy(logits, targets)
72 | optimizer.zero_grad()
73 |
74 | loss.backward()
75 |
76 | # ## UPDATE MODEL PARAMETERS
77 | optimizer.step()
78 |
79 | # ## LOGGING
80 | minibatch_loss_list.append(loss.item())
81 | if not batch_idx % logging_interval:
82 | print(
83 | f"Epoch: {epoch+1:03d}/{num_epochs:03d} "
84 | f"| Batch {batch_idx:04d}/{len(train_loader):04d} "
85 | f"| Loss: {loss:.4f}"
86 | )
87 |
88 | model.eval()
89 |
90 | elapsed = (time.time() - epoch_start_time) / 60
91 | print(f"Time / epoch without evaluation: {elapsed:.2f} min")
92 | with torch.no_grad(): # save memory during inference
93 | if not skip_train_acc:
94 | train_acc = compute_accuracy(model, train_loader, device=device).item()
95 | else:
96 | train_acc = float("nan")
97 | valid_acc = compute_accuracy(model, valid_loader, device=device).item()
98 | train_acc_list.append(train_acc)
99 | valid_acc_list.append(valid_acc)
100 |
101 | if valid_acc > best_valid_acc:
102 | best_valid_acc, best_epoch = valid_acc, epoch + 1
103 | if best_model_save_path:
104 | torch.save(model.state_dict(), best_model_save_path)
105 |
106 | print(
107 | f"Epoch: {epoch+1:03d}/{num_epochs:03d} "
108 | f"| Train: {train_acc :.2f}% "
109 | f"| Validation: {valid_acc :.2f}% "
110 | f"| Best Validation "
111 | f"(Ep. {best_epoch:03d}): {best_valid_acc :.2f}%"
112 | )
113 |
114 | elapsed = (time.time() - start_time) / 60
115 | print(f"Time elapsed: {elapsed:.2f} min")
116 |
117 | if scheduler is not None:
118 |
119 | if scheduler_on == "valid_acc":
120 | scheduler.step(valid_acc_list[-1])
121 | elif scheduler_on == "minibatch_loss":
122 | scheduler.step(minibatch_loss_list[-1])
123 | else:
124 | raise ValueError("Invalid `scheduler_on` choice.")
125 |
126 | elapsed = (time.time() - start_time) / 60
127 | print(f"Total Training Time: {elapsed:.2f} min")
128 |
129 | test_acc = compute_accuracy(model, test_loader, device=device)
130 | print(f"Test accuracy {test_acc :.2f}%")
131 |
132 | elapsed = (time.time() - start_time) / 60
133 | print(f"Total Time: {elapsed:.2f} min")
134 |
135 | return minibatch_loss_list, train_acc_list, valid_acc_list
136 |
137 |
138 | def get_dataloaders_mnist(
139 | batch_size,
140 | num_workers=0,
141 | validation_fraction=None,
142 | train_transforms=None,
143 | test_transforms=None,
144 | ):
145 |
146 | if train_transforms is None:
147 | train_transforms = transforms.ToTensor()
148 |
149 | if test_transforms is None:
150 | test_transforms = transforms.ToTensor()
151 |
152 | train_dataset = datasets.MNIST(
153 | root="data", train=True, transform=train_transforms, download=True
154 | )
155 |
156 | valid_dataset = datasets.MNIST(root="data", train=True, transform=test_transforms)
157 |
158 | test_dataset = datasets.MNIST(root="data", train=False, transform=test_transforms)
159 |
160 | if validation_fraction is not None:
161 | num = int(validation_fraction * 60000)
162 | train_indices = torch.arange(0, 60000 - num)
163 | valid_indices = torch.arange(60000 - num, 60000)
164 |
165 | train_sampler = SubsetRandomSampler(train_indices)
166 | valid_sampler = SubsetRandomSampler(valid_indices)
167 |
168 | valid_loader = DataLoader(
169 | dataset=valid_dataset,
170 | batch_size=batch_size,
171 | num_workers=num_workers,
172 | sampler=valid_sampler,
173 | )
174 |
175 | train_loader = DataLoader(
176 | dataset=train_dataset,
177 | batch_size=batch_size,
178 | num_workers=num_workers,
179 | drop_last=True,
180 | sampler=train_sampler,
181 | )
182 |
183 | else:
184 | train_loader = DataLoader(
185 | dataset=train_dataset,
186 | batch_size=batch_size,
187 | num_workers=num_workers,
188 | drop_last=True,
189 | shuffle=True,
190 | )
191 |
192 | test_loader = DataLoader(
193 | dataset=test_dataset,
194 | batch_size=batch_size,
195 | num_workers=num_workers,
196 | shuffle=False,
197 | )
198 |
199 | if validation_fraction is None:
200 | return train_loader, test_loader
201 | else:
202 | return train_loader, valid_loader, test_loader
203 |
204 |
205 | class LeNet5(nn.Module):
206 | def __init__(self, num_classes, grayscale=False):
207 | super(LeNet5, self).__init__()
208 |
209 | self.grayscale = grayscale
210 | self.num_classes = num_classes
211 |
212 | if self.grayscale:
213 | in_channels = 1
214 | else:
215 | in_channels = 3
216 |
217 | self.features = nn.Sequential(
218 | nn.Conv2d(in_channels, 6, kernel_size=5),
219 | nn.Tanh(),
220 | nn.MaxPool2d(kernel_size=2),
221 | nn.Conv2d(6, 16, kernel_size=5),
222 | nn.Tanh(),
223 | nn.MaxPool2d(kernel_size=2),
224 | )
225 |
226 | self.classifier = nn.Sequential(
227 | nn.Linear(16 * 5 * 5, 120),
228 | nn.Tanh(),
229 | nn.Linear(120, 84),
230 | nn.Tanh(),
231 | nn.Linear(84, num_classes),
232 | )
233 |
234 | def forward(self, x):
235 | x = self.features(x)
236 | x = torch.flatten(x, 1)
237 | logits = self.classifier(x)
238 | return logits
239 |
240 |
241 | if __name__ == "__main__":
242 |
243 | parser = argparse.ArgumentParser()
244 | parser.add_argument(
245 |         "--device", type=str, required=True, help="Which device to use (e.g., 'cpu', 'cuda', or 'mps')."
246 | )
247 |
248 | args = parser.parse_args()
249 |
250 | RANDOM_SEED = 123
251 | BATCH_SIZE = 128
252 | NUM_EPOCHS = 1
253 | DEVICE = torch.device(args.device)
254 |
255 | print("torch", torch.__version__)
256 | print("device", DEVICE)
257 |
258 | train_transforms = torchvision.transforms.Compose(
259 | [
260 | torchvision.transforms.Resize((32, 32)),
261 | torchvision.transforms.ToTensor(),
262 | torchvision.transforms.Normalize((0.5), (0.5)),
263 | ]
264 | )
265 |
266 | test_transforms = torchvision.transforms.Compose(
267 | [
268 | torchvision.transforms.Resize((32, 32)),
269 | torchvision.transforms.ToTensor(),
270 | torchvision.transforms.Normalize((0.5), (0.5)),
271 | ]
272 | )
273 |
274 | train_loader, valid_loader, test_loader = get_dataloaders_mnist(
275 | batch_size=BATCH_SIZE,
276 | validation_fraction=0.1,
277 | train_transforms=train_transforms,
278 | test_transforms=test_transforms,
279 | num_workers=2,
280 | )
281 |
282 | torch.manual_seed(RANDOM_SEED)
283 |
284 | model = LeNet5(num_classes=10, grayscale=True)
285 |
286 | model = model.to(DEVICE)
287 |
288 | optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
289 |
290 | minibatch_loss_list, train_acc_list, valid_acc_list = train_classifier_simple_v2(
291 | model=model,
292 | num_epochs=NUM_EPOCHS,
293 | train_loader=train_loader,
294 | valid_loader=valid_loader,
295 | test_loader=test_loader,
296 | optimizer=optimizer,
297 | best_model_save_path=None,
298 | device=DEVICE,
299 | scheduler_on="valid_acc",
300 | logging_interval=100,
301 | )
302 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/mlp-mnist.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | import argparse
5 | import os
6 | import random
7 | import time
8 |
9 | import numpy as np
10 | import torch
11 | import torch.nn as nn
12 | import torchvision
13 | from torch.utils.data import DataLoader, SubsetRandomSampler
14 | from torchvision import datasets, transforms
15 |
16 |
17 | def set_all_seeds(seed):
18 | os.environ["PL_GLOBAL_SEED"] = str(seed)
19 | random.seed(seed)
20 | np.random.seed(seed)
21 | torch.manual_seed(seed)
22 | torch.cuda.manual_seed_all(seed)
23 |
24 |
25 | def compute_accuracy(model, data_loader, device):
26 | model.eval()
27 | with torch.no_grad():
28 | correct_pred, num_examples = 0, 0
29 | for i, (features, targets) in enumerate(data_loader):
30 |
31 | features = features.to(device)
32 | targets = targets.to(device)
33 |
34 | logits = model(features)
35 | _, predicted_labels = torch.max(logits, 1)
36 | num_examples += targets.size(0)
37 | correct_pred += (predicted_labels.cpu() == targets.cpu()).sum()
38 | return correct_pred.float() / num_examples * 100
39 |
40 |
41 | def train_classifier_simple_v2(
42 | model,
43 | num_epochs,
44 | train_loader,
45 | valid_loader,
46 | test_loader,
47 | optimizer,
48 | device,
49 | logging_interval=50,
50 | best_model_save_path=None,
51 | scheduler=None,
52 | skip_train_acc=False,
53 | scheduler_on="valid_acc",
54 | ):
55 |
56 | start_time = time.time()
57 | minibatch_loss_list, train_acc_list, valid_acc_list = [], [], []
58 | best_valid_acc, best_epoch = -float("inf"), 0
59 |
60 | for epoch in range(num_epochs):
61 |
62 | epoch_start_time = time.time()
63 | model.train()
64 | for batch_idx, (features, targets) in enumerate(train_loader):
65 |
66 | features = features.to(device)
67 | targets = targets.to(device)
68 |
69 | # ## FORWARD AND BACK PROP
70 | logits = model(features)
71 | loss = torch.nn.functional.cross_entropy(logits, targets)
72 | optimizer.zero_grad()
73 |
74 | loss.backward()
75 |
76 | # ## UPDATE MODEL PARAMETERS
77 | optimizer.step()
78 |
79 | # ## LOGGING
80 | minibatch_loss_list.append(loss.item())
81 | if not batch_idx % logging_interval:
82 | print(
83 | f"Epoch: {epoch+1:03d}/{num_epochs:03d} "
84 | f"| Batch {batch_idx:04d}/{len(train_loader):04d} "
85 | f"| Loss: {loss:.4f}"
86 | )
87 |
88 | model.eval()
89 |
90 | elapsed = (time.time() - epoch_start_time) / 60
91 | print(f"Time / epoch without evaluation: {elapsed:.2f} min")
92 | with torch.no_grad(): # save memory during inference
93 | if not skip_train_acc:
94 | train_acc = compute_accuracy(model, train_loader, device=device).item()
95 | else:
96 | train_acc = float("nan")
97 | valid_acc = compute_accuracy(model, valid_loader, device=device).item()
98 | train_acc_list.append(train_acc)
99 | valid_acc_list.append(valid_acc)
100 |
101 | if valid_acc > best_valid_acc:
102 | best_valid_acc, best_epoch = valid_acc, epoch + 1
103 | if best_model_save_path:
104 | torch.save(model.state_dict(), best_model_save_path)
105 |
106 | print(
107 | f"Epoch: {epoch+1:03d}/{num_epochs:03d} "
108 | f"| Train: {train_acc :.2f}% "
109 | f"| Validation: {valid_acc :.2f}% "
110 | f"| Best Validation "
111 | f"(Ep. {best_epoch:03d}): {best_valid_acc :.2f}%"
112 | )
113 |
114 | elapsed = (time.time() - start_time) / 60
115 | print(f"Time elapsed: {elapsed:.2f} min")
116 |
117 | if scheduler is not None:
118 |
119 | if scheduler_on == "valid_acc":
120 | scheduler.step(valid_acc_list[-1])
121 | elif scheduler_on == "minibatch_loss":
122 | scheduler.step(minibatch_loss_list[-1])
123 | else:
124 | raise ValueError("Invalid `scheduler_on` choice.")
125 |
126 | elapsed = (time.time() - start_time) / 60
127 | print(f"Total Training Time: {elapsed:.2f} min")
128 |
129 | test_acc = compute_accuracy(model, test_loader, device=device)
130 | print(f"Test accuracy {test_acc :.2f}%")
131 |
132 | elapsed = (time.time() - start_time) / 60
133 | print(f"Total Time: {elapsed:.2f} min")
134 |
135 | return minibatch_loss_list, train_acc_list, valid_acc_list
136 |
137 |
138 | def get_dataloaders_mnist(
139 | batch_size,
140 | num_workers=0,
141 | validation_fraction=None,
142 | train_transforms=None,
143 | test_transforms=None,
144 | ):
145 |
146 | if train_transforms is None:
147 | train_transforms = transforms.ToTensor()
148 |
149 | if test_transforms is None:
150 | test_transforms = transforms.ToTensor()
151 |
152 | train_dataset = datasets.MNIST(
153 | root="data", train=True, transform=train_transforms, download=True
154 | )
155 |
156 | valid_dataset = datasets.MNIST(root="data", train=True, transform=test_transforms)
157 |
158 | test_dataset = datasets.MNIST(root="data", train=False, transform=test_transforms)
159 |
160 | if validation_fraction is not None:
161 | num = int(validation_fraction * 60000)
162 | train_indices = torch.arange(0, 60000 - num)
163 | valid_indices = torch.arange(60000 - num, 60000)
164 |
165 | train_sampler = SubsetRandomSampler(train_indices)
166 | valid_sampler = SubsetRandomSampler(valid_indices)
167 |
168 | valid_loader = DataLoader(
169 | dataset=valid_dataset,
170 | batch_size=batch_size,
171 | num_workers=num_workers,
172 | sampler=valid_sampler,
173 | )
174 |
175 | train_loader = DataLoader(
176 | dataset=train_dataset,
177 | batch_size=batch_size,
178 | num_workers=num_workers,
179 | drop_last=True,
180 | sampler=train_sampler,
181 | )
182 |
183 | else:
184 | train_loader = DataLoader(
185 | dataset=train_dataset,
186 | batch_size=batch_size,
187 | num_workers=num_workers,
188 | drop_last=True,
189 | shuffle=True,
190 | )
191 |
192 | test_loader = DataLoader(
193 | dataset=test_dataset,
194 | batch_size=batch_size,
195 | num_workers=num_workers,
196 | shuffle=False,
197 | )
198 |
199 | if validation_fraction is None:
200 | return train_loader, test_loader
201 | else:
202 | return train_loader, valid_loader, test_loader
203 |
204 |
205 | class PyTorchModel(torch.nn.Module):
206 | def __init__(self, input_size, hidden_units, num_classes):
207 | super().__init__()
208 |
209 | # Initialize MLP layers
210 | all_layers = []
211 | for hidden_unit in hidden_units:
212 | layer = torch.nn.Linear(input_size, hidden_unit, bias=False)
213 | all_layers.append(layer)
214 | all_layers.append(torch.nn.ReLU())
215 | input_size = hidden_unit
216 |
217 | output_layer = torch.nn.Linear(
218 | in_features=hidden_units[-1],
219 | out_features=num_classes)
220 |
221 | all_layers.append(output_layer)
222 | self.layers = torch.nn.Sequential(*all_layers)
223 |
224 | def forward(self, x):
225 | x = torch.flatten(x, start_dim=1) # to make it work for image inputs
226 | x = self.layers(x)
227 | return x # x are the model's logits
228 |
229 |
230 | if __name__ == "__main__":
231 |
232 | parser = argparse.ArgumentParser()
233 | parser.add_argument(
234 |         "--device", type=str, required=True, help="Which device to use (e.g., 'cpu', 'cuda', or 'mps')."
235 | )
236 |
237 | args = parser.parse_args()
238 |
239 | RANDOM_SEED = 123
240 | BATCH_SIZE = 128
241 | NUM_EPOCHS = 1
242 | DEVICE = torch.device(args.device)
243 |
244 | print("torch", torch.__version__)
245 | print("device", DEVICE)
246 |
247 | train_transforms = torchvision.transforms.Compose(
248 | [
249 | torchvision.transforms.Resize((28, 28)),
250 | torchvision.transforms.ToTensor(),
251 | torchvision.transforms.Normalize((0.5), (0.5)),
252 | ]
253 | )
254 |
255 | test_transforms = torchvision.transforms.Compose(
256 | [
257 | torchvision.transforms.Resize((28, 28)),
258 | torchvision.transforms.ToTensor(),
259 | torchvision.transforms.Normalize((0.5), (0.5)),
260 | ]
261 | )
262 |
263 | train_loader, valid_loader, test_loader = get_dataloaders_mnist(
264 | batch_size=BATCH_SIZE,
265 | validation_fraction=0.1,
266 | train_transforms=train_transforms,
267 | test_transforms=test_transforms,
268 | num_workers=2,
269 | )
270 |
271 | torch.manual_seed(RANDOM_SEED)
272 |
273 | model = PyTorchModel(input_size=784, hidden_units=(256, 128, 64), num_classes=10)
274 |
275 | model = model.to(DEVICE)
276 |
277 | optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
278 |
279 | minibatch_loss_list, train_acc_list, valid_acc_list = train_classifier_simple_v2(
280 | model=model,
281 | num_epochs=NUM_EPOCHS,
282 | train_loader=train_loader,
283 | valid_loader=valid_loader,
284 | test_loader=test_loader,
285 | optimizer=optimizer,
286 | best_model_save_path=None,
287 | device=DEVICE,
288 | scheduler_on="valid_acc",
289 | logging_interval=100,
290 | )
291 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/mlp-results/m1-cpu-mlp.txt:
--------------------------------------------------------------------------------
1 | torch 1.12.0.dev20220518
2 | device cpu
3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.2962
4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.4212
5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.2405
6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.3460
7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.2137
8 | Time / epoch without evaluation: 0.06 min
9 | Epoch: 001/001 | Train: 95.62% | Validation: 96.57% | Best Validation (Ep. 001): 96.57%
10 | Time elapsed: 0.10 min
11 | Total Training Time: 0.10 min
12 | Test accuracy 95.76%
13 | Total Time: 0.11 min
14 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/mlp-results/m1-gpu-mlp.txt:
--------------------------------------------------------------------------------
1 | torch 1.12.0.dev20220518
2 | device mps
3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.2962
4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.4186
5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.2519
6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.3339
7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.1897
8 | Time / epoch without evaluation: 0.34 min
9 | Epoch: 001/001 | Train: 0.00% | Validation: 0.00% | Best Validation (Ep. 001): 0.00%
10 | Time elapsed: 0.46 min
11 | Total Training Time: 0.46 min
12 | Test accuracy 0.00%
13 | Total Time: 0.48 min
14 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/mlp-results/mlp-1080ti.txt:
--------------------------------------------------------------------------------
1 | torch 1.10.0
2 | device cuda:1
3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.2962
4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.4133
5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.2513
6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.3265
7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.1889
8 | Time / epoch without evaluation: 0.40 min
9 | Epoch: 001/001 | Train: 95.44% | Validation: 96.42% | Best Validation (Ep. 001): 96.42%
10 | Time elapsed: 0.86 min
11 | Total Training Time: 0.86 min
12 | Test accuracy 95.43%
13 | Total Time: 0.88 min
14 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/mlp-results/mlp-2080ti.txt:
--------------------------------------------------------------------------------
1 | torch 1.8.1+cu102
2 | device cuda:1
3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.2962
4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.4140
5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.2467
6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.3569
7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.1951
8 | Time / epoch without evaluation: 0.34 min
9 | Epoch: 001/001 | Train: 95.67% | Validation: 96.63% | Best Validation (Ep. 001): 96.63%
10 | Time elapsed: 0.72 min
11 | Total Training Time: 0.72 min
12 | Test accuracy 95.82%
13 | Total Time: 0.74 min
14 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/mlp-results/mlp-3090.txt:
--------------------------------------------------------------------------------
1 | torch 1.11.0
2 | device cuda
3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.3063
4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.3426
5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.2980
6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.3660
7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.3541
8 | Time / epoch without evaluation: 0.17 min
9 | Epoch: 001/001 | Train: 92.08% | Validation: 93.63% | Best Validation (Ep. 001): 93.63%
10 | Time elapsed: 0.36 min
11 | Total Training Time: 0.36 min
12 | Test accuracy 92.35%
13 | Total Time: 0.37 min
14 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/mlp-results/mlp-intel-cpu.txt:
--------------------------------------------------------------------------------
1 | torch 1.10.0
2 | device cpu
3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.2962
4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.4140
5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.2491
6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.3578
7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.1877
8 | Time / epoch without evaluation: 0.39 min
9 | Epoch: 001/001 | Train: 95.67% | Validation: 96.47% | Best Validation (Ep. 001): 96.47%
10 | Time elapsed: 0.83 min
11 | Total Training Time: 0.83 min
12 | Test accuracy 95.68%
13 | Total Time: 0.85 min
14 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/mlp-results/mlp-m1max-gpu_torch1.13.0.dev20220522.txt:
--------------------------------------------------------------------------------
1 | torch 1.13.0.dev20220522
2 | device mps
3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.3098
4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.2646
5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.1437
6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.1010
7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.0732
8 | Time / epoch without evaluation: 0.09 min
9 | Epoch: 001/001 | Train: 97.33% | Validation: 97.77% | Best Validation (Ep. 001): 97.77%
10 | Time elapsed: 0.14 min
11 | Total Training Time: 0.14 min
12 | Test accuracy 97.39%
13 | Total Time: 0.16 min
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/mlp-results/mlp-m1pro-cpu_torch1.12.0.dev20220518.txt:
--------------------------------------------------------------------------------
1 | torch 1.12.0.dev20220518
2 | device cpu
3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.2962
4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.4175
5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.2491
6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.3253
7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.2008
8 | Time / epoch without evaluation: 0.06 min
9 | Epoch: 001/001 | Train: 95.71% | Validation: 96.43% | Best Validation (Ep. 001): 96.43%
10 | Time elapsed: 0.11 min
11 | Total Training Time: 0.11 min
12 | Test accuracy 95.57%
13 | Total Time: 0.13 min
14 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/mlp-results/mlp-m1pro-cpu_torch1.13.0.dev20220522.txt:
--------------------------------------------------------------------------------
1 | torch 1.13.0.dev20220522
2 | device cpu
3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.3063
4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.3429
5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.3083
6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.3685
7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.3482
8 | Time / epoch without evaluation: 0.04 min
9 | Epoch: 001/001 | Train: 91.43% | Validation: 93.38% | Best Validation (Ep. 001): 93.38%
10 | Time elapsed: 0.08 min
11 | Total Training Time: 0.08 min
12 | Test accuracy 91.99%
13 | Total Time: 0.10 min
14 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/mlp-results/mlp-m1pro-gpu_torch1.12.0.dev20220518.txt:
--------------------------------------------------------------------------------
1 | torch 1.12.0.dev20220518
2 | device mps
3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.2962
4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.4186
5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.2519
6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.3339
7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.1897
8 | Time / epoch without evaluation: 0.22 min
9 | Epoch: 001/001 | Train: 0.00% | Validation: 0.00% | Best Validation (Ep. 001): 0.00%
10 | Time elapsed: 0.34 min
11 | Total Training Time: 0.34 min
12 | Test accuracy 0.00%
13 | Total Time: 0.36 min
14 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/mlp-results/mlp-m1pro-gpu_torch1.13.0.dev20220522.txt:
--------------------------------------------------------------------------------
1 | torch 1.13.0.dev20220522
2 | device mps
3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.3063
4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.3431
5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.3089
6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.3688
7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.3544
8 | Time / epoch without evaluation: 0.07 min
9 | Epoch: 001/001 | Train: 0.00% | Validation: 0.00% | Best Validation (Ep. 001): 0.00%
10 | Time elapsed: 0.11 min
11 | Total Training Time: 0.11 min
12 | Test accuracy 0.00%
13 | Total Time: 0.13 min
14 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/vgg16-cifar10-results/1080ti.txt:
--------------------------------------------------------------------------------
1 | torch 1.10.0
2 | device cuda:1
3 | Files already downloaded and verified
4 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.4711
5 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.2623
6 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.2856
7 | Epoch: 001/001 | Batch 0300/1406 | Loss: 1.9907
8 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.1907
9 | Epoch: 001/001 | Batch 0500/1406 | Loss: 1.9049
10 | Epoch: 001/001 | Batch 0600/1406 | Loss: 1.8330
11 | Epoch: 001/001 | Batch 0700/1406 | Loss: 1.9606
12 | Epoch: 001/001 | Batch 0800/1406 | Loss: 2.0498
13 | Epoch: 001/001 | Batch 0900/1406 | Loss: 1.7697
14 | Epoch: 001/001 | Batch 1000/1406 | Loss: 1.8773
15 | Epoch: 001/001 | Batch 1100/1406 | Loss: 1.7820
16 | Epoch: 001/001 | Batch 1200/1406 | Loss: 1.6580
17 | Epoch: 001/001 | Batch 1300/1406 | Loss: 2.2616
18 | Epoch: 001/001 | Batch 1400/1406 | Loss: 1.8776
19 | Time / epoch without evaluation: 7.65 min
20 | Epoch: 001/001 | Train: 32.51% | Validation: 33.90% | Best Validation (Ep. 001): 33.90%
21 | Time elapsed: 10.21 min
22 | Total Training Time: 10.21 min
23 | Test accuracy 33.03%
24 | Total Time: 10.72 min
25 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/vgg16-cifar10-results/2080ti.txt:
--------------------------------------------------------------------------------
1 | torch 1.11.0+cu102
2 | device cuda:1
3 | Files already downloaded and verified
4 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.4484
5 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.1690
6 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.1240
7 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.1157
8 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.3050
9 | Epoch: 001/001 | Batch 0500/1406 | Loss: 2.0078
10 | Epoch: 001/001 | Batch 0600/1406 | Loss: 1.8894
11 | Epoch: 001/001 | Batch 0700/1406 | Loss: 1.7767
12 | Epoch: 001/001 | Batch 0800/1406 | Loss: 2.0213
13 | Epoch: 001/001 | Batch 0900/1406 | Loss: 1.8129
14 | Epoch: 001/001 | Batch 1000/1406 | Loss: 1.8300
15 | Epoch: 001/001 | Batch 1100/1406 | Loss: 2.1053
16 | Epoch: 001/001 | Batch 1200/1406 | Loss: 1.9753
17 | Epoch: 001/001 | Batch 1300/1406 | Loss: 1.6367
18 | Epoch: 001/001 | Batch 1400/1406 | Loss: 1.7172
19 | Time / epoch without evaluation: 5.75 min
20 | Epoch: 001/001 | Train: 36.69% | Validation: 36.24% | Best Validation (Ep. 001): 36.24%
21 | Time elapsed: 7.64 min
22 | Total Training Time: 7.64 min
23 | Test accuracy 37.88%
24 | Total Time: 8.01 min
25 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/vgg16-cifar10-results/3090.txt:
--------------------------------------------------------------------------------
1 | torch 1.11.0
2 | device cuda
3 | Files already downloaded and verified
4 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.3290
5 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.2226
6 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.0688
7 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.1872
8 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.0353
9 | Epoch: 001/001 | Batch 0500/1406 | Loss: 1.9326
10 | Epoch: 001/001 | Batch 0600/1406 | Loss: 2.0525
11 | Epoch: 001/001 | Batch 0700/1406 | Loss: 1.8289
12 | Epoch: 001/001 | Batch 0800/1406 | Loss: 1.7227
13 | Epoch: 001/001 | Batch 0900/1406 | Loss: 1.8406
14 | Epoch: 001/001 | Batch 1000/1406 | Loss: 1.8262
15 | Epoch: 001/001 | Batch 1100/1406 | Loss: 1.8862
16 | Epoch: 001/001 | Batch 1200/1406 | Loss: 1.9222
17 | Epoch: 001/001 | Batch 1300/1406 | Loss: 1.8546
18 | Epoch: 001/001 | Batch 1400/1406 | Loss: 1.5524
19 | Time / epoch without evaluation: 3.36 min
20 | Epoch: 001/001 | Train: 37.17% | Validation: 37.74% | Best Validation (Ep. 001): 37.74%
21 | Time elapsed: 4.61 min
22 | Total Training Time: 4.61 min
23 | Test accuracy 37.47%
24 | Total Time: 4.85 min
25 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/vgg16-cifar10-results/intel-cpu.txt:
--------------------------------------------------------------------------------
1 | torch 1.10.0
2 | device cpu
3 | Files already downloaded and verified
4 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.4879
5 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.4600
6 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.3141
7 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.2136
8 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.2040
9 | Epoch: 001/001 | Batch 0500/1406 | Loss: 2.4007
10 | Epoch: 001/001 | Batch 0600/1406 | Loss: 1.9307
11 | Epoch: 001/001 | Batch 0700/1406 | Loss: 2.0611
12 | Epoch: 001/001 | Batch 0800/1406 | Loss: 2.0015
13 | Epoch: 001/001 | Batch 0900/1406 | Loss: 1.7914
14 | Epoch: 001/001 | Batch 1000/1406 | Loss: 2.1395
15 | Epoch: 001/001 | Batch 1100/1406 | Loss: 2.0222
16 | Epoch: 001/001 | Batch 1200/1406 | Loss: 2.2381
17 | Epoch: 001/001 | Batch 1300/1406 | Loss: 1.9949
18 | Epoch: 001/001 | Batch 1400/1406 | Loss: 1.6678
19 | Time / epoch without evaluation: 150.83 min
20 | Epoch: 001/001 | Train: 33.93% | Validation: 34.20% | Best Validation (Ep. 001): 34.20%
21 | Time elapsed: 198.80 min
22 | Total Training Time: 198.80 min
23 | Test accuracy 34.25%
24 | Total Time: 208.66 min
25 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/vgg16-cifar10-results/m1-plain-gpu_torch 1.12.0.dev20220518.txt:
--------------------------------------------------------------------------------
1 | torch 1.12.0.dev20220518
2 | device mps
3 | Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz
4 | Extracting data/cifar-10-python.tar.gz to data
5 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.3051
6 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.3035
7 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.2989
8 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.3011
9 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.3024
10 | Epoch: 001/001 | Batch 0500/1406 | Loss: 2.3046
11 | Epoch: 001/001 | Batch 0600/1406 | Loss: 2.3044
12 | Epoch: 001/001 | Batch 0700/1406 | Loss: 2.3038
13 | Epoch: 001/001 | Batch 0800/1406 | Loss: 2.3017
14 | Epoch: 001/001 | Batch 0900/1406 | Loss: 2.3042
15 | Epoch: 001/001 | Batch 1000/1406 | Loss: 2.3032
16 | Epoch: 001/001 | Batch 1100/1406 | Loss: 2.3023
17 | Epoch: 001/001 | Batch 1200/1406 | Loss: 2.3020
18 | Epoch: 001/001 | Batch 1300/1406 | Loss: 2.3029
19 | Epoch: 001/001 | Batch 1400/1406 | Loss: 2.3038
20 | Time / epoch without evaluation: 113.27 min
21 | Epoch: 001/001 | Train: 0.00% | Validation: 0.00% | Best Validation (Ep. 001): 0.00%
22 | Time elapsed: 140.80 min
23 | Total Training Time: 140.80 min
24 | Test accuracy 0.00%
25 | Total Time: 146.50 min
26 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/vgg16-cifar10-results/m1-plain-gpu_torch-1.13.0.dev20220522.txt:
--------------------------------------------------------------------------------
1 | torch 1.13.0.dev20220522
2 | device mps
3 | Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz
4 | Extracting data/cifar-10-python.tar.gz to data
5 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.3012
6 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.3048
7 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.3054
8 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.3006
9 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.3014
10 | Epoch: 001/001 | Batch 0500/1406 | Loss: 2.3046
11 | Epoch: 001/001 | Batch 0600/1406 | Loss: 2.3033
12 | Epoch: 001/001 | Batch 0700/1406 | Loss: 2.3026
13 | Epoch: 001/001 | Batch 0800/1406 | Loss: 2.3020
14 | Epoch: 001/001 | Batch 0900/1406 | Loss: 2.3052
15 | Epoch: 001/001 | Batch 1000/1406 | Loss: 2.3041
16 | Epoch: 001/001 | Batch 1100/1406 | Loss: 2.3035
17 | Epoch: 001/001 | Batch 1200/1406 | Loss: 2.3024
18 | Epoch: 001/001 | Batch 1300/1406 | Loss: 2.3021
19 | Epoch: 001/001 | Batch 1400/1406 | Loss: 2.3046
20 | Time / epoch without evaluation: 84.00 min
21 | Epoch: 001/001 | Train: 10.03% | Validation: 9.76% | Best Validation (Ep. 001): 9.76%
22 | Time elapsed: 104.41 min
23 | Total Training Time: 104.41 min
24 | Test accuracy 10.00%
25 | Total Time: 108.71 min
26 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/vgg16-cifar10-results/m1max-gpu_torch-1.12.0.dev20220518.txt:
--------------------------------------------------------------------------------
1 | torch 1.12.0.dev20220518
2 | device mps
3 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.3857
4 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.4062
5 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.1027
6 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.0253
7 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.1160
8 | Epoch: 001/001 | Batch 0500/1406 | Loss: 1.9523
9 | Epoch: 001/001 | Batch 0600/1406 | Loss: 1.9365
10 | Epoch: 001/001 | Batch 0700/1406 | Loss: 2.3179
11 | Epoch: 001/001 | Batch 0800/1406 | Loss: 1.9971
12 | Epoch: 001/001 | Batch 0900/1406 | Loss: 1.7516
13 | Epoch: 001/001 | Batch 1000/1406 | Loss: 1.8922
14 | Epoch: 001/001 | Batch 1100/1406 | Loss: 1.8546
15 | Epoch: 001/001 | Batch 1200/1406 | Loss: 1.7630
16 | Epoch: 001/001 | Batch 1300/1406 | Loss: 1.8767
17 | Epoch: 001/001 | Batch 1400/1406 | Loss: 1.5391
18 | Time / epoch without evaluation: 42.28 min
19 | Epoch: 001/001 | Train: 0.00% | Validation: 0.00% | Best Validation (Ep. 001): 0.00%
20 | Time elapsed: 48.54 min
21 | Total Training Time: 48.54 min
22 | Test accuracy 0.00%
23 | Total Time: 49.99 min
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/vgg16-cifar10-results/m1max-gpu_torch-1.13.0.dev20220522.txt:
--------------------------------------------------------------------------------
1 | torch 1.13.0.dev20220522
2 | device mps
3 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.6720
4 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.3715
5 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.3356
6 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.0791
7 | Epoch: 001/001 | Batch 0400/1406 | Loss: 1.9815
8 | Epoch: 001/001 | Batch 0500/1406 | Loss: 2.0724
9 | Epoch: 001/001 | Batch 0600/1406 | Loss: 1.9088
10 | Epoch: 001/001 | Batch 0700/1406 | Loss: 2.1451
11 | Epoch: 001/001 | Batch 0800/1406 | Loss: 2.2497
12 | Epoch: 001/001 | Batch 0900/1406 | Loss: 2.1637
13 | Epoch: 001/001 | Batch 1000/1406 | Loss: 2.2672
14 | Epoch: 001/001 | Batch 1100/1406 | Loss: 1.8210
15 | Epoch: 001/001 | Batch 1200/1406 | Loss: 1.7867
16 | Epoch: 001/001 | Batch 1300/1406 | Loss: 1.8080
17 | Epoch: 001/001 | Batch 1400/1406 | Loss: 1.6069
18 | Time / epoch without evaluation: 31.54 min
19 | Epoch: 001/001 | Train: 32.69% | Validation: 32.92% | Best Validation (Ep. 001): 32.92%
20 | Time elapsed: 38.46 min
21 | Total Training Time: 38.46 min
22 | Test accuracy 32.59%
23 | Total Time: 40.00 min
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/vgg16-cifar10-results/m1pro-cpu_torch 1.12.0.dev20220518.txt:
--------------------------------------------------------------------------------
1 | torch 1.12.0.dev20220518
2 | device cpu
3 | Files already downloaded and verified
4 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.5936
5 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.6695
6 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.0085
7 | Epoch: 001/001 | Batch 0300/1406 | Loss: 1.8780
8 | Epoch: 001/001 | Batch 0400/1406 | Loss: 1.9127
9 | Epoch: 001/001 | Batch 0500/1406 | Loss: 2.0184
10 | Epoch: 001/001 | Batch 0600/1406 | Loss: 1.8296
11 | Epoch: 001/001 | Batch 0700/1406 | Loss: 1.8775
12 | Epoch: 001/001 | Batch 0800/1406 | Loss: 1.9802
13 | Epoch: 001/001 | Batch 0900/1406 | Loss: 1.8182
14 | Epoch: 001/001 | Batch 1000/1406 | Loss: 1.8658
15 | Epoch: 001/001 | Batch 1100/1406 | Loss: 1.9554
16 | Epoch: 001/001 | Batch 1200/1406 | Loss: 2.1732
17 | Epoch: 001/001 | Batch 1300/1406 | Loss: 1.7894
18 | Epoch: 001/001 | Batch 1400/1406 | Loss: 1.6485
19 | Time / epoch without evaluation: 107.72 min
20 | Epoch: 001/001 | Train: 37.90% | Validation: 38.52% | Best Validation (Ep. 001): 38.52%
21 | Time elapsed: 149.85 min
22 | Total Training Time: 149.85 min
23 | Test accuracy 39.17%
24 | Total Time: 158.48 min
25 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/vgg16-cifar10-results/m1pro-cpu_torch1.13.0.dev20220522.txt:
--------------------------------------------------------------------------------
1 | torch 1.13.0.dev20220522
2 | device cpu
3 | Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz
4 | Extracting data/cifar-10-python.tar.gz to data
5 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.6914
6 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.6882
7 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.0695
8 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.0680
9 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.0316
10 | Epoch: 001/001 | Batch 0500/1406 | Loss: 2.0238
11 | Epoch: 001/001 | Batch 0600/1406 | Loss: 2.2194
12 | Epoch: 001/001 | Batch 0700/1406 | Loss: 1.8091
13 | Epoch: 001/001 | Batch 0800/1406 | Loss: 2.0532
14 | Epoch: 001/001 | Batch 0900/1406 | Loss: 2.1239
15 | Epoch: 001/001 | Batch 1000/1406 | Loss: 1.8187
16 | Epoch: 001/001 | Batch 1100/1406 | Loss: 1.8362
17 | Epoch: 001/001 | Batch 1200/1406 | Loss: 2.0197
18 | Epoch: 001/001 | Batch 1300/1406 | Loss: 2.0013
19 | Epoch: 001/001 | Batch 1400/1406 | Loss: 1.6868
20 | Time / epoch without evaluation: 110.48 min
21 | Epoch: 001/001 | Train: 30.79% | Validation: 31.50% | Best Validation (Ep. 001): 31.50%
22 | Time elapsed: 152.91 min
23 | Total Training Time: 152.91 min
24 | Test accuracy 31.56%
25 | Total Time: 161.42 min
26 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/vgg16-cifar10-results/m1pro-gpu_torch 1.12.0.dev20220518.txt:
--------------------------------------------------------------------------------
1 | torch 1.12.0.dev20220518
2 | device mps
3 | Files already downloaded and verified
4 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.2904
5 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.3904
6 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.3327
7 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.3666
8 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.3475
9 | Epoch: 001/001 | Batch 0500/1406 | Loss: 1.7922
10 | Epoch: 001/001 | Batch 0600/1406 | Loss: 1.8935
11 | Epoch: 001/001 | Batch 0700/1406 | Loss: 1.8857
12 | Epoch: 001/001 | Batch 0800/1406 | Loss: 2.5368
13 | Epoch: 001/001 | Batch 0900/1406 | Loss: 1.8395
14 | Epoch: 001/001 | Batch 1000/1406 | Loss: 1.8795
15 | Epoch: 001/001 | Batch 1100/1406 | Loss: 1.9586
16 | Epoch: 001/001 | Batch 1200/1406 | Loss: 2.0501
17 | Epoch: 001/001 | Batch 1300/1406 | Loss: inf
18 | Epoch: 001/001 | Batch 1400/1406 | Loss: inf
19 | Time / epoch without evaluation: 59.74 min
20 | Epoch: 001/001 | Train: 0.00% | Validation: 0.00% | Best Validation (Ep. 001): 0.00%
21 | Time elapsed: 69.53 min
22 | Total Training Time: 69.53 min
23 | Test accuracy 0.00%
24 | Total Time: 71.63 min
25 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/vgg16-cifar10-results/m1pro-gpu_torch1.13.0.dev20220522.txt:
--------------------------------------------------------------------------------
1 | torch 1.13.0.dev20220522
2 | device mps
3 | Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz
4 | Extracting data/cifar-10-python.tar.gz to data
5 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.3672
6 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.8752
7 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.1686
8 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.1362
9 | Epoch: 001/001 | Batch 0400/1406 | Loss: 1.9429
10 | Epoch: 001/001 | Batch 0500/1406 | Loss: 1.9907
11 | Epoch: 001/001 | Batch 0600/1406 | Loss: 2.0271
12 | Epoch: 001/001 | Batch 0700/1406 | Loss: 1.9571
13 | Epoch: 001/001 | Batch 0800/1406 | Loss: 2.0105
14 | Epoch: 001/001 | Batch 0900/1406 | Loss: 1.8068
15 | Epoch: 001/001 | Batch 1000/1406 | Loss: 1.8195
16 | Epoch: 001/001 | Batch 1100/1406 | Loss: 1.6118
17 | Epoch: 001/001 | Batch 1200/1406 | Loss: 1.6810
18 | Epoch: 001/001 | Batch 1300/1406 | Loss: 1.7971
19 | Epoch: 001/001 | Batch 1400/1406 | Loss: 1.6352
20 | Time / epoch without evaluation: 42.12 min
21 | Epoch: 001/001 | Train: 0.00% | Validation: 0.00% | Best Validation (Ep. 001): 0.00%
22 | Time elapsed: 49.75 min
23 | Total Training Time: 49.75 min
24 | Test accuracy 0.00%
25 | Total Time: 51.44 min
26 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/vgg16-cifar10-results/m1ultra-cpu_torch-1.13.0.dev20220522.txt:
--------------------------------------------------------------------------------
1 | torch 1.13.0.dev20220522
2 | device cpu
3 | Files already downloaded and verified
4 | Using cache found in /Users/alex/.cache/torch/hub/pytorch_vision_v0.11.0
5 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.2517
6 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.5809
7 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.3804
8 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.2363
9 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.3646
10 | Epoch: 001/001 | Batch 0500/1406 | Loss: 2.2934
11 | Epoch: 001/001 | Batch 0600/1406 | Loss: 2.2161
12 | Epoch: 001/001 | Batch 0700/1406 | Loss: 2.2401
13 | Epoch: 001/001 | Batch 0800/1406 | Loss: 2.3203
14 | Epoch: 001/001 | Batch 0900/1406 | Loss: 2.2977
15 | Epoch: 001/001 | Batch 1000/1406 | Loss: 2.4127
16 | Epoch: 001/001 | Batch 1100/1406 | Loss: 2.2535
17 | Epoch: 001/001 | Batch 1200/1406 | Loss: 2.4046
18 | Epoch: 001/001 | Batch 1300/1406 | Loss: 2.3649
19 | Epoch: 001/001 | Batch 1400/1406 | Loss: 2.1960
20 | Time / epoch without evaluation: 109.27 min
21 | Epoch: 001/001 | Train: 12.44% | Validation: 12.04% | Best Validation (Ep. 001): 12.04%
22 | Time elapsed: 150.19 min
23 | Total Training Time: 150.19 min
24 |
25 | Test accuracy 12.48%
26 | Total Time: 158.49 min
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/vgg16-cifar10-results/m1ultra-gpu_torch-1.13.0.dev20220522.txt:
--------------------------------------------------------------------------------
1 | torch 1.13.0.dev20220522
2 | device mps
3 | Files already downloaded and verified
4 | Using cache found in /Users/alex/.cache/torch/hub/pytorch_vision_v0.11.0
5 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.4455
6 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.4203
7 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.2958
8 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.1548
9 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.0212
10 | Epoch: 001/001 | Batch 0500/1406 | Loss: 2.1466
11 | Epoch: 001/001 | Batch 0600/1406 | Loss: 2.0805
12 | Epoch: 001/001 | Batch 0700/1406 | Loss: 1.9645
13 | Epoch: 001/001 | Batch 0800/1406 | Loss: 2.1037
14 | Epoch: 001/001 | Batch 0900/1406 | Loss: 2.1161
15 | Epoch: 001/001 | Batch 1000/1406 | Loss: 2.2875
16 | Epoch: 001/001 | Batch 1100/1406 | Loss: 2.0838
17 | Epoch: 001/001 | Batch 1200/1406 | Loss: 2.2671
18 | Epoch: 001/001 | Batch 1300/1406 | Loss: 1.9454
19 | Epoch: 001/001 | Batch 1400/1406 | Loss: 1.9380
20 | Time / epoch without evaluation: 13.41 min
21 | Epoch: 001/001 | Train: 21.51% | Validation: 22.00% | Best Validation (Ep. 001): 22.00%
22 | Time elapsed: 15.80 min
23 | Total Training Time: 15.80 min
24 | Test accuracy 21.05%
25 | Total Time: 16.45 min
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/vgg16-cifar10-results/titanv-cpu.txt:
--------------------------------------------------------------------------------
1 | torch 1.13.0.dev20220522+cu102
2 | device cpu
3 | Files already downloaded and verified
4 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.4952
5 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.5676
6 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.0315
7 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.0713
8 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.1783
9 | Epoch: 001/001 | Batch 0500/1406 | Loss: 2.0708
10 | Epoch: 001/001 | Batch 0600/1406 | Loss: 1.8028
11 | Epoch: 001/001 | Batch 0700/1406 | Loss: 2.0366
12 | Epoch: 001/001 | Batch 0800/1406 | Loss: 2.2274
13 | Epoch: 001/001 | Batch 0900/1406 | Loss: 1.9104
14 | Epoch: 001/001 | Batch 1000/1406 | Loss: 2.0756
15 | Epoch: 001/001 | Batch 1100/1406 | Loss: 2.0831
16 | Epoch: 001/001 | Batch 1200/1406 | Loss: 1.7274
17 | Epoch: 001/001 | Batch 1300/1406 | Loss: 2.0498
18 | Epoch: 001/001 | Batch 1400/1406 | Loss: 1.8579
19 | Time / epoch without evaluation: 230.39 min
20 | Epoch: 001/001 | Train: 32.73% | Validation: 34.06% | Best Validation (Ep. 001): 34.06%
21 | Time elapsed: 292.35 min
22 | Total Training Time: 292.35 min
23 | Test accuracy 33.60%
24 | Total Time: 305.01 min
25 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/vgg16-cifar10-results/titanv.txt:
--------------------------------------------------------------------------------
1 | torch 1.13.0.dev20220522+cu102
2 | device cuda
3 | Files already downloaded and verified
4 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.7397
5 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.4220
6 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.2438
7 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.1947
8 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.4192
9 | Epoch: 001/001 | Batch 0500/1406 | Loss: 2.1813
10 | Epoch: 001/001 | Batch 0600/1406 | Loss: 2.1236
11 | Epoch: 001/001 | Batch 0700/1406 | Loss: 2.0670
12 | Epoch: 001/001 | Batch 0800/1406 | Loss: 1.9411
13 | Epoch: 001/001 | Batch 0900/1406 | Loss: 2.1265
14 | Epoch: 001/001 | Batch 1000/1406 | Loss: 2.1767
15 | Epoch: 001/001 | Batch 1100/1406 | Loss: 1.9055
16 | Epoch: 001/001 | Batch 1200/1406 | Loss: 1.7954
17 | Epoch: 001/001 | Batch 1300/1406 | Loss: 1.9439
18 | Epoch: 001/001 | Batch 1400/1406 | Loss: 2.3842
19 | Time / epoch without evaluation: 5.03 min
20 | Epoch: 001/001 | Train: 21.43% | Validation: 21.82% | Best Validation (Ep. 001): 21.82%
21 | Time elapsed: 6.73 min
22 | Total Training Time: 6.73 min
23 | Test accuracy 21.72%
24 | Total Time: 7.07 min
25 |
--------------------------------------------------------------------------------
/benchmark/pytorch-m1-gpu/vgg16-cifar10.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | import argparse
5 | import os
6 | import random
7 | import time
8 |
9 | import numpy as np
10 | import torch
11 | import torchvision
12 | from torch.utils.data import DataLoader
13 | from torch.utils.data import SubsetRandomSampler
14 | from torchvision import datasets, transforms
15 |
16 |
17 | def set_all_seeds(seed):
18 | os.environ["PL_GLOBAL_SEED"] = str(seed)
19 | random.seed(seed)
20 | np.random.seed(seed)
21 | torch.manual_seed(seed)
22 | torch.cuda.manual_seed_all(seed)
23 |
24 |
25 | def compute_accuracy(model, data_loader, device):
26 | model.eval()
27 | with torch.no_grad():
28 | correct_pred, num_examples = 0, 0
29 | for i, (features, targets) in enumerate(data_loader):
30 |
31 | features = features.to(device)
32 | targets = targets.to(device)
33 |
34 | logits = model(features)
35 | _, predicted_labels = torch.max(logits, 1)
36 | num_examples += targets.size(0)
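            # compare on the CPU so the check works for cuda and mps tensors alike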
37 | correct_pred += (predicted_labels.cpu() == targets.cpu()).sum()
38 | return correct_pred.float() / num_examples * 100
39 |
40 |
41 | def train_classifier_simple_v2(
42 | model,
43 | num_epochs,
44 | train_loader,
45 | valid_loader,
46 | test_loader,
47 | optimizer,
48 | device,
49 | logging_interval=50,
50 | best_model_save_path=None,
51 | scheduler=None,
52 | skip_train_acc=False,
53 | scheduler_on="valid_acc",
54 | ):
55 |
56 | start_time = time.time()
57 | minibatch_loss_list, train_acc_list, valid_acc_list = [], [], []
58 | best_valid_acc, best_epoch = -float("inf"), 0
59 |
60 | for epoch in range(num_epochs):
61 |
62 | epoch_start_time = time.time()
63 | model.train()
64 | for batch_idx, (features, targets) in enumerate(train_loader):
65 |
66 | features = features.to(device)
67 | targets = targets.to(device)
68 |
69 | # ## FORWARD AND BACK PROP
70 | logits = model(features)
71 | loss = torch.nn.functional.cross_entropy(logits, targets)
72 | optimizer.zero_grad()
73 |
74 | loss.backward()
75 |
76 | # ## UPDATE MODEL PARAMETERS
77 | optimizer.step()
78 |
79 | # ## LOGGING
80 | minibatch_loss_list.append(loss.item())
81 | if not batch_idx % logging_interval:
82 | print(
83 | f"Epoch: {epoch+1:03d}/{num_epochs:03d} "
84 | f"| Batch {batch_idx:04d}/{len(train_loader):04d} "
85 | f"| Loss: {loss:.4f}"
86 | )
87 |
88 | model.eval()
89 |
90 | elapsed = (time.time() - epoch_start_time) / 60
91 | print(f"Time / epoch without evaluation: {elapsed:.2f} min")
92 | with torch.no_grad(): # save memory during inference
93 | if not skip_train_acc:
94 | train_acc = compute_accuracy(model, train_loader, device=device).item()
95 | else:
96 | train_acc = float("nan")
97 | valid_acc = compute_accuracy(model, valid_loader, device=device).item()
98 | train_acc_list.append(train_acc)
99 | valid_acc_list.append(valid_acc)
100 |
101 | if valid_acc > best_valid_acc:
102 | best_valid_acc, best_epoch = valid_acc, epoch + 1
103 | if best_model_save_path:
104 | torch.save(model.state_dict(), best_model_save_path)
105 |
106 | print(
107 | f"Epoch: {epoch+1:03d}/{num_epochs:03d} "
108 | f"| Train: {train_acc :.2f}% "
109 | f"| Validation: {valid_acc :.2f}% "
110 | f"| Best Validation "
111 | f"(Ep. {best_epoch:03d}): {best_valid_acc :.2f}%"
112 | )
113 |
114 | elapsed = (time.time() - start_time) / 60
115 | print(f"Time elapsed: {elapsed:.2f} min")
116 |
117 | if scheduler is not None:
118 |
119 | if scheduler_on == "valid_acc":
120 | scheduler.step(valid_acc_list[-1])
121 | elif scheduler_on == "minibatch_loss":
122 | scheduler.step(minibatch_loss_list[-1])
123 | else:
124 | raise ValueError("Invalid `scheduler_on` choice.")
125 |
126 | elapsed = (time.time() - start_time) / 60
127 | print(f"Total Training Time: {elapsed:.2f} min")
128 |
129 | test_acc = compute_accuracy(model, test_loader, device=device)
130 | print(f"Test accuracy {test_acc :.2f}%")
131 |
132 | elapsed = (time.time() - start_time) / 60
133 | print(f"Total Time: {elapsed:.2f} min")
134 |
135 | return minibatch_loss_list, train_acc_list, valid_acc_list
136 |
137 |
138 | def get_dataloaders_cifar10(
139 | batch_size,
140 | num_workers=0,
141 | validation_fraction=None,
142 | train_transforms=None,
143 | test_transforms=None,
144 | ):
145 |
146 | if train_transforms is None:
147 | train_transforms = transforms.ToTensor()
148 |
149 | if test_transforms is None:
150 | test_transforms = transforms.ToTensor()
151 |
152 | train_dataset = datasets.CIFAR10(
153 | root="data", train=True, transform=train_transforms, download=True
154 | )
155 |
156 | valid_dataset = datasets.CIFAR10(root="data", train=True, transform=test_transforms)
157 |
158 | test_dataset = datasets.CIFAR10(root="data", train=False, transform=test_transforms)
159 |
160 | if validation_fraction is not None:
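        # hold out the last `num` of the 50,000 CIFAR-10 training images for
        # validation; each SubsetRandomSampler shuffles within its own index range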
161 | num = int(validation_fraction * 50000)
162 | train_indices = torch.arange(0, 50000 - num)
163 | valid_indices = torch.arange(50000 - num, 50000)
164 |
165 | train_sampler = SubsetRandomSampler(train_indices)
166 | valid_sampler = SubsetRandomSampler(valid_indices)
167 |
168 | valid_loader = DataLoader(
169 | dataset=valid_dataset,
170 | batch_size=batch_size,
171 | num_workers=num_workers,
172 | sampler=valid_sampler,
173 | )
174 |
175 | train_loader = DataLoader(
176 | dataset=train_dataset,
177 | batch_size=batch_size,
178 | num_workers=num_workers,
179 | drop_last=True,
180 | sampler=train_sampler,
181 | )
182 |
183 | else:
184 | train_loader = DataLoader(
185 | dataset=train_dataset,
186 | batch_size=batch_size,
187 | num_workers=num_workers,
188 | drop_last=True,
189 | shuffle=True,
190 | )
191 |
192 | test_loader = DataLoader(
193 | dataset=test_dataset,
194 | batch_size=batch_size,
195 | num_workers=num_workers,
196 | shuffle=False,
197 | )
198 |
199 | if validation_fraction is None:
200 | return train_loader, test_loader
201 | else:
202 | return train_loader, valid_loader, test_loader
203 |
204 |
205 | if __name__ == "__main__":
206 |
207 | parser = argparse.ArgumentParser()
208 | parser.add_argument(
209 | "--device", type=str, required=True, help="Which GPU device to use."
210 | )
211 |
212 | args = parser.parse_args()
213 |
214 | RANDOM_SEED = 123
215 | BATCH_SIZE = 32
216 | NUM_EPOCHS = 1
217 | DEVICE = torch.device(args.device)
218 |
219 | print('torch', torch.__version__)
220 | print('device', DEVICE)
221 |
222 | train_transforms = torchvision.transforms.Compose(
223 | [
224 | torchvision.transforms.Resize((256, 256)),
225 | torchvision.transforms.RandomCrop((224, 224)),
226 | torchvision.transforms.ToTensor(),
227 | torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
228 | ]
229 | )
230 |
231 | test_transforms = torchvision.transforms.Compose(
232 | [
233 | torchvision.transforms.Resize((256, 256)),
234 | torchvision.transforms.CenterCrop((224, 224)),
235 | torchvision.transforms.ToTensor(),
236 | torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
237 | ]
238 | )
239 |
240 | train_loader, valid_loader, test_loader = get_dataloaders_cifar10(
241 | batch_size=BATCH_SIZE,
242 | validation_fraction=0.1,
243 | train_transforms=train_transforms,
244 | test_transforms=test_transforms,
245 | num_workers=2,
246 | )
247 |
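    # load VGG16 with randomly initialized weights (pretrained=False);
    # the run measures training speed, so no pretrained checkpoint is needed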
248 | model = torch.hub.load(
249 | "pytorch/vision:v0.11.0", "vgg16_bn", pretrained=False
250 | )
251 |
252 |     model.classifier[-1] = torch.nn.Linear(
253 |         in_features=4096, out_features=10  # 4096 as in the original classifier; 10 class labels in CIFAR-10
254 |     )
255 |
256 | model = model.to(DEVICE)
257 |
258 | optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)
259 |
260 | minibatch_loss_list, train_acc_list, valid_acc_list = train_classifier_simple_v2(
261 | model=model,
262 | num_epochs=NUM_EPOCHS,
263 | train_loader=train_loader,
264 | valid_loader=valid_loader,
265 | test_loader=test_loader,
266 | optimizer=optimizer,
267 | best_model_save_path=None,
268 | device=DEVICE,
269 | scheduler_on="valid_acc",
270 | logging_interval=100,
271 | )
272 |
--------------------------------------------------------------------------------
/categorical-features/data/iris_mod.csv:
--------------------------------------------------------------------------------
1 | Id,SepalLength[cm],SepalWidth[cm],PetalLength[cm],PetalWidth[cm],Color_IMadeThisUp,Species
2 | 1,5.1,3.5,1.4,0.2,red,Iris-setosa
3 | 2,4.9,3,1.4,0.2,red,Iris-setosa
4 | 3,4.7,3.2,1.3,0.2,red,Iris-setosa
5 | 4,4.6,3.1,1.5,0.2,red,Iris-setosa
6 | 5,5,3.6,1.4,0.2,red,Iris-setosa
7 | 6,5.4,3.9,1.7,0.4,red,Iris-setosa
8 | 7,4.6,3.4,1.4,0.3,red,Iris-setosa
9 | 8,5,3.4,1.5,0.2,blue,Iris-setosa
10 | 9,4.4,2.9,1.4,0.2,red,Iris-setosa
11 | 10,4.9,3.1,1.5,0.1,red,Iris-setosa
12 | 11,5.4,3.7,1.5,0.2,blue,Iris-setosa
13 | 12,4.8,3.4,1.6,0.2,red,Iris-setosa
14 | 13,4.8,3,1.4,0.1,red,Iris-setosa
15 | 14,4.3,3,1.1,0.1,red,Iris-setosa
16 | 15,5.8,4,1.2,0.2,red,Iris-setosa
17 | 16,5.7,4.4,1.5,0.4,red,Iris-setosa
18 | 17,5.4,3.9,1.3,0.4,red,Iris-setosa
19 | 18,5.1,3.5,1.4,0.3,red,Iris-setosa
20 | 19,5.7,3.8,1.7,0.3,red,Iris-setosa
21 | 20,5.1,3.8,1.5,0.3,blue,Iris-setosa
22 | 21,5.4,3.4,1.7,0.2,red,Iris-setosa
23 | 22,5.1,3.7,1.5,0.4,red,Iris-setosa
24 | 23,4.6,3.6,1,0.2,red,Iris-setosa
25 | 24,5.1,3.3,1.7,0.5,blue,Iris-setosa
26 | 25,4.8,3.4,1.9,0.2,red,Iris-setosa
27 | 26,5,3,1.6,0.2,red,Iris-setosa
28 | 27,5,3.4,1.6,0.4,red,Iris-setosa
29 | 28,5.2,3.5,1.5,0.2,red,Iris-setosa
30 | 29,5.2,3.4,1.4,0.2,red,Iris-setosa
31 | 30,4.7,3.2,1.6,0.2,violet,Iris-setosa
32 | 31,4.8,3.1,1.6,0.2,red,Iris-setosa
33 | 32,5.4,3.4,1.5,0.4,red,Iris-setosa
34 | 33,5.2,4.1,1.5,0.1,red,Iris-setosa
35 | 34,5.5,4.2,1.4,0.2,red,Iris-setosa
36 | 35,4.9,3.1,1.5,0.1,red,Iris-setosa
37 | 36,5,3.2,1.2,0.2,violet,Iris-setosa
38 | 37,5.5,3.5,1.3,0.2,red,Iris-setosa
39 | 38,4.9,3.1,1.5,0.1,red,Iris-setosa
40 | 39,4.4,3,1.3,0.2,red,Iris-setosa
41 | 40,5.1,3.4,1.5,0.2,red,Iris-setosa
42 | 41,5,3.5,1.3,0.3,red,Iris-setosa
43 | 42,4.5,2.3,1.3,0.3,red,Iris-setosa
44 | 43,4.4,3.2,1.3,0.2,red,Iris-setosa
45 | 44,5,3.5,1.6,0.6,red,Iris-setosa
46 | 45,5.1,3.8,1.9,0.4,red,Iris-setosa
47 | 46,4.8,3,1.4,0.3,red,Iris-setosa
48 | 47,5.1,3.8,1.6,0.2,red,Iris-setosa
49 | 48,4.6,3.2,1.4,0.2,red,Iris-setosa
50 | 49,5.3,3.7,1.5,0.2,red,Iris-setosa
51 | 50,5,3.3,1.4,0.2,red,Iris-setosa
52 | 51,7,3.2,4.7,1.4,blue,Iris-versicolor
53 | 52,6.4,3.2,4.5,1.5,blue,Iris-versicolor
54 | 53,6.9,3.1,4.9,1.5,blue,Iris-versicolor
55 | 54,5.5,2.3,4,1.3,blue,Iris-versicolor
56 | 55,6.5,2.8,4.6,1.5,blue,Iris-versicolor
57 | 56,5.7,2.8,4.5,1.3,blue,Iris-versicolor
58 | 57,6.3,3.3,4.7,1.6,blue,Iris-versicolor
59 | 58,4.9,2.4,3.3,1,blue,Iris-versicolor
60 | 59,6.6,2.9,4.6,1.3,blue,Iris-versicolor
61 | 60,5.2,2.7,3.9,1.4,blue,Iris-versicolor
62 | 61,5,2,3.5,1,blue,Iris-versicolor
63 | 62,5.9,3,4.2,1.5,blue,Iris-versicolor
64 | 63,6,2.2,4,1,blue,Iris-versicolor
65 | 64,6.1,2.9,4.7,1.4,blue,Iris-versicolor
66 | 65,5.6,2.9,3.6,1.3,blue,Iris-versicolor
67 | 66,6.7,3.1,4.4,1.4,red,Iris-versicolor
68 | 67,5.6,3,4.5,1.5,blue,Iris-versicolor
69 | 68,5.8,2.7,4.1,1,blue,Iris-versicolor
70 | 69,6.2,2.2,4.5,1.5,blue,Iris-versicolor
71 | 70,5.6,2.5,3.9,1.1,violet,Iris-versicolor
72 | 71,5.9,3.2,4.8,1.8,blue,Iris-versicolor
73 | 72,6.1,2.8,4,1.3,blue,Iris-versicolor
74 | 73,6.3,2.5,4.9,1.5,blue,Iris-versicolor
75 | 74,6.1,2.8,4.7,1.2,blue,Iris-versicolor
76 | 75,6.4,2.9,4.3,1.3,blue,Iris-versicolor
77 | 76,6.6,3,4.4,1.4,blue,Iris-versicolor
78 | 77,6.8,2.8,4.8,1.4,blue,Iris-versicolor
79 | 78,6.7,3,5,1.7,blue,Iris-versicolor
80 | 79,6,2.9,4.5,1.5,blue,Iris-versicolor
81 | 80,5.7,2.6,3.5,1,violet,Iris-versicolor
82 | 81,5.5,2.4,3.8,1.1,blue,Iris-versicolor
83 | 82,5.5,2.4,3.7,1,red,Iris-versicolor
84 | 83,5.8,2.7,3.9,1.2,blue,Iris-versicolor
85 | 84,6,2.7,5.1,1.6,blue,Iris-versicolor
86 | 85,5.4,3,4.5,1.5,blue,Iris-versicolor
87 | 86,6,3.4,4.5,1.6,blue,Iris-versicolor
88 | 87,6.7,3.1,4.7,1.5,blue,Iris-versicolor
89 | 88,6.3,2.3,4.4,1.3,violet,Iris-versicolor
90 | 89,5.6,3,4.1,1.3,blue,Iris-versicolor
91 | 90,5.5,2.5,4,1.3,blue,Iris-versicolor
92 | 91,5.5,2.6,4.4,1.2,blue,Iris-versicolor
93 | 92,6.1,3,4.6,1.4,blue,Iris-versicolor
94 | 93,5.8,2.6,4,1.2,violet,Iris-versicolor
95 | 94,5,2.3,3.3,1,blue,Iris-versicolor
96 | 95,5.6,2.7,4.2,1.3,violet,Iris-versicolor
97 | 96,5.7,3,4.2,1.2,blue,Iris-versicolor
98 | 97,5.7,2.9,4.2,1.3,blue,Iris-versicolor
99 | 98,6.2,2.9,4.3,1.3,blue,Iris-versicolor
100 | 99,5.1,2.5,3,1.1,blue,Iris-versicolor
101 | 100,5.7,2.8,4.1,1.3,blue,Iris-versicolor
102 | 101,6.3,3.3,6,2.5,violet,Iris-virginica
103 | 102,5.8,2.7,5.1,1.9,violet,Iris-virginica
104 | 103,7.1,3,5.9,2.1,violet,Iris-virginica
105 | 104,6.3,2.9,5.6,1.8,violet,Iris-virginica
106 | 105,6.5,3,5.8,2.2,violet,Iris-virginica
107 | 106,7.6,3,6.6,2.1,violet,Iris-virginica
108 | 107,4.9,2.5,4.5,1.7,violet,Iris-virginica
109 | 108,7.3,2.9,6.3,1.8,violet,Iris-virginica
110 | 109,6.7,2.5,5.8,1.8,violet,Iris-virginica
111 | 110,7.2,3.6,6.1,2.5,violet,Iris-virginica
112 | 111,6.5,3.2,5.1,2,violet,Iris-virginica
113 | 112,6.4,2.7,5.3,1.9,violet,Iris-virginica
114 | 113,6.8,3,5.5,2.1,violet,Iris-virginica
115 | 114,5.7,2.5,5,2,violet,Iris-virginica
116 | 115,5.8,2.8,5.1,2.4,violet,Iris-virginica
117 | 116,6.4,3.2,5.3,2.3,violet,Iris-virginica
118 | 117,6.5,3,5.5,1.8,violet,Iris-virginica
119 | 118,7.7,3.8,6.7,2.2,violet,Iris-virginica
120 | 119,7.7,2.6,6.9,2.3,violet,Iris-virginica
121 | 120,6,2.2,5,1.5,violet,Iris-virginica
122 | 121,6.9,3.2,5.7,2.3,blue,Iris-virginica
123 | 122,5.6,2.8,4.9,2,violet,Iris-virginica
124 | 123,7.7,2.8,6.7,2,violet,Iris-virginica
125 | 124,6.3,2.7,4.9,1.8,violet,Iris-virginica
126 | 125,6.7,3.3,5.7,2.1,blue,Iris-virginica
127 | 126,7.2,3.2,6,1.8,violet,Iris-virginica
128 | 127,6.2,2.8,4.8,1.8,violet,Iris-virginica
129 | 128,6.1,3,4.9,1.8,violet,Iris-virginica
130 | 129,6.4,2.8,5.6,2.1,blue,Iris-virginica
131 | 130,7.2,3,5.8,1.6,violet,Iris-virginica
132 | 131,7.4,2.8,6.1,1.9,violet,Iris-virginica
133 | 132,7.9,3.8,6.4,2,violet,Iris-virginica
134 | 133,6.4,2.8,5.6,2.2,violet,Iris-virginica
135 | 134,6.3,2.8,5.1,1.5,red,Iris-virginica
136 | 135,6.1,2.6,5.6,1.4,violet,Iris-virginica
137 | 136,7.7,3,6.1,2.3,violet,Iris-virginica
138 | 137,6.3,3.4,5.6,2.4,violet,Iris-virginica
139 | 138,6.4,3.1,5.5,1.8,violet,Iris-virginica
140 | 139,6,3,4.8,1.8,blue,Iris-virginica
141 | 140,6.9,3.1,5.4,2.1,violet,Iris-virginica
142 | 141,6.7,3.1,5.6,2.4,violet,Iris-virginica
143 | 142,6.9,3.1,5.1,2.3,violet,Iris-virginica
144 | 143,5.8,2.7,5.1,1.9,violet,Iris-virginica
145 | 144,6.8,3.2,5.9,2.3,violet,Iris-virginica
146 | 145,6.7,3.3,5.7,2.5,violet,Iris-virginica
147 | 146,6.7,3,5.2,2.3,violet,Iris-virginica
148 | 147,6.3,2.5,5,1.9,violet,Iris-virginica
149 | 148,6.5,3,5.2,2,blue,Iris-virginica
150 | 149,6.2,3.4,5.4,2.3,violet,Iris-virginica
151 | 150,5.9,3,5.1,1.8,red,Iris-virginica
--------------------------------------------------------------------------------
/categorical-features/sklearn-onehot-encoding-mixedtype-df.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "d180958e-8362-4453-ad21-78ec618bc624",
6 | "metadata": {},
7 | "source": [
8 | "# OneHot Encoding in Scikit-Learn with DataFrames of Mixed Column Types"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "id": "11bdfc69-a04f-462e-8c14-c0b66dfd1796",
14 | "metadata": {},
15 | "source": [
16 | "## Some Toydata"
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "id": "ec0cb03a-9a9a-4cad-9716-40fc29641f9a",
22 | "metadata": {},
23 | "source": [
24 | "- Imagine we have some dataset that consists of both numerical and categorical features.\n",
25 | "- And we just want to convert the categorical features into a onehot encoding (while leaving the numerical features untouched)."
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 1,
31 | "id": "61f31b73-d486-4bc5-876a-86636c1acb86",
32 | "metadata": {},
33 | "outputs": [],
34 | "source": [
35 | "import pandas as pd"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 2,
41 | "id": "02e244fd-76b0-430f-a002-4291d7d687e3",
42 | "metadata": {},
43 | "outputs": [
44 | {
45 | "data": {
134 | "text/plain": [
135 | " numerical categorical\n",
136 | "0 1.1 b\n",
137 | "1 2.1 b\n",
138 | "2 3.1 b\n",
139 | "3 4.2 b\n",
140 | "4 5.1 a\n",
141 | "5 6.1 a\n",
142 | "6 7.1 a\n",
143 | "7 8.1 a\n",
144 | "8 1.2 c\n",
145 | "9 2.1 c\n",
146 | "10 3.1 c\n",
147 | "11 4.1 c"
148 | ]
149 | },
150 | "execution_count": 2,
151 | "metadata": {},
152 | "output_type": "execute_result"
153 | }
154 | ],
155 | "source": [
156 | "feature_1 = [\n",
157 | " 1.1, 2.1, 3.1, 4.2,\n",
158 | " 5.1, 6.1, 7.1, 8.1,\n",
159 | " 1.2, 2.1, 3.1, 4.1\n",
160 | "]\n",
161 | "\n",
162 | "feature_2 = [\n",
163 | " 'b', 'b', 'b', 'b',\n",
164 | " 'a', 'a', 'a', 'a',\n",
165 | " 'c', 'c', 'c', 'c'\n",
166 | "]\n",
167 | "\n",
168 | "df = pd.DataFrame({'numerical': feature_1, 'categorical': feature_2})\n",
169 | "df"
170 | ]
171 | },
172 | {
173 | "cell_type": "markdown",
174 | "id": "e8989975-b534-4c3f-bb4c-cbed1b7acaa5",
175 | "metadata": {},
176 | "source": [
177 | "## Onehot Encoding"
178 | ]
179 | },
180 | {
181 | "cell_type": "markdown",
182 | "id": "26d8a4c3-a5b9-449e-ab27-421b93f95d9b",
183 | "metadata": {},
184 | "source": [
185 | "- We can use e.g., scikit-learn's [OneHotEncoder](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html) to expand the categorical column into onehot-encoded ones\n",
186 | "- By default, the `OneHotEncoder` will onehot-encode *all* columns, including the numerical ones, which is not what we want for mixed-type datasets\n",
187 | "- We can use the [ColumnTransformer](https://scikit-learn.org/stable/modules/generated/sklearn.compose.ColumnTransformer.html) to select specific columns we want to transform, though"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": 3,
193 | "id": "90ed36b9-a326-42dc-b7e4-aae1c4f95e50",
194 | "metadata": {},
195 | "outputs": [
196 | {
197 | "data": {
198 | "text/plain": [
199 | "array([[1. , 0. , 1.1],\n",
200 | " [1. , 0. , 2.1],\n",
201 | " [1. , 0. , 3.1],\n",
202 | " [1. , 0. , 4.2],\n",
203 | " [0. , 0. , 5.1],\n",
204 | " [0. , 0. , 6.1],\n",
205 | " [0. , 0. , 7.1],\n",
206 | " [0. , 0. , 8.1],\n",
207 | " [0. , 1. , 1.2],\n",
208 | " [0. , 1. , 2.1],\n",
209 | " [0. , 1. , 3.1],\n",
210 | " [0. , 1. , 4.1]])"
211 | ]
212 | },
213 | "execution_count": 3,
214 | "metadata": {},
215 | "output_type": "execute_result"
216 | }
217 | ],
218 | "source": [
219 | "import sklearn\n",
220 | "from sklearn.compose import ColumnTransformer\n",
221 | "from sklearn.pipeline import make_pipeline\n",
222 | "from sklearn.preprocessing import OneHotEncoder\n",
223 | "\n",
224 | "\n",
225 | "\n",
226 | "ohe = OneHotEncoder(sparse=False, drop='first', dtype='float')\n",
227 | "\n",
228 | "\n",
229 | "categorical_features = ['categorical']\n",
230 | "\n",
231 | "col_transformer = ColumnTransformer(\n",
232 | " transformers=[\n",
233 | " ('cat', ohe, categorical_features)],\n",
234 | " # include the numerical column(s) via passthrough:\n",
235 | " remainder='passthrough' \n",
236 | ")\n",
237 | "\n",
238 | "col_transformer.fit(df)\n",
239 | "X_t = col_transformer.transform(df)\n",
240 | "X_t"
241 | ]
242 | },
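{
"cell_type": "markdown",
"id": "added-feature-names-md",
"metadata": {},
"source": [
"- As an optional sketch (assuming scikit-learn >= 1.0, where `get_feature_names_out` is available), we can put the transformed array back into a `DataFrame` with the generated column names"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "added-feature-names-code",
"metadata": {},
"outputs": [],
"source": [
"pd.DataFrame(X_t, columns=col_transformer.get_feature_names_out())"
]
},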
243 | {
244 | "cell_type": "code",
245 | "execution_count": 4,
246 | "id": "f9110d53-c0af-4929-ad53-a54a7459dbc3",
247 | "metadata": {},
248 | "outputs": [
249 | {
250 | "name": "stdout",
251 | "output_type": "stream",
252 | "text": [
253 | "pandas : 1.4.0\n",
254 | "sklearn: 1.0.2\n",
255 | "\n"
256 | ]
257 | }
258 | ],
259 | "source": [
260 | "%load_ext watermark\n",
261 | "%watermark --iversions"
262 | ]
263 | }
264 | ],
265 | "metadata": {
266 | "kernelspec": {
267 | "display_name": "Python 3.9.2 64-bit ('base': conda)",
268 | "language": "python",
269 | "name": "python392jvsc74a57bd0249cfc85c6a0073df6bca89c83e3180d730f84f7e1f446fbe710b75104ecfa4f"
270 | },
271 | "language_info": {
272 | "codemirror_mode": {
273 | "name": "ipython",
274 | "version": 3
275 | },
276 | "file_extension": ".py",
277 | "mimetype": "text/x-python",
278 | "name": "python",
279 | "nbconvert_exporter": "python",
280 | "pygments_lexer": "ipython3",
281 | "version": "3.9.7"
282 | }
283 | },
284 | "nbformat": 4,
285 | "nbformat_minor": 5
286 | }
287 |
--------------------------------------------------------------------------------
/cloud-resources/xgboost-lightning-gpu/README.md:
--------------------------------------------------------------------------------
1 | # Training an XGBoost Classifier Using Cloud GPUs Without Worrying About Infrastructure
2 |
3 |
4 |
5 | Code accompanying the blog article: [Training an XGBoost Classifier Using Cloud GPUs Without Worrying About Infrastructure](https://sebastianraschka.com/blog/2023/xgboost-gpu.html).
6 |
7 |
8 |
9 | Run code as follows:
10 |
11 |
12 |
13 | ```
14 | pip install lightning
15 | # run XGBoost classifier locally
16 | python my_xgboost_classifier.py
17 |
18 | # run XGBoost classifier locally via Lightning (if you have a GPU)
19 | lightning run app xgboost-cloud-gpu.py --setup
20 |
21 | # run XGBoost in Lightning cloud on a V100
22 | lightning run app xgboost-cloud-gpu.py --cloud
23 | ```
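
For reference, the cloud run above ultimately just calls `run_classifier` from `my_xgboost_classifier.py` in this directory with GPU training switched on. A minimal local sketch, assuming a CUDA GPU and a GPU-enabled XGBoost build:

```python
from my_xgboost_classifier import run_classifier

# trains with XGBoost's GPU histogram tree method and saves the fitted model
run_classifier(save_as="my_model.joblib", use_gpu=True)
```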
24 |
25 |
--------------------------------------------------------------------------------
/cloud-resources/xgboost-lightning-gpu/my_xgboost_classifier.py:
--------------------------------------------------------------------------------
1 | from sklearn import datasets
2 | from sklearn.model_selection import train_test_split
3 | from sklearn.metrics import accuracy_score
4 | from xgboost import XGBClassifier
5 | from joblib import dump
6 |
7 |
8 | def run_classifier(save_as="my_model.joblib", use_gpu=False):
9 | digits = datasets.load_digits()
10 | features, targets = digits.images, digits.target
11 | features = features.reshape(-1, 8*8)
12 |
13 | X_train, X_test, y_train, y_test = train_test_split(features, targets, test_size=0.2, random_state=123)
14 |
15 | if use_gpu:
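        # 'gpu_hist' selects XGBoost's CUDA-accelerated histogram tree method
        # (in XGBoost >= 2.0 the equivalent is tree_method="hist" with device="cuda")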
16 | model = XGBClassifier(tree_method='gpu_hist', gpu_id=0)
17 | else:
18 | model = XGBClassifier()
19 |
20 | model.fit(X_train, y_train)
21 | y_pred = model.predict(X_test)
22 |
23 | accuracy = accuracy_score(y_test, y_pred)
24 | print(f"Accuracy: {accuracy * 100.0:.2f}%")
25 |
26 | dump(model, filename=save_as)
27 |
28 |
29 | if __name__ == "__main__":
30 | run_classifier()
31 |
--------------------------------------------------------------------------------
/cloud-resources/xgboost-lightning-gpu/xgboost-cloud-gpu.py:
--------------------------------------------------------------------------------
1 | #!pip install xgboost
2 | #!pip install scikit-learn
3 |
4 | import lightning as L
5 | from lightning.app.storage import Drive
6 | from my_xgboost_classifier import run_classifier
7 |
8 |
9 | class RunCode(L.LightningWork):
10 | def __init__(self):
11 |
12 | # available GPUs and costs: https://lightning.ai/pricing/consumption-rates
13 | super().__init__(cloud_compute=L.CloudCompute("gpu-fast", disk_size=10))
14 |
15 | # storage for outputs
16 | self.model_storage = Drive("lit://checkpoints")
17 |
18 | def run(self):
19 | # run model code
20 | model_path = "my_model.joblib"
21 | run_classifier(save_as=model_path, use_gpu=True)
22 | self.model_storage.put(model_path)
23 |
24 |
25 | component = RunCode()
26 | app = L.LightningApp(component)
--------------------------------------------------------------------------------
/demos/data/cat/cat.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/machine-learning-notes/8c8e867930d000131e463a00c864ce42e8661cad/demos/data/cat/cat.jpeg
--------------------------------------------------------------------------------
/evaluation/ci-for-ml/ci-simulation-repeated.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "81dd54a3-d44a-41ce-aed1-9f3eda29482b",
6 | "metadata": {},
7 | "source": [
8 | "# CI Simulation Repeated (Supplementary Material)"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "id": "539ff52f-0cce-4cb9-aff5-91705ee69ad4",
14 | "metadata": {},
15 | "source": [
16 | "- This notebook supplements the [confidence-intervals-for-ml.ipynb](confidence-intervals-for-ml.ipynb) with a case study.\n",
17 | "\n",
18 | "- In particular, this notebook repeats the CI simulation from [ci-simulation.ipynb](ci-simulation.ipynb) multiple times to get a sense of which CI method is good at capturing the true model performance.\n",
19 | "\n",
20 | "- Just to make sure that there are no silent bugs due to variable naming & reuse, each experiment is in a separate Python script."
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "id": "2a37522e-fa74-4e44-b398-59b7a975d67f",
26 | "metadata": {},
27 | "source": [
28 | "## 1) Normal Approximation Interval Based on the Test Set"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 1,
34 | "id": "b8000642-fb48-499e-828f-24396bdffbec",
35 | "metadata": {},
36 | "outputs": [
37 | {
38 | "name": "stdout",
39 | "output_type": "stream",
40 | "text": [
41 | "95.6% of 95% confidence intervals contain the true accuracy.\n"
42 | ]
43 | }
44 | ],
45 | "source": [
46 | "!python ci-simulation-repeated/1_normal_approx.py --repetitions 1000"
47 | ]
48 | },
49 | {
50 | "cell_type": "markdown",
51 | "id": "75c876a5-817f-451f-81b0-5af229451e94",
52 | "metadata": {},
53 | "source": [
54 | "## 2.1) A *t* Confidence Interval from Bootstrap Samples"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 2,
60 | "id": "c5540a38-f241-4429-a049-2519291319c0",
61 | "metadata": {},
62 | "outputs": [
63 | {
64 | "name": "stdout",
65 | "output_type": "stream",
66 | "text": [
67 | "98.5% of 95% confidence intervals contain the true accuracy.\n"
68 | ]
69 | }
70 | ],
71 | "source": [
72 | "!python ci-simulation-repeated/2.1_bootstrap_t.py --repetitions 1000"
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "id": "cbb37329-4482-4dd4-a6eb-ac76f5ada83c",
78 | "metadata": {},
79 | "source": [
80 | "## 2.2) Bootstrap Percentile Method"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": 3,
86 | "id": "0c6afc91-c960-4959-b7d6-8e212a545281",
87 | "metadata": {},
88 | "outputs": [
89 | {
90 | "name": "stdout",
91 | "output_type": "stream",
92 | "text": [
93 | "98.0% of 95% confidence intervals contain the true accuracy.\n"
94 | ]
95 | }
96 | ],
97 | "source": [
98 | "!python ci-simulation-repeated/2.2_bootstrap_percentile.py --repetitions 1000"
99 | ]
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "id": "32efa9f6-5190-4726-9c95-ec019dae9960",
104 | "metadata": {},
105 | "source": [
106 | "## 2.3) .632 Bootstrap"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": 4,
112 | "id": "4f520295-dbfc-49a7-b8a4-8dea88d6442e",
113 | "metadata": {},
114 | "outputs": [
115 | {
116 | "name": "stdout",
117 | "output_type": "stream",
118 | "text": [
119 | "83.2% of 95% confidence intervals contain the true accuracy.\n"
120 | ]
121 | }
122 | ],
123 | "source": [
124 | "!python ci-simulation-repeated/2.3_bootstrap_632.py --repetitions 1000"
125 | ]
126 | },
127 | {
128 | "cell_type": "markdown",
129 | "id": "eeb6e55a-445d-4f65-b282-85cc2f6a2303",
130 | "metadata": {},
131 | "source": [
132 | "## 3) Bootstrapping the Test Set Predictions"
133 | ]
134 | },
135 | {
136 | "cell_type": "code",
137 | "execution_count": 5,
138 | "id": "7edb473f-234e-4e4f-b561-45ca5344fcfc",
139 | "metadata": {},
140 | "outputs": [
141 | {
142 | "name": "stdout",
143 | "output_type": "stream",
144 | "text": [
145 | "94.5% of 95% confidence intervals contain the true accuracy.\n"
146 | ]
147 | }
148 | ],
149 | "source": [
150 | "!python ci-simulation-repeated/3_bootstrap_test.py --repetitions 1000"
151 | ]
152 | }
153 | ],
154 | "metadata": {
155 | "kernelspec": {
156 | "display_name": "Python 3 (ipykernel)",
157 | "language": "python",
158 | "name": "python3"
159 | },
160 | "language_info": {
161 | "codemirror_mode": {
162 | "name": "ipython",
163 | "version": 3
164 | },
165 | "file_extension": ".py",
166 | "mimetype": "text/x-python",
167 | "name": "python",
168 | "nbconvert_exporter": "python",
169 | "pygments_lexer": "ipython3",
170 | "version": "3.9.7"
171 | }
172 | },
173 | "nbformat": 4,
174 | "nbformat_minor": 5
175 | }
176 |
--------------------------------------------------------------------------------
/evaluation/ci-for-ml/ci-simulation-repeated/1_normal_approx.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from get_dataset import get_dataset
3 | from sklearn.tree import DecisionTreeClassifier
4 | import scipy.stats
5 | import numpy as np
6 |
7 |
8 | def run_method(num_repetitions):
9 | is_inside_list = []
10 |
11 | for i in range(num_repetitions):
12 |
13 | X_train, y_train, X_test, y_test, X_huge_test, y_huge_test = get_dataset(
14 | random_seed=i
15 | )
16 |
17 | clf = DecisionTreeClassifier(random_state=123, max_depth=3)
18 | clf.fit(X_train, y_train)
19 |
20 | acc_test_true = clf.score(X_huge_test, y_huge_test)
21 |
22 | #####################################################
23 | # Compute CI
24 | #####################################################
25 |
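        # normal-approximation interval: acc_test +/- z * sqrt(acc * (1 - acc) / n),
        # treating the test-set accuracy as a binomial proportion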
26 | confidence = 0.95 # Change to your desired confidence level
27 | z_value = scipy.stats.norm.ppf((1 + confidence) / 2.0)
28 | acc_test = clf.score(X_test, y_test)
29 | ci_length = z_value * np.sqrt((acc_test * (1 - acc_test)) / y_test.shape[0])
30 |
31 | ci_lower = acc_test - ci_length
32 | ci_upper = acc_test + ci_length
33 |
34 | # Check CI
35 | is_inside = acc_test_true >= ci_lower and acc_test_true <= ci_upper
36 |
37 | is_inside_list.append(is_inside)
38 |
39 | return is_inside_list
40 |
41 |
42 | if __name__ == "__main__":
43 |
44 | parser = argparse.ArgumentParser()
45 | parser.add_argument(
46 | "-r",
47 | "--repetitions",
48 | required=True,
49 | type=int,
50 | )
51 |
52 | args = parser.parse_args()
53 | is_inside_list = run_method(args.repetitions)
54 |
55 | print(
56 | f"{np.mean(is_inside_list)*100}% of 95% confidence"
57 | " intervals contain the true accuracy."
58 | )
59 |
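60 | # Worked example (illustrative addition): with acc_test = 0.9 on n = 1,000 test
61 | # examples at confidence = 0.95 (z ≈ 1.96), the half-width is
62 | # 1.96 * sqrt(0.9 * 0.1 / 1000) ≈ 0.019, giving a CI of roughly [0.881, 0.919].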
--------------------------------------------------------------------------------
/evaluation/ci-for-ml/ci-simulation-repeated/2.1_bootstrap_t.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from get_dataset import get_dataset
3 | from sklearn.tree import DecisionTreeClassifier
4 | import scipy.stats
5 | import numpy as np
6 |
7 |
8 | def run_method(num_repetitions):
9 | is_inside_list = []
10 |
11 | for i in range(num_repetitions):
12 |
13 | X_train, y_train, X_test, y_test, X_huge_test, y_huge_test = get_dataset(
14 | random_seed=i
15 | )
16 |
17 | clf = DecisionTreeClassifier(random_state=123, max_depth=3)
18 | clf.fit(X_train, y_train)
19 |
20 | acc_test_true = clf.score(X_huge_test, y_huge_test)
21 |
22 | #####################################################
23 | # Compute CI
24 | #####################################################
25 |
26 | rng = np.random.RandomState(seed=12345)
27 | idx = np.arange(y_train.shape[0])
28 |
29 | bootstrap_train_accuracies = []
30 | bootstrap_rounds = 200
31 |
32 |         for _ in range(bootstrap_rounds):
33 |
34 | train_idx = rng.choice(idx, size=idx.shape[0], replace=True)
35 | valid_idx = np.setdiff1d(idx, train_idx, assume_unique=False)
36 |
37 | boot_train_X, boot_train_y = X_train[train_idx], y_train[train_idx]
38 | boot_valid_X, boot_valid_y = X_train[valid_idx], y_train[valid_idx]
39 |
40 | clf.fit(boot_train_X, boot_train_y)
41 | acc = clf.score(boot_valid_X, boot_valid_y)
42 | bootstrap_train_accuracies.append(acc)
43 |
44 | bootstrap_train_mean = np.mean(bootstrap_train_accuracies)
45 |
46 | confidence = 0.95 # Change to your desired confidence level
47 | t_value = scipy.stats.t.ppf((1 + confidence) / 2.0, df=bootstrap_rounds - 1)
48 |
49 | se = 0.0
50 | for acc in bootstrap_train_accuracies:
51 | se += (acc - bootstrap_train_mean) ** 2
52 | se = np.sqrt((1.0 / (bootstrap_rounds - 1)) * se)
53 |
54 | ci_length = t_value * se
55 |
56 | ci_lower = bootstrap_train_mean - ci_length
57 | ci_upper = bootstrap_train_mean + ci_length
58 |
59 | # Check CI
60 | is_inside = acc_test_true >= ci_lower and acc_test_true <= ci_upper
61 |
62 | is_inside_list.append(is_inside)
63 |
64 | return is_inside_list
65 |
66 |
67 | if __name__ == "__main__":
68 |
69 | parser = argparse.ArgumentParser()
70 | parser.add_argument(
71 | "-r",
72 | "--repetitions",
73 | required=True,
74 | type=int,
75 | )
76 |
77 | args = parser.parse_args()
78 | is_inside_list = run_method(args.repetitions)
79 |
80 | print(
81 | f"{np.mean(is_inside_list)*100}% of 95% confidence"
82 | " intervals contain the true accuracy."
83 | )
84 |
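85 | # Note (illustrative addition): the manual loop above computes the sample
86 | # standard deviation of the bootstrap accuracies;
87 | # np.std(bootstrap_train_accuracies, ddof=1) is an equivalent one-liner.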
--------------------------------------------------------------------------------
/evaluation/ci-for-ml/ci-simulation-repeated/2.2_bootstrap_percentile.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from get_dataset import get_dataset
3 | from sklearn.tree import DecisionTreeClassifier
4 | import numpy as np
5 |
6 |
7 | def run_method(num_repetitions):
8 | is_inside_list = []
9 |
10 | for i in range(num_repetitions):
11 |
12 | X_train, y_train, X_test, y_test, X_huge_test, y_huge_test = get_dataset(
13 | random_seed=i
14 | )
15 |
16 | clf = DecisionTreeClassifier(random_state=123, max_depth=3)
17 | clf.fit(X_train, y_train)
18 |
19 | acc_test_true = clf.score(X_huge_test, y_huge_test)
20 |
21 | #####################################################
22 | # Compute CI
23 | #####################################################
24 |
25 | rng = np.random.RandomState(seed=12345)
26 | idx = np.arange(y_train.shape[0])
27 |
28 | bootstrap_train_accuracies = []
29 | bootstrap_rounds = 200
30 |
31 |         for _ in range(bootstrap_rounds):
32 |
33 | train_idx = rng.choice(idx, size=idx.shape[0], replace=True)
34 | valid_idx = np.setdiff1d(idx, train_idx, assume_unique=False)
35 |
36 | boot_train_X, boot_train_y = X_train[train_idx], y_train[train_idx]
37 | boot_valid_X, boot_valid_y = X_train[valid_idx], y_train[valid_idx]
38 |
39 | clf.fit(boot_train_X, boot_train_y)
40 | acc = clf.score(boot_valid_X, boot_valid_y)
41 | bootstrap_train_accuracies.append(acc)
42 |
43 | ci_lower = np.percentile(bootstrap_train_accuracies, 2.5)
44 | ci_upper = np.percentile(bootstrap_train_accuracies, 97.5)
45 |
46 | # Check CI
47 | is_inside = acc_test_true >= ci_lower and acc_test_true <= ci_upper
48 |
49 | is_inside_list.append(is_inside)
50 |
51 | return is_inside_list
52 |
53 |
54 | if __name__ == "__main__":
55 |
56 | parser = argparse.ArgumentParser()
57 | parser.add_argument(
58 | "-r",
59 | "--repetitions",
60 | required=True,
61 | type=int,
62 | )
63 |
64 | args = parser.parse_args()
65 | is_inside_list = run_method(args.repetitions)
66 |
67 | print(
68 | f"{np.mean(is_inside_list)*100}% of 95% confidence"
69 | " intervals contain the true accuracy."
70 | )
71 |
--------------------------------------------------------------------------------
/evaluation/ci-for-ml/ci-simulation-repeated/2.3_bootstrap_632.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from get_dataset import get_dataset
3 | from sklearn.tree import DecisionTreeClassifier
4 | import numpy as np
5 |
6 |
7 | def run_method(num_repetitions):
8 | is_inside_list = []
9 |
10 | for i in range(num_repetitions):
11 |
12 | X_train, y_train, X_test, y_test, X_huge_test, y_huge_test = get_dataset(
13 | random_seed=i
14 | )
15 |
16 | clf = DecisionTreeClassifier(random_state=123, max_depth=3)
17 | clf.fit(X_train, y_train)
18 |
19 | acc_test_true = clf.score(X_huge_test, y_huge_test)
20 |
21 | #####################################################
22 | # Compute CI
23 | #####################################################
24 |
25 | rng = np.random.RandomState(seed=12345)
26 | idx = np.arange(y_train.shape[0])
27 |
28 | bootstrap_train_accuracies = []
29 | bootstrap_rounds = 200
30 |         weight = 0.632  # weight on the out-of-bag accuracy in the .632 estimator
31 |
32 |         for _ in range(bootstrap_rounds):
33 |
34 | train_idx = rng.choice(idx, size=idx.shape[0], replace=True)
35 | valid_idx = np.setdiff1d(idx, train_idx, assume_unique=False)
36 |
37 | boot_train_X, boot_train_y = X_train[train_idx], y_train[train_idx]
38 | boot_valid_X, boot_valid_y = X_train[valid_idx], y_train[valid_idx]
39 |
40 | clf.fit(boot_train_X, boot_train_y)
41 | train_acc = clf.score(X_train, y_train)
42 | valid_acc = clf.score(boot_valid_X, boot_valid_y)
43 |             acc = weight * valid_acc + (1.0 - weight) * train_acc
44 |
45 | bootstrap_train_accuracies.append(acc)
46 |
47 | ci_lower = np.percentile(bootstrap_train_accuracies, 2.5)
48 | ci_upper = np.percentile(bootstrap_train_accuracies, 97.5)
49 |
50 | # Check CI
51 | is_inside = acc_test_true >= ci_lower and acc_test_true <= ci_upper
52 |
53 | is_inside_list.append(is_inside)
54 |
55 | return is_inside_list
56 |
57 |
58 | if __name__ == "__main__":
59 |
60 | parser = argparse.ArgumentParser()
61 | parser.add_argument(
62 | "-r",
63 | "--repetitions",
64 | required=True,
65 | type=int,
66 | )
67 |
68 | args = parser.parse_args()
69 | is_inside_list = run_method(args.repetitions)
70 |
71 | print(
72 | f"{np.mean(is_inside_list)*100}% of 95% confidence"
73 | " intervals contain the true accuracy."
74 | )
75 |
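76 | # Note (illustrative addition): Efron's .632 estimator blends the optimistic
77 | # resubstitution accuracy with the pessimistic out-of-bag accuracy:
78 | # ACC_.632 = 0.632 * ACC_oob + 0.368 * ACC_resubstitution.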
--------------------------------------------------------------------------------
/evaluation/ci-for-ml/ci-simulation-repeated/3_bootstrap_test.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from get_dataset import get_dataset
3 | from sklearn.tree import DecisionTreeClassifier
4 | import numpy as np
5 |
6 |
7 | def run_method(num_repetitions):
8 | is_inside_list = []
9 |
10 | for i in range(num_repetitions):
11 |
12 | X_train, y_train, X_test, y_test, X_huge_test, y_huge_test = get_dataset(
13 | random_seed=i
14 | )
15 |
16 | clf = DecisionTreeClassifier(random_state=123, max_depth=3)
17 | clf.fit(X_train, y_train)
18 |
19 | acc_test_true = clf.score(X_huge_test, y_huge_test)
20 |
21 | #####################################################
22 | # Compute CI
23 | #####################################################
24 |
25 | predictions_test = clf.predict(X_test)
26 |
27 | rng = np.random.RandomState(seed=12345)
28 | idx = np.arange(y_test.shape[0])
29 |
30 | test_accuracies = []
31 |
32 |         for _ in range(200):
33 |
34 | pred_idx = rng.choice(idx, size=idx.shape[0], replace=True)
35 | acc_test_boot = np.mean(predictions_test[pred_idx] == y_test[pred_idx])
36 | test_accuracies.append(acc_test_boot)
37 |
38 | ci_lower = np.percentile(test_accuracies, 2.5)
39 | ci_upper = np.percentile(test_accuracies, 97.5)
40 |
41 | # Check CI
42 | is_inside = acc_test_true >= ci_lower and acc_test_true <= ci_upper
43 | is_inside_list.append(is_inside)
44 |
45 | return is_inside_list
46 |
47 |
48 | if __name__ == "__main__":
49 |
50 | parser = argparse.ArgumentParser()
51 | parser.add_argument(
52 | "-r",
53 | "--repetitions",
54 | required=True,
55 | type=int,
56 | )
57 |
58 | args = parser.parse_args()
59 | is_inside_list = run_method(args.repetitions)
60 |
61 | print(
62 | f"{np.mean(is_inside_list)*100}% of 95% confidence"
63 | " intervals contain the true accuracy."
64 | )
65 |
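66 | # Note (illustrative addition): unlike methods 2.1-2.3, this approach fits the
67 | # model only once and resamples the cached test-set predictions, so the 200
68 | # bootstrap rounds add almost no compute.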
--------------------------------------------------------------------------------
/evaluation/ci-for-ml/ci-simulation-repeated/get_dataset.py:
--------------------------------------------------------------------------------
1 | from sklearn.datasets import make_classification
2 |
3 |
4 | def get_dataset(random_seed):
5 |
6 | X, y = make_classification(
7 | n_samples=10_002_000,
8 | n_features=5,
9 | n_redundant=2,
10 | n_classes=2,
11 | n_clusters_per_class=1,
12 | random_state=random_seed,
13 | flip_y=0.25,
14 | )
15 |
16 | X_train = X[:1_000]
17 | y_train = y[:1_000]
18 |
19 | X_test = X[1_000:2_000]
20 | y_test = y[1_000:2_000]
21 |
22 | X_huge_test = X[2_000:]
23 | y_huge_test = y[2_000:]
24 |
25 | return X_train, y_train, X_test, y_test, X_huge_test, y_huge_test
26 |
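27 | 
28 | # Usage sketch (illustrative addition): sanity-check the 1,000 / 1,000 /
29 | # 10,000,000 train / test / "huge test" split sizes.
30 | if __name__ == "__main__":
31 |     for array in get_dataset(random_seed=0):
32 |         print(array.shape)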
--------------------------------------------------------------------------------
/evaluation/lightning-jupyter-tensorboard/shared_utilities.py:
--------------------------------------------------------------------------------
1 | import lightning as L
2 | import torch
3 | import torch.nn.functional as F
4 | import torchmetrics
5 | from torch.utils.data import DataLoader
6 | from torch.utils.data.dataset import random_split
7 | from torchvision import datasets, transforms
8 |
9 |
10 | class PyTorchMLP(torch.nn.Module):
11 | def __init__(self, num_features, num_classes):
12 | super().__init__()
13 |
14 | self.all_layers = torch.nn.Sequential(
15 | # 1st hidden layer
16 | torch.nn.Linear(num_features, 100),
17 | torch.nn.BatchNorm1d(100),
18 | torch.nn.ReLU(),
19 |
20 | # 2nd hidden layer
21 | torch.nn.Linear(100, 50),
22 | torch.nn.BatchNorm1d(50),
23 | torch.nn.ReLU(),
24 |
25 | # output layer
26 | torch.nn.Linear(50, num_classes),
27 | )
28 |
29 | def forward(self, x):
30 | x = torch.flatten(x, start_dim=1)
31 | logits = self.all_layers(x)
32 | return logits
33 |
34 |
35 | class LightningModel(L.LightningModule):
36 | def __init__(self, model, learning_rate):
37 | super().__init__()
38 |
39 | self.learning_rate = learning_rate
40 | self.model = model
41 |
42 | self.save_hyperparameters(ignore=["model"])
43 |
44 | self.train_acc = torchmetrics.Accuracy()
45 | self.val_acc = torchmetrics.Accuracy()
46 | self.test_acc = torchmetrics.Accuracy()
47 |
48 | def forward(self, x):
49 | return self.model(x)
50 |
51 | def _shared_step(self, batch):
52 | features, true_labels = batch
53 | logits = self(features)
54 |
55 | loss = F.cross_entropy(logits, true_labels)
56 | predicted_labels = torch.argmax(logits, dim=1)
57 | return loss, true_labels, predicted_labels
58 |
59 | def training_step(self, batch, batch_idx):
60 | loss, true_labels, predicted_labels = self._shared_step(batch)
61 |
62 | self.log("loss/train", loss)
63 | self.train_acc(predicted_labels, true_labels)
64 | self.log(
65 | "acc/train", self.train_acc, prog_bar=True, on_epoch=True, on_step=False
66 | )
67 | return loss
68 |
69 | def validation_step(self, batch, batch_idx):
70 | loss, true_labels, predicted_labels = self._shared_step(batch)
71 |
72 | self.log("loss/val", loss, prog_bar=True)
73 | self.val_acc(predicted_labels, true_labels)
74 | self.log("acc/val", self.val_acc, prog_bar=True)
75 |
76 | def test_step(self, batch, batch_idx):
77 | loss, true_labels, predicted_labels = self._shared_step(batch)
78 | self.test_acc(predicted_labels, true_labels)
79 | self.log("acc/test", self.test_acc)
80 |
81 | def configure_optimizers(self):
82 | optimizer = torch.optim.SGD(self.parameters(), lr=self.learning_rate)
83 | return optimizer
84 |
85 |
86 | class MnistDataModule(L.LightningDataModule):
87 | def __init__(self, data_path="./", batch_size=64, num_workers=0):
88 | super().__init__()
89 | self.batch_size = batch_size
90 | self.data_path = data_path
91 | self.num_workers = num_workers
92 |
93 | def prepare_data(self):
94 | datasets.MNIST(root=self.data_path, download=True)
95 | return
96 |
97 | def setup(self, stage=None):
98 | # Note transforms.ToTensor() scales input images
99 | # to 0-1 range
100 | train = datasets.MNIST(
101 | root=self.data_path,
102 | train=True,
103 | transform=transforms.ToTensor(),
104 | download=False,
105 | )
106 |
107 | self.test = datasets.MNIST(
108 | root=self.data_path,
109 | train=False,
110 | transform=transforms.ToTensor(),
111 | download=False,
112 | )
113 |
114 | self.train, self.valid = random_split(train, lengths=[55000, 5000])
115 |
116 | def train_dataloader(self):
117 | train_loader = DataLoader(
118 | dataset=self.train,
119 | batch_size=self.batch_size,
120 | drop_last=True,
121 | shuffle=True,
122 | num_workers=self.num_workers,
123 | )
124 | return train_loader
125 |
126 | def val_dataloader(self):
127 | valid_loader = DataLoader(
128 | dataset=self.valid,
129 | batch_size=self.batch_size,
130 | drop_last=False,
131 | shuffle=False,
132 | num_workers=self.num_workers,
133 | )
134 | return valid_loader
135 |
136 | def test_dataloader(self):
137 | test_loader = DataLoader(
138 | dataset=self.test,
139 | batch_size=self.batch_size,
140 | drop_last=False,
141 | shuffle=False,
142 | num_workers=self.num_workers,
143 | )
144 | return test_loader
145 |
146 |
147 |
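148 | # Usage sketch (illustrative addition; the learning rate is an arbitrary
149 | # example value):
150 | if __name__ == "__main__":
151 |     lightning_model = LightningModel(
152 |         model=PyTorchMLP(num_features=784, num_classes=10), learning_rate=0.05
153 |     )
154 |     print(lightning_model)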
--------------------------------------------------------------------------------
/hyperparameter-tuning-methods/03.1-hyperopt-decisiontree-example.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "39329df3-1f99-4b11-9405-5969d52368a7",
6 | "metadata": {},
7 | "source": [
8 | "# Decision Tree & Hyperopt Example"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "id": "c7d5f0ab-33cd-40f2-82e7-fb2747f04f89",
14 | "metadata": {},
15 | "source": [
16 | "Example showing how to use the Hyperopt library (http://hyperopt.github.io) for Bayesian hyperparameter optimization via the Tree-structured Parzen Estimator (TPE)."
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 1,
22 | "id": "7f61a90e-a119-4bd0-af21-38604c5b4eec",
23 | "metadata": {},
24 | "outputs": [
25 | {
26 | "name": "stdout",
27 | "output_type": "stream",
28 | "text": [
29 | "scikit-learn: 1.0\n",
30 | "hyperopt : 0.2.5\n",
31 | "\n"
32 | ]
33 | }
34 | ],
35 | "source": [
36 | "%load_ext watermark\n",
37 | "%watermark -p scikit-learn,hyperopt"
38 | ]
39 | },
40 | {
41 | "cell_type": "markdown",
42 | "id": "1f0489c2-dd9c-4e71-a78c-e01201762b37",
43 | "metadata": {},
44 | "source": [
45 | "## Dataset"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 2,
51 | "id": "271b17ff-5ea4-4161-8b7f-20ba8131d666",
52 | "metadata": {},
53 | "outputs": [
54 | {
55 | "name": "stdout",
56 | "output_type": "stream",
57 | "text": [
58 | "Train/Valid/Test sizes: 398 80 171\n"
59 | ]
60 | }
61 | ],
62 | "source": [
63 | "from sklearn import model_selection\n",
64 | "from sklearn.model_selection import train_test_split\n",
65 | "from sklearn import datasets\n",
66 | "\n",
67 | "\n",
68 | "data = datasets.load_breast_cancer()\n",
69 | "X, y = data.data, data.target\n",
70 | "\n",
71 | "X_train, X_test, y_train, y_test = \\\n",
72 | " train_test_split(X, y, test_size=0.3, random_state=1, stratify=y)\n",
73 | "\n",
74 | "X_train_sub, X_valid, y_train_sub, y_valid = \\\n",
75 | " train_test_split(X_train, y_train, test_size=0.2, random_state=1, stratify=y_train)\n",
76 | "\n",
77 | "print('Train/Valid/Test sizes:', y_train.shape[0], y_valid.shape[0], y_test.shape[0])"
78 | ]
79 | },
80 | {
81 | "cell_type": "markdown",
82 | "id": "0affc454-9f07-48e6-bcee-e6253d968247",
83 | "metadata": {},
84 | "source": [
85 | "## Hyperopt"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": 3,
91 | "id": "53282fd6-1292-4b4d-a0b7-980707d61c3c",
92 | "metadata": {},
93 | "outputs": [],
94 | "source": [
95 | "from hyperopt import Trials, STATUS_OK, tpe, hp, fmin\n",
96 | "import hyperopt.pyll.stochastic"
97 | ]
98 | },
99 | {
100 | "cell_type": "markdown",
101 | "id": "5435889f-3cd7-45cd-abb2-632e3b034194",
102 | "metadata": {},
103 | "source": [
104 | "Some random sampling examples:"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": 4,
110 | "id": "7ca6f8f6-0c78-434a-8121-a83b5708e143",
111 | "metadata": {},
112 | "outputs": [
113 | {
114 | "data": {
115 | "text/plain": [
116 | "1.8925662130833578"
117 | ]
118 | },
119 | "execution_count": 4,
120 | "metadata": {},
121 | "output_type": "execute_result"
122 | }
123 | ],
124 | "source": [
125 | "hyperopt.pyll.stochastic.sample(hp.loguniform('test', 1e-5, 1)) # range e^{low} to e^{high}"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": 5,
131 | "id": "b2adc867-2d5a-44bd-8115-195ed53d6a7e",
132 | "metadata": {},
133 | "outputs": [
134 | {
135 | "data": {
136 | "text/plain": [
137 | "1.1"
138 | ]
139 | },
140 | "execution_count": 5,
141 | "metadata": {},
142 | "output_type": "execute_result"
143 | }
144 | ],
145 | "source": [
146 | "hyperopt.pyll.stochastic.sample(hp.qloguniform('test', 1e-5, 1, 0.1)) # rounded to 0.1"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": 6,
152 | "id": "9a6bb270-d2a1-4179-a770-39bad5a8332c",
153 | "metadata": {},
154 | "outputs": [],
155 | "source": [
156 | "from sklearn.model_selection import cross_val_score\n",
157 | "from sklearn.tree import DecisionTreeClassifier\n",
158 | "import numpy as np\n",
159 | "\n",
160 | "\n",
161 | "\n",
162 | "params = {\n",
163 | " 'min_samples_split': hp.choice('min_samples_split', np.arange(2, 10)),\n",
164 | " 'min_impurity_decrease': hp.quniform('min_impurity_decrease', 0.0, 0.5, 0.05),\n",
165 | " 'max_depth': hp.choice('max_depth', [6, 16, None])\n",
166 | "}\n",
167 | "\n",
168 | "\n",
169 | "\n",
170 | "def optimization_objective(params):\n",
171 | "\n",
172 | "\n",
173 | " tree = DecisionTreeClassifier(random_state=123, **params)\n",
174 | "    # no pre-fitting needed: cross_val_score clones and refits the estimator\n",
175 | " \n",
176 | " accuracies = cross_val_score(\n",
177 | " estimator=tree, X=X_train, y=y_train, cv=10, n_jobs=-1)\n",
178 | "\n",
179 | " score = accuracies.mean()\n",
180 | "\n",
181 | "    return {'loss': 1.0 - score, 'status': STATUS_OK}"
182 | ]
183 | },
184 | {
185 | "cell_type": "code",
186 | "execution_count": 7,
187 | "id": "a51829c6-234f-401f-84ed-a005f71d0150",
188 | "metadata": {},
189 | "outputs": [
190 | {
191 | "name": "stdout",
192 | "output_type": "stream",
193 | "text": [
194 | "100%|████████| 50/50 [00:01<00:00, 32.09trial/s, best loss: 0.06756410256410261]\n"
195 | ]
196 | }
197 | ],
198 | "source": [
199 | "trials = Trials()\n",
200 | "best = fmin(fn=optimization_objective,\n",
201 | " space=params,\n",
202 | " algo=tpe.suggest,\n",
203 | " max_evals=50,\n",
204 | " trials=trials)"
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": 8,
210 | "id": "2c26399d-ebfc-4b06-86d9-36e49711e908",
211 | "metadata": {},
212 | "outputs": [
213 | {
214 | "data": {
215 | "text/plain": [
216 | "{'max_depth': 2, 'min_impurity_decrease': 0.0, 'min_samples_split': 5}"
217 | ]
218 | },
219 | "execution_count": 8,
220 | "metadata": {},
221 | "output_type": "execute_result"
222 | }
223 | ],
224 | "source": [
225 | "best"
226 | ]
227 | },
228 | {
229 | "cell_type": "markdown",
230 | "id": "42380f27-d982-4ae8-8981-17b7224ebb04",
231 | "metadata": {},
232 | "source": [
233 | "- Attention: `fmin` returns results from `hp.choice` as an index into the list of choices, not as the value itself!"
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": 9,
239 | "id": "83e99f85-9ce2-494e-99ea-20ab49dc0b15",
240 | "metadata": {},
241 | "outputs": [
242 | {
243 | "name": "stdout",
244 | "output_type": "stream",
245 | "text": [
246 | "{'max_depth': None, 'min_impurity_decrease': 0.0, 'min_samples_split': 7}\n"
247 | ]
248 | }
249 | ],
250 | "source": [
251 | "from hyperopt import space_eval\n",
252 | "\n",
253 | "best_params = space_eval(params, best)\n",
254 | "print(best_params)"
255 | ]
256 | },
257 | {
258 | "cell_type": "code",
259 | "execution_count": 10,
260 | "id": "fbb610d8-4846-4e9f-a589-adacd0042603",
261 | "metadata": {},
262 | "outputs": [
263 | {
264 | "data": {
265 | "text/plain": [
266 | "DecisionTreeClassifier(min_samples_split=7, random_state=123)"
267 | ]
268 | },
269 | "execution_count": 10,
270 | "metadata": {},
271 | "output_type": "execute_result"
272 | }
273 | ],
274 | "source": [
275 | "tree = DecisionTreeClassifier(random_state=123, **best_params)\n",
276 | "tree.fit(X_train, y_train)"
277 | ]
278 | },
279 | {
280 | "cell_type": "code",
281 | "execution_count": 11,
282 | "id": "763e816b-6437-45a9-812f-8b429472d75e",
283 | "metadata": {},
284 | "outputs": [
285 | {
286 | "name": "stdout",
287 | "output_type": "stream",
288 | "text": [
289 | "Training Accuracy: 0.99\n",
290 | "Test Accuracy: 0.94\n"
291 | ]
292 | }
293 | ],
294 | "source": [
295 | "print(f\"Training Accuracy: {tree.score(X_train, y_train):0.2f}\")\n",
296 | "print(f\"Test Accuracy: {tree.score(X_test, y_test):0.2f}\")"
297 | ]
298 | }
299 | ],
300 | "metadata": {
301 | "kernelspec": {
302 | "display_name": "Python 3 (ipykernel)",
303 | "language": "python",
304 | "name": "python3"
305 | },
306 | "language_info": {
307 | "codemirror_mode": {
308 | "name": "ipython",
309 | "version": 3
310 | },
311 | "file_extension": ".py",
312 | "mimetype": "text/x-python",
313 | "name": "python",
314 | "nbconvert_exporter": "python",
315 | "pygments_lexer": "ipython3",
316 | "version": "3.9.6"
317 | }
318 | },
319 | "nbformat": 4,
320 | "nbformat_minor": 5
321 | }
322 |
--------------------------------------------------------------------------------
/hyperparameter-tuning-methods/05.1-successive-halving-decisiontree.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "39329df3-1f99-4b11-9405-5969d52368a7",
6 | "metadata": {},
7 | "source": [
8 | "# Decision Tree & Successive Halving + Random Search Example"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "id": "7f61a90e-a119-4bd0-af21-38604c5b4eec",
15 | "metadata": {},
16 | "outputs": [
17 | {
18 | "name": "stdout",
19 | "output_type": "stream",
20 | "text": [
21 | "scikit-learn: 1.0\n",
22 | "mlxtend : 0.19.0\n",
23 | "\n"
24 | ]
25 | }
26 | ],
27 | "source": [
28 | "%load_ext watermark\n",
29 | "%watermark -p scikit-learn,mlxtend"
30 | ]
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "id": "1f0489c2-dd9c-4e71-a78c-e01201762b37",
35 | "metadata": {},
36 | "source": [
37 | "## Dataset"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 2,
43 | "id": "271b17ff-5ea4-4161-8b7f-20ba8131d666",
44 | "metadata": {},
45 | "outputs": [
46 | {
47 | "name": "stdout",
48 | "output_type": "stream",
49 | "text": [
50 | "Train/Valid/Test sizes: 398 80 171\n"
51 | ]
52 | }
53 | ],
54 | "source": [
55 | "from sklearn import model_selection\n",
56 | "from sklearn.model_selection import train_test_split\n",
57 | "from sklearn import datasets\n",
58 | "\n",
59 | "\n",
60 | "data = datasets.load_breast_cancer()\n",
61 | "X, y = data.data, data.target\n",
62 | "\n",
63 | "X_train, X_test, y_train, y_test = \\\n",
64 | " train_test_split(X, y, test_size=0.3, random_state=1, stratify=y)\n",
65 | "\n",
66 | "X_train_sub, X_valid, y_train_sub, y_valid = \\\n",
67 | " train_test_split(X_train, y_train, test_size=0.2, random_state=1, stratify=y_train)\n",
68 | "\n",
69 | "print('Train/Valid/Test sizes:', y_train.shape[0], y_valid.shape[0], y_test.shape[0])"
70 | ]
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "id": "0c922b01-86f0-4e83-9e36-446f99f6fe1b",
75 | "metadata": {},
76 | "source": [
77 | "## Successive Halving + Random Search"
78 | ]
79 | },
80 | {
81 | "cell_type": "markdown",
82 | "id": "72e56f33-ec33-46dd-afa2-a1b3c8b3da0b",
83 | "metadata": {},
84 | "source": [
85 | "- `HalvingRandomSearchCV` evaluates all sampled candidates on a small initial `n_samples` budget; with `factor=3`, each round keeps roughly the best third of the candidates and triples the budget (`n_candidates='exhaust'` picks the initial candidate count so that the final round can use the full training set).\n",
86 | "- More info: \n",
87 | " - https://scikit-learn.org/stable/modules/grid_search.html#successive-halving-user-guide\n",
88 | " - https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.HalvingRandomSearchCV.html#sklearn.model_selection.HalvingRandomSearchCV"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": 3,
94 | "id": "96f0b4c1-803a-436f-93d5-31baab55faa5",
95 | "metadata": {},
96 | "outputs": [
97 | {
98 | "data": {
99 | "text/plain": [
100 | "0.8882539682539681"
101 | ]
102 | },
103 | "execution_count": 3,
104 | "metadata": {},
105 | "output_type": "execute_result"
106 | }
107 | ],
108 | "source": [
109 | "import numpy as np\n",
110 | "import scipy.stats\n",
111 | "\n",
112 | "from sklearn.experimental import enable_halving_search_cv\n",
113 | "from sklearn.model_selection import HalvingRandomSearchCV\n",
114 | "\n",
115 | "from sklearn.tree import DecisionTreeClassifier\n",
116 | "\n",
117 | "\n",
118 | "clf = DecisionTreeClassifier(random_state=123)\n",
119 | "\n",
120 | "params = {\n",
121 | " 'min_samples_split': scipy.stats.randint(2, 12),\n",
122 | " 'min_impurity_decrease': scipy.stats.uniform(0.0, 0.5),\n",
123 | " 'max_depth': [6, 16, None]\n",
124 | "}\n",
125 | "\n",
126 | "\n",
127 | "search = HalvingRandomSearchCV(\n",
128 | " estimator=clf, \n",
129 | " param_distributions=params,\n",
130 | " n_candidates='exhaust',\n",
131 | " resource='n_samples',\n",
132 | " factor=3,\n",
133 | " random_state=123,\n",
134 | " n_jobs=1)\n",
135 | "\n",
136 | "\n",
137 | "search.fit(X_train, y_train)\n",
138 | "\n",
139 | "search.best_score_"
140 | ]
141 | },
142 | {
143 | "cell_type": "code",
144 | "execution_count": 4,
145 | "id": "2c26399d-ebfc-4b06-86d9-36e49711e908",
146 | "metadata": {},
147 | "outputs": [
148 | {
149 | "data": {
150 | "text/plain": [
151 | "{'max_depth': None,\n",
152 | " 'min_impurity_decrease': 0.029838948304784174,\n",
153 | " 'min_samples_split': 2}"
154 | ]
155 | },
156 | "execution_count": 4,
157 | "metadata": {},
158 | "output_type": "execute_result"
159 | }
160 | ],
161 | "source": [
162 | "search.best_params_"
163 | ]
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": 5,
168 | "id": "763e816b-6437-45a9-812f-8b429472d75e",
169 | "metadata": {},
170 | "outputs": [
171 | {
172 | "name": "stdout",
173 | "output_type": "stream",
174 | "text": [
175 | "Training Accuracy: 0.95\n",
176 | "Test Accuracy: 0.94\n"
177 | ]
178 | }
179 | ],
180 | "source": [
181 | "print(f\"Training Accuracy: {search.best_estimator_.score(X_train, y_train):0.2f}\")\n",
182 | "print(f\"Test Accuracy: {search.best_estimator_.score(X_test, y_test):0.2f}\")"
183 | ]
184 | }
185 | ],
186 | "metadata": {
187 | "kernelspec": {
188 | "display_name": "Python 3 (ipykernel)",
189 | "language": "python",
190 | "name": "python3"
191 | },
192 | "language_info": {
193 | "codemirror_mode": {
194 | "name": "ipython",
195 | "version": 3
196 | },
197 | "file_extension": ".py",
198 | "mimetype": "text/x-python",
199 | "name": "python",
200 | "nbconvert_exporter": "python",
201 | "pygments_lexer": "ipython3",
202 | "version": "3.9.6"
203 | }
204 | },
205 | "nbformat": 4,
206 | "nbformat_minor": 5
207 | }
208 |
--------------------------------------------------------------------------------
/hyperparameter-tuning-methods/06.1-genetic-opt.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "39329df3-1f99-4b11-9405-5969d52368a7",
6 | "metadata": {},
7 | "source": [
8 | "# Genetic Programming-Based Hyperparameter Optimization of a Decision Tree"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "id": "66496ba0-a0c7-4819-9c6d-13daf80c8c9c",
14 | "metadata": {},
15 | "source": [
16 | "This notebook shows how to use [`sklearn-genetic-opt`](https://sklearn-genetic-opt.readthedocs.io/en/stable/) for hyperparameter optimization based on genetic algorithms (evolutionary programming). If you are interested in understanding how it works under the hood, `sklearn-genetic-opt` uses [DEAP](https://deap.readthedocs.io/).\n"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 1,
22 | "id": "7f61a90e-a119-4bd0-af21-38604c5b4eec",
23 | "metadata": {},
24 | "outputs": [
25 | {
26 | "name": "stdout",
27 | "output_type": "stream",
28 | "text": [
29 | "scikit-learn : 1.0\n",
30 | "sklearn : 1.0\n",
31 | "deap : 1.3.1\n",
32 | "sklearn_genetic: 0.7.0\n",
33 | "\n"
34 | ]
35 | }
36 | ],
37 | "source": [
38 | "%load_ext watermark\n",
39 | "%watermark -p scikit-learn,sklearn,deap,sklearn_genetic"
40 | ]
41 | },
42 | {
43 | "cell_type": "markdown",
44 | "id": "1f0489c2-dd9c-4e71-a78c-e01201762b37",
45 | "metadata": {},
46 | "source": [
47 | "## Dataset"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 2,
53 | "id": "271b17ff-5ea4-4161-8b7f-20ba8131d666",
54 | "metadata": {},
55 | "outputs": [
56 | {
57 | "name": "stdout",
58 | "output_type": "stream",
59 | "text": [
60 | "Train/Valid/Test sizes: 398 80 171\n"
61 | ]
62 | }
63 | ],
64 | "source": [
65 | "from sklearn import model_selection\n",
66 | "from sklearn.model_selection import train_test_split\n",
67 | "from sklearn import datasets\n",
68 | "\n",
69 | "\n",
70 | "data = datasets.load_breast_cancer()\n",
71 | "X, y = data.data, data.target\n",
72 | "\n",
73 | "X_train, X_test, y_train, y_test = \\\n",
74 | " train_test_split(X, y, test_size=0.3, random_state=1, stratify=y)\n",
75 | "\n",
76 | "X_train_sub, X_valid, y_train_sub, y_valid = \\\n",
77 | " train_test_split(X_train, y_train, test_size=0.2, random_state=1, stratify=y_train)\n",
78 | "\n",
79 | "print('Train/Valid/Test sizes:', y_train.shape[0], y_valid.shape[0], y_test.shape[0])"
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "id": "0c922b01-86f0-4e83-9e36-446f99f6fe1b",
85 | "metadata": {},
86 | "source": [
87 | "## sklearn-genetic-opt"
88 | ]
89 | },
90 | {
91 | "cell_type": "markdown",
92 | "id": "72e56f33-ec33-46dd-afa2-a1b3c8b3da0b",
93 | "metadata": {},
94 | "source": [
95 | "- Install: `pip install sklearn-genetic-opt[all]`\n",
96 | "\n",
97 | "- More info: https://sklearn-genetic-opt.readthedocs.io/en/stable/#"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": 3,
103 | "id": "96f0b4c1-803a-436f-93d5-31baab55faa5",
104 | "metadata": {},
105 | "outputs": [
106 | {
107 | "name": "stdout",
108 | "output_type": "stream",
109 | "text": [
110 | "gen\tnevals\tfitness \tfitness_std\tfitness_max\tfitness_min\n",
111 | "0 \t15 \t0.773962\t0.131052 \t0.914778 \t0.628165 \n",
112 | "1 \t28 \t0.888608\t0.0588224 \t0.914778 \t0.673165 \n",
113 | "2 \t29 \t0.911424\t0.00855215 \t0.914778 \t0.88962 \n",
114 | "3 \t28 \t0.914778\t4.44089e-16\t0.914778 \t0.914778 \n",
115 | "4 \t28 \t0.914778\t4.44089e-16\t0.914778 \t0.914778 \n",
116 | "5 \t28 \t0.914778\t4.44089e-16\t0.914778 \t0.914778 \n",
117 | "6 \t29 \t0.914778\t4.44089e-16\t0.914778 \t0.914778 \n",
118 | "7 \t27 \t0.918297\t0.00703797 \t0.932373 \t0.914778 \n",
119 | "8 \t27 \t0.922989\t0.0087779 \t0.932373 \t0.914778 \n",
120 | "9 \t29 \t0.928854\t0.00703797 \t0.932373 \t0.914778 \n",
121 | "10 \t29 \t0.932373\t3.33067e-16\t0.932373 \t0.932373 \n",
122 | "11 \t29 \t0.932373\t3.33067e-16\t0.932373 \t0.932373 \n",
123 | "12 \t29 \t0.932373\t3.33067e-16\t0.932373 \t0.932373 \n",
124 | "13 \t29 \t0.932861\t0.000974684\t0.93481 \t0.932373 \n",
125 | "14 \t29 \t0.933023\t0.00107755 \t0.93481 \t0.932373 \n",
126 | "15 \t28 \t0.93416 \t0.00107755 \t0.93481 \t0.932373 \n",
127 | "16 \t29 \t0.93481 \t3.33067e-16\t0.93481 \t0.93481 \n",
128 | "17 \t29 \t0.93481 \t3.33067e-16\t0.93481 \t0.93481 \n",
129 | "18 \t29 \t0.93481 \t3.33067e-16\t0.93481 \t0.93481 \n",
130 | "19 \t28 \t0.93481 \t3.33067e-16\t0.93481 \t0.93481 \n",
131 | "20 \t29 \t0.93481 \t3.33067e-16\t0.93481 \t0.93481 \n"
132 | ]
133 | },
134 | {
135 | "data": {
136 | "text/plain": [
137 | "0.9348101265822784"
138 | ]
139 | },
140 | "execution_count": 3,
141 | "metadata": {},
142 | "output_type": "execute_result"
143 | }
144 | ],
145 | "source": [
146 | "import numpy as np\n",
147 | "import scipy.stats\n",
148 | "\n",
149 | "from sklearn_genetic import GASearchCV\n",
150 | "from sklearn_genetic.space import Integer, Categorical, Continuous\n",
151 | "from sklearn.tree import DecisionTreeClassifier\n",
152 | "\n",
153 | "\n",
154 | "clf = DecisionTreeClassifier(random_state=123)\n",
155 | "\n",
156 | "params = {\n",
157 | " 'min_samples_split': Integer(2, 12),\n",
158 | " 'min_impurity_decrease': Continuous(0.0, 0.5),\n",
159 | " 'max_depth': Categorical([6, 16, None])\n",
160 | "}\n",
161 | "\n",
162 | "search = GASearchCV(\n",
163 | " estimator=clf,\n",
164 | " cv=5,\n",
165 | " population_size=15,\n",
166 | " generations=20,\n",
167 | " tournament_size=3,\n",
168 | " elitism=True,\n",
169 | " keep_top_k=4,\n",
170 | " crossover_probability=0.9,\n",
171 | " mutation_probability=0.05,\n",
172 | " param_grid=params,\n",
173 | " criteria='max',\n",
174 | " algorithm='eaMuCommaLambda',\n",
175 | " n_jobs=-1)\n",
176 | "\n",
177 | "search.fit(X_train, y_train)\n",
178 | "\n",
179 | "search.best_score_"
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": 4,
185 | "id": "2c26399d-ebfc-4b06-86d9-36e49711e908",
186 | "metadata": {},
187 | "outputs": [
188 | {
189 | "data": {
190 | "text/plain": [
191 | "{'min_samples_split': 8,\n",
192 | " 'min_impurity_decrease': 0.006258039752250311,\n",
193 | " 'max_depth': 16}"
194 | ]
195 | },
196 | "execution_count": 4,
197 | "metadata": {},
198 | "output_type": "execute_result"
199 | }
200 | ],
201 | "source": [
202 | "search.best_params_"
203 | ]
204 | },
205 | {
206 | "cell_type": "code",
207 | "execution_count": 5,
208 | "id": "763e816b-6437-45a9-812f-8b429472d75e",
209 | "metadata": {},
210 | "outputs": [
211 | {
212 | "name": "stdout",
213 | "output_type": "stream",
214 | "text": [
215 | "Training Accuracy: 0.99\n",
216 | "Test Accuracy: 0.94\n"
217 | ]
218 | }
219 | ],
220 | "source": [
221 | "print(f\"Training Accuracy: {search.best_estimator_.score(X_train, y_train):0.2f}\")\n",
222 | "print(f\"Test Accuracy: {search.best_estimator_.score(X_test, y_test):0.2f}\")"
223 | ]
224 | }
225 | ],
226 | "metadata": {
227 | "kernelspec": {
228 | "display_name": "Python 3 (ipykernel)",
229 | "language": "python",
230 | "name": "python3"
231 | },
232 | "language_info": {
233 | "codemirror_mode": {
234 | "name": "ipython",
235 | "version": 3
236 | },
237 | "file_extension": ".py",
238 | "mimetype": "text/x-python",
239 | "name": "python",
240 | "nbconvert_exporter": "python",
241 | "pygments_lexer": "ipython3",
242 | "version": "3.9.6"
243 | }
244 | },
245 | "nbformat": 4,
246 | "nbformat_minor": 5
247 | }
248 |
--------------------------------------------------------------------------------
/hyperparameter-tuning-methods/figures/orion-recommendations.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/machine-learning-notes/8c8e867930d000131e463a00c864ce42e8661cad/hyperparameter-tuning-methods/figures/orion-recommendations.png
--------------------------------------------------------------------------------
/hyperparameter-tuning-methods/lightning-hpo-optuna/README.md:
--------------------------------------------------------------------------------
1 | See https://github.com/Lightning-AI/lightning-hpo
2 |
3 |
4 |
5 | Run with:
6 |
7 | ```
8 | python -m lightning run app sweeper.py
9 | ```
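10 | 
11 | The sweep is configured in `sweeper.py`: it runs 3 Optuna trials (maximizing the
12 | monitored metric) over the learning rate, hidden-unit sizes, batch size, and
13 | maximum number of epochs, passing each sampled configuration to `mlp_cli2.py` as
14 | LightningCLI-style arguments.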
--------------------------------------------------------------------------------
/hyperparameter-tuning-methods/lightning-hpo-optuna/mlp_cli2.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | from lightning.pytorch.callbacks import ModelCheckpoint
4 | from lightning.pytorch.utilities.cli import LightningCLI
5 | from lightning.pytorch.loggers import CSVLogger
6 | from shared_utilities import CustomDataModule, LightningModel2
7 | from watermark import watermark
8 |
9 | if __name__ == "__main__":
10 |
11 | print(watermark(packages="torch,lightning"))
12 |
13 | print(f"The provided arguments are {sys.argv[1:]}")
14 |
15 | cli = LightningCLI(
16 | model_class=LightningModel2,
17 | datamodule_class=CustomDataModule,
18 | run=False,
19 | save_config_overwrite=True,
20 | seed_everything_default=123,
21 | trainer_defaults={
22 | "logger": CSVLogger(save_dir="sweep-logs/", name="my-sweep"),
23 | "callbacks": [ModelCheckpoint(monitor="val_acc")],
24 | },
25 | )
26 |
27 | print(cli.model.hidden_units)
28 |
29 | lightning_model = LightningModel2(
30 | model=None,
31 | hidden_units=cli.model.hidden_units,
32 | learning_rate=cli.model.learning_rate,
33 | )
34 |
35 | cli.trainer.fit(lightning_model, datamodule=cli.datamodule)
36 | #cli.trainer.test(lightning_model, datamodule=cli.datamodule)
37 |
--------------------------------------------------------------------------------
/hyperparameter-tuning-methods/lightning-hpo-optuna/shared_utilities.py:
--------------------------------------------------------------------------------
1 | import lightning as L
2 | import numpy as np
3 | import torch
4 | import torch.nn.functional as F
5 | import torchmetrics
6 | from sklearn.datasets import make_classification
7 | from sklearn.model_selection import train_test_split
8 | from torch.utils.data import DataLoader, Dataset
9 |
10 |
11 | class PyTorchMLP2(torch.nn.Module):
12 | def __init__(self, num_features, hidden_units, num_classes):
13 | super().__init__()
14 |
15 | # Initialize MLP layers
16 | all_layers = []
17 | for hidden_unit in hidden_units:
18 | layer = torch.nn.Linear(num_features, hidden_unit)
19 | all_layers.append(layer)
20 | all_layers.append(torch.nn.ReLU())
21 | num_features = hidden_unit
22 |
23 | output_layer = torch.nn.Linear(
24 | in_features=hidden_units[-1], out_features=num_classes
25 | )
26 |
27 | all_layers.append(output_layer)
28 | self.layers = torch.nn.Sequential(*all_layers)
29 |
30 | def forward(self, x):
31 | x = torch.flatten(x, start_dim=1)
32 | logits = self.layers(x)
33 | return logits
34 |
35 |
36 | class LightningModel2(L.LightningModule):
37 | def __init__(self, model=None, hidden_units=None, learning_rate=None):
38 | super().__init__()
39 |
40 | self.learning_rate = learning_rate
41 | self.hidden_units = hidden_units
42 |
43 |         # use the provided model, or fall back to the default MLP
44 |         self.model = model if model is not None else PyTorchMLP2(
45 |             num_features=100, hidden_units=hidden_units, num_classes=2
46 |         )
47 |
48 | self.save_hyperparameters(ignore=["model"])
49 |
50 | self.train_acc = torchmetrics.Accuracy()
51 | self.val_acc = torchmetrics.Accuracy()
52 | self.test_acc = torchmetrics.Accuracy()
53 |
54 | def forward(self, x):
55 | return self.model(x)
56 |
57 | def _shared_step(self, batch):
58 | features, true_labels = batch
59 | logits = self(features)
60 |
61 | loss = F.cross_entropy(logits, true_labels)
62 | predicted_labels = torch.argmax(logits, dim=1)
63 | return loss, true_labels, predicted_labels
64 |
65 | def training_step(self, batch, batch_idx):
66 | loss, true_labels, predicted_labels = self._shared_step(batch)
67 |
68 | self.log("train_loss", loss)
69 | self.train_acc(predicted_labels, true_labels)
70 | self.log(
71 | "train_acc", self.train_acc, prog_bar=True, on_epoch=True, on_step=False
72 | )
73 | return loss
74 |
75 | def validation_step(self, batch, batch_idx):
76 | loss, true_labels, predicted_labels = self._shared_step(batch)
77 |
78 | self.log("val_loss", loss, prog_bar=True)
79 | self.val_acc(predicted_labels, true_labels)
80 | self.log("val_acc", self.val_acc, prog_bar=True)
81 |
82 | def test_step(self, batch, batch_idx):
83 | loss, true_labels, predicted_labels = self._shared_step(batch)
84 | self.test_acc(predicted_labels, true_labels)
85 | self.log("test_acc", self.test_acc)
86 |
87 | def configure_optimizers(self):
88 | optimizer = torch.optim.SGD(self.parameters(), lr=self.learning_rate)
89 | return optimizer
90 |
91 |
92 | class CustomDataset(Dataset):
93 | def __init__(self, feature_array, label_array, transform=None):
94 |
95 | self.x = feature_array
96 | self.y = label_array
97 | self.transform = transform
98 |
99 | def __getitem__(self, index):
100 | x = self.x[index]
101 | y = self.y[index]
102 |
103 | if self.transform is not None:
104 | x = self.transform(x)
105 |
106 | return x, y
107 |
108 | def __len__(self):
109 | return self.y.shape[0]
110 |
111 |
112 | class CustomDataModule(L.LightningDataModule):
113 | def __init__(self, data_dir="./mnist", batch_size=64):
114 | super().__init__()
115 | self.data_dir = data_dir
116 | self.batch_size = batch_size
117 |
118 | def prepare_data(self):
119 | # download
120 | pass
121 |
122 | def setup(self, stage: str):
123 |
124 | X, y = make_classification(
125 | n_samples=20000,
126 | n_features=100,
127 | n_informative=10,
128 | n_redundant=40,
129 | n_repeated=25,
130 | n_clusters_per_class=5,
131 | flip_y=0.05,
132 | class_sep=0.5,
133 | random_state=123,
134 | )
135 |
136 | X_train, X_test, y_train, y_test = train_test_split(
137 | X, y, test_size=0.2, random_state=123
138 | )
139 |
140 | X_train, X_val, y_train, y_val = train_test_split(
141 | X_train, y_train, test_size=0.1, random_state=123
142 | )
143 |
144 | self.train_dataset = CustomDataset(
145 | feature_array=X_train.astype(np.float32),
146 | label_array=y_train.astype(np.int64),
147 | )
148 |
149 | self.val_dataset = CustomDataset(
150 | feature_array=X_val.astype(np.float32), label_array=y_val.astype(np.int64)
151 | )
152 |
153 | self.test_dataset = CustomDataset(
154 | feature_array=X_test.astype(np.float32), label_array=y_test.astype(np.int64)
155 | )
156 |
157 | def train_dataloader(self):
158 | train_loader = DataLoader(
159 | dataset=self.train_dataset,
160 |             batch_size=self.batch_size,  # honor the configured (sweepable) batch size
161 | shuffle=True,
162 | drop_last=True,
163 | num_workers=0,
164 | )
165 | return train_loader
166 |
167 | def val_dataloader(self):
168 | val_loader = DataLoader(
169 | dataset=self.val_dataset,
170 |             batch_size=self.batch_size,
171 | shuffle=False,
172 | num_workers=0,
173 | )
174 | return val_loader
175 |
176 | def test_dataloader(self):
177 | test_loader = DataLoader(
178 |             dataset=self.test_dataset, batch_size=self.batch_size, shuffle=False, num_workers=0
179 | )
180 | return test_loader
181 |
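182 | 
183 | # Usage sketch (illustrative addition): hidden_units=[50, 100] yields
184 | # Linear(100, 50) -> ReLU -> Linear(50, 100) -> ReLU -> Linear(100, 2).
185 | if __name__ == "__main__":
186 |     mlp = PyTorchMLP2(num_features=100, hidden_units=[50, 100], num_classes=2)
187 |     print(mlp)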
--------------------------------------------------------------------------------
/hyperparameter-tuning-methods/lightning-hpo-optuna/sweeper.py:
--------------------------------------------------------------------------------
1 | import os.path as ops
2 |
3 | import optuna
4 | from lightning import LightningApp
5 |
6 | from lightning_hpo import Sweep
7 | from lightning_hpo.algorithm.optuna import OptunaAlgorithm
8 | from lightning_hpo.distributions.distributions import Categorical, IntUniform, LogUniform
9 |
10 | app = LightningApp(
11 | Sweep(
12 | script_path=ops.join(ops.dirname(__file__), "./mlp_cli2.py"),
13 | n_trials=3,
14 | distributions={
15 | "model.learning_rate": LogUniform(0.001, 0.1),
16 | "model.hidden_units": Categorical(["[50, 100]", "[100, 200]"]),
17 | "data.batch_size": Categorical([32, 64]),
18 | "trainer.max_epochs": IntUniform(1, 3),
19 | },
20 | algorithm=OptunaAlgorithm(optuna.create_study(direction="maximize")),
21 | framework="pytorch_lightning",
22 | )
23 | )
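24 | 
25 | # Note (illustrative addition): the distribution keys ("model.learning_rate",
26 | # "model.hidden_units", "data.batch_size", "trainer.max_epochs") correspond to
27 | # the LightningCLI arguments exposed by mlp_cli2.py, so each trial launches the
28 | # script with a sampled set of overrides.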
--------------------------------------------------------------------------------
/hyperparameter-tuning-methods/sklearn-parameter-sampler.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "928c6039-6f63-4b5a-8821-2df7a647058e",
6 | "metadata": {},
7 | "source": [
8 | "# ParameterSampler for Randomized Search"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "id": "0d153468-cb4f-460e-bda1-291f77ecddea",
14 | "metadata": {},
15 | "source": [
16 | "A little template for creating job-submission scripts based on randomized search, applicable to arbitrary projects."
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 2,
22 | "id": "854032e0-f6b9-49cd-af3e-a31b4db5705b",
23 | "metadata": {},
24 | "outputs": [
25 | {
26 | "name": "stdout",
27 | "output_type": "stream",
28 | "text": [
29 | "my_script.py --param1 2.7858767423914466 --param2 categorical1\n",
30 | "my_script.py --param1 1.7138837047473028 --param2 categorical1\n",
31 | "my_script.py --param1 2.205259076331565 --param2 categorical2\n",
32 | "my_script.py --param1 1.964475733730389 --param2 categorical2\n",
33 | "my_script.py --param1 3.923056793538462 --param2 categorical1\n",
34 | "my_script.py --param1 2.3187771880904404 --param2 categorical2\n",
35 | "my_script.py --param1 1.568470072776602 --param2 categorical1\n",
36 | "my_script.py --param1 2.5092680373504668 --param2 categorical2\n",
37 | "my_script.py --param1 1.7542889787184976 --param2 categorical2\n",
38 | "my_script.py --param1 2.779020708741076 --param2 categorical1\n"
39 | ]
40 | }
41 | ],
42 | "source": [
43 | "from sklearn.model_selection import ParameterSampler\n",
44 | "from scipy.stats import uniform\n",
45 | "\n",
46 | "distributions = dict(param1=uniform(loc=0, scale=4),\n",
47 | " param2=['categorical1', 'categorical2'])\n",
48 | "\n",
49 | "sampler = ParameterSampler(distributions, n_iter=10, random_state=123)\n",
50 | "for param in sampler:\n",
51 | " \n",
52 | " print(\"my_script.py\", end=\"\")\n",
53 | " for k in param:\n",
54 | " print(f' --{k} {param[k]}', end=\"\")\n",
55 | " print()"
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": null,
61 | "id": "a57a7747-814c-48bc-b143-9a624ef1715d",
62 | "metadata": {},
63 | "outputs": [],
64 | "source": []
65 | }
66 | ],
67 | "metadata": {
68 | "kernelspec": {
69 | "display_name": "Python 3 (ipykernel)",
70 | "language": "python",
71 | "name": "python3"
72 | },
73 | "language_info": {
74 | "codemirror_mode": {
75 | "name": "ipython",
76 | "version": 3
77 | },
78 | "file_extension": ".py",
79 | "mimetype": "text/x-python",
80 | "name": "python",
81 | "nbconvert_exporter": "python",
82 | "pygments_lexer": "ipython3",
83 | "version": "3.9.7"
84 | }
85 | },
86 | "nbformat": 4,
87 | "nbformat_minor": 5
88 | }
89 |
--------------------------------------------------------------------------------
/learning-rates/scheduler-comparison/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/machine-learning-notes/8c8e867930d000131e463a00c864ce42e8661cad/learning-rates/scheduler-comparison/overview.png
--------------------------------------------------------------------------------
/learning-rates/scheduler-comparison/shared_utilities.py:
--------------------------------------------------------------------------------
1 | import lightning as L
2 | import numpy as np
3 | import torch
4 | import torch.nn.functional as F
5 | import torchmetrics
6 | from sklearn.datasets import make_classification
7 | from sklearn.model_selection import train_test_split
8 | from torch.utils.data import DataLoader, Dataset
9 |
10 |
11 | class CustomDataset(Dataset):
12 | def __init__(self, feature_array, label_array, transform=None):
13 |
14 | self.x = feature_array
15 | self.y = label_array
16 | self.transform = transform
17 |
18 | def __getitem__(self, index):
19 | x = self.x[index]
20 | y = self.y[index]
21 |
22 | if self.transform is not None:
23 | x = self.transform(x)
24 |
25 | return x, y
26 |
27 | def __len__(self):
28 | return self.y.shape[0]
29 |
30 |
31 | class CustomDataModule(L.LightningDataModule):
32 | def __init__(self, data_dir="./mnist", batch_size=64):
33 | super().__init__()
34 | self.data_dir = data_dir
35 | self.batch_size = batch_size
36 |
37 | def prepare_data(self):
38 | # download
39 | pass
40 |
41 | def setup(self, stage: str):
42 |
43 | X, y = make_classification(
44 | n_samples=20000,
45 | n_features=100,
46 | n_informative=10,
47 | n_redundant=40,
48 | n_repeated=25,
49 | n_clusters_per_class=5,
50 | flip_y=0.05,
51 | class_sep=0.5,
52 | random_state=123,
53 | )
54 |
55 | X_train, X_test, y_train, y_test = train_test_split(
56 | X, y, test_size=0.2, random_state=123
57 | )
58 |
59 | X_train, X_val, y_train, y_val = train_test_split(
60 | X_train, y_train, test_size=0.1, random_state=123
61 | )
62 |
63 | self.train_dataset = CustomDataset(
64 | feature_array=X_train.astype(np.float32),
65 | label_array=y_train.astype(np.int64),
66 | )
67 |
68 | self.val_dataset = CustomDataset(
69 | feature_array=X_val.astype(np.float32), label_array=y_val.astype(np.int64)
70 | )
71 |
72 | self.test_dataset = CustomDataset(
73 | feature_array=X_test.astype(np.float32), label_array=y_test.astype(np.int64)
74 | )
75 |
76 | def train_dataloader(self):
77 | train_loader = DataLoader(
78 | dataset=self.train_dataset,
79 |             batch_size=self.batch_size,  # honor the configured batch size
80 | shuffle=True,
81 | drop_last=True,
82 | num_workers=0,
83 | )
84 | return train_loader
85 |
86 | def val_dataloader(self):
87 | val_loader = DataLoader(
88 | dataset=self.val_dataset,
89 |             batch_size=self.batch_size,
90 | shuffle=False,
91 | num_workers=0,
92 | )
93 | return val_loader
94 |
95 | def test_dataloader(self):
96 | test_loader = DataLoader(
97 |             dataset=self.test_dataset, batch_size=self.batch_size, shuffle=False, num_workers=0
98 | )
99 | return test_loader
100 |
--------------------------------------------------------------------------------
/losses/pytorch-loss-functions/vgg16-smile-classifier/dataset.py:
--------------------------------------------------------------------------------
1 | from torchvision import datasets
2 | from torchvision import transforms
3 | from torch.utils.data import DataLoader
4 |
5 |
6 | def get_dataloaders_celeba(batch_size, num_workers=0,
7 | train_transforms=None,
8 | test_transforms=None,
9 | download=True):
10 |
11 | if train_transforms is None:
12 | train_transforms = transforms.ToTensor()
13 |
14 | if test_transforms is None:
15 | test_transforms = transforms.ToTensor()
16 |
17 | def get_smile(attr):
18 |         return attr[31]  # CelebA attribute index 31 corresponds to 'Smiling'
19 |
20 | train_dataset = datasets.CelebA(root='.',
21 | split='train',
22 | transform=train_transforms,
23 | target_type='attr',
24 | target_transform=get_smile,
25 | download=download)
26 |
27 | valid_dataset = datasets.CelebA(root='.',
28 | split='valid',
29 | target_type='attr',
30 | target_transform=get_smile,
31 | transform=test_transforms)
32 |
33 | test_dataset = datasets.CelebA(root='.',
34 | split='test',
35 | target_type='attr',
36 | target_transform=get_smile,
37 | transform=test_transforms)
38 |
39 | train_loader = DataLoader(dataset=train_dataset,
40 | batch_size=batch_size,
41 | num_workers=num_workers,
42 | shuffle=True)
43 |
44 | valid_loader = DataLoader(dataset=valid_dataset,
45 | batch_size=batch_size,
46 | num_workers=num_workers,
47 | shuffle=False)
48 |
49 | test_loader = DataLoader(dataset=test_dataset,
50 | batch_size=batch_size,
51 | num_workers=num_workers,
52 | shuffle=False)
53 |
54 | return train_loader, valid_loader, test_loader
55 |
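56 | # Usage sketch (illustrative addition; assumes the CelebA files are present
57 | # locally or can be downloaded by torchvision):
58 | if __name__ == "__main__":
59 |     train_loader, valid_loader, test_loader = get_dataloaders_celeba(batch_size=64)
60 |     images, smile_labels = next(iter(train_loader))
61 |     print(images.shape, smile_labels.shape)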
--------------------------------------------------------------------------------
/losses/pytorch-loss-functions/vgg16-smile-classifier/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import pytorch_lightning as pl
4 | import torchmetrics
5 |
6 |
7 | class PyTorchVGG16Logits(nn.Module):
8 |
9 | def __init__(self, num_outputs):
10 | super().__init__()
11 |
12 |
13 | self.block_1 = nn.Sequential(
14 | nn.Conv2d(in_channels=3,
15 | out_channels=64,
16 | kernel_size=(3, 3),
17 | stride=(1, 1),
18 |                       # 'same' padding: (1*(32-1) - 32 + 3)/2 = 1
19 | padding=1),
20 | nn.ReLU(),
21 | nn.Conv2d(in_channels=64,
22 | out_channels=64,
23 | kernel_size=(3, 3),
24 | stride=(1, 1),
25 | padding=1),
26 | nn.ReLU(),
27 | nn.MaxPool2d(kernel_size=(2, 2),
28 | stride=(2, 2))
29 | )
30 |
31 | self.block_2 = nn.Sequential(
32 | nn.Conv2d(in_channels=64,
33 | out_channels=128,
34 | kernel_size=(3, 3),
35 | stride=(1, 1),
36 | padding=1),
37 | nn.ReLU(),
38 | nn.Conv2d(in_channels=128,
39 | out_channels=128,
40 | kernel_size=(3, 3),
41 | stride=(1, 1),
42 | padding=1),
43 | nn.ReLU(),
44 | nn.MaxPool2d(kernel_size=(2, 2),
45 | stride=(2, 2))
46 | )
47 |
48 | self.block_3 = nn.Sequential(
49 | nn.Conv2d(in_channels=128,
50 | out_channels=256,
51 | kernel_size=(3, 3),
52 | stride=(1, 1),
53 | padding=1),
54 | nn.ReLU(),
55 | nn.Conv2d(in_channels=256,
56 | out_channels=256,
57 | kernel_size=(3, 3),
58 | stride=(1, 1),
59 | padding=1),
60 | nn.ReLU(),
61 | nn.Conv2d(in_channels=256,
62 | out_channels=256,
63 | kernel_size=(3, 3),
64 | stride=(1, 1),
65 | padding=1),
66 | nn.ReLU(),
67 | nn.MaxPool2d(kernel_size=(2, 2),
68 | stride=(2, 2))
69 | )
70 |
71 | self.block_4 = nn.Sequential(
72 | nn.Conv2d(in_channels=256,
73 | out_channels=512,
74 | kernel_size=(3, 3),
75 | stride=(1, 1),
76 | padding=1),
77 | nn.ReLU(),
78 | nn.Conv2d(in_channels=512,
79 | out_channels=512,
80 | kernel_size=(3, 3),
81 | stride=(1, 1),
82 | padding=1),
83 | nn.ReLU(),
84 | nn.Conv2d(in_channels=512,
85 | out_channels=512,
86 | kernel_size=(3, 3),
87 | stride=(1, 1),
88 | padding=1),
89 | nn.ReLU(),
90 | nn.MaxPool2d(kernel_size=(2, 2),
91 | stride=(2, 2))
92 | )
93 |
94 | self.block_5 = nn.Sequential(
95 | nn.Conv2d(in_channels=512,
96 | out_channels=512,
97 | kernel_size=(3, 3),
98 | stride=(1, 1),
99 | padding=1),
100 | nn.ReLU(),
101 | nn.Conv2d(in_channels=512,
102 | out_channels=512,
103 | kernel_size=(3, 3),
104 | stride=(1, 1),
105 | padding=1),
106 | nn.ReLU(),
107 | nn.Conv2d(in_channels=512,
108 | out_channels=512,
109 | kernel_size=(3, 3),
110 | stride=(1, 1),
111 | padding=1),
112 | nn.ReLU(),
113 | nn.MaxPool2d(kernel_size=(2, 2),
114 | stride=(2, 2))
115 | )
116 |
117 | self.features = nn.Sequential(
118 | self.block_1, self.block_2,
119 | self.block_3, self.block_4,
120 | self.block_5
121 | )
122 |
123 | self.classifier = nn.Sequential(
124 | nn.Flatten(),
125 | nn.Linear(512*4*4, 4096),
126 | nn.ReLU(True),
127 | nn.Dropout(p=0.5),
128 | nn.Linear(4096, 4096),
129 | nn.ReLU(True),
130 | nn.Dropout(p=0.5),
131 | nn.Linear(4096, num_outputs),
132 | )
133 |
134 | #self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
135 |
136 | for m in self.modules():
137 | if isinstance(m, torch.nn.Conv2d):
138 | # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
139 | # m.weight.data.normal_(0, np.sqrt(2. / n))
140 | m.weight.detach().normal_(0, 0.05)
141 | if m.bias is not None:
142 | m.bias.detach().zero_()
143 | elif isinstance(m, torch.nn.Linear):
144 | m.weight.detach().normal_(0, 0.05)
145 |                 m.bias.detach().zero_()
146 |
147 |
148 | def forward(self, x):
149 |
150 | x = self.features(x)
151 | #x = self.avgpool(x)
152 | x = self.classifier(x)
153 | return x
154 |
155 |
156 |
157 | # LightningModule that receives a PyTorch model as input
158 | class LightningModelForBCE(pl.LightningModule):
159 | def __init__(self, model, learning_rate, use_logits, loss_fn):
160 | super().__init__()
161 |
162 | self.loss_fn = loss_fn
163 | self.learning_rate = learning_rate
164 | self.use_logits = use_logits
165 | # The inherited PyTorch module
166 | self.model = model
167 |
168 | # Save settings and hyperparameters to the log directory
169 | # but skip the model parameters
170 | self.save_hyperparameters(ignore=['model'])
171 |
172 | # Set up attributes for computing the accuracy
173 | self.train_acc = torchmetrics.Accuracy()
174 | self.valid_acc = torchmetrics.Accuracy()
175 | self.test_acc = torchmetrics.Accuracy()
176 |
177 | # Defining the forward method is only necessary
178 | # if you want to use a Trainer's .predict() method (optional)
179 | def forward(self, x):
180 | return self.model(x)
181 |
182 | def training_step(self, batch, batch_idx):
183 | features, true_labels = batch
184 | outputs = self(features).flatten()
185 | loss = self.loss_fn(outputs, true_labels.float())
186 | self.log("train_loss", loss)
187 | return loss
188 |
189 | def test_step(self, batch, batch_idx):
190 | features, true_labels = batch
191 | outputs = self(features).flatten()
192 |
193 | if self.use_logits:
194 | predicted_labels = (outputs > 0.0).float()
195 | else:
196 | predicted_labels = (outputs > 0.5).float()
197 |
198 | self.test_acc(predicted_labels, true_labels)
199 | self.log("test_acc", self.test_acc, on_epoch=True, on_step=False)
200 |
201 | def configure_optimizers(self):
202 | optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
203 | return optimizer
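204 | 
205 | # Example usage (a sketch; `pytorch_model` stands for an instance of the
206 | # VGG-style module above created with num_outputs=1, since the BCE loss
207 | # expects a single logit per example; the hyperparameter values are placeholders):
208 | # lightning_model = LightningModelForBCE(
209 | #     model=pytorch_model,
210 | #     learning_rate=0.0005,
211 | #     use_logits=True,  # raw logits are thresholded at 0.0 in test_step
212 | #     loss_fn=torch.nn.functional.binary_cross_entropy_with_logits,
213 | # )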
--------------------------------------------------------------------------------
/templates/lightning-cli/cli-configurable/README.md:
--------------------------------------------------------------------------------
1 | Check usage via:
2 |
3 | ```
4 | python mlp_cli2.py --help
5 | ```
6 |
7 | ```
8 | :
9 | --model CONFIG Path to a configuration file.
10 | --model.model MODEL (type: Optional[Any], default: null)
11 | --model.learning_rate LEARNING_RATE
12 | (type: Optional[Any], default: null)
13 |
14 | :
15 | --data CONFIG Path to a configuration file.
16 | --data.data_dir DATA_DIR
17 | (type: Any, default: ./mnist)
18 | --data.batch_size BATCH_SIZE
19 | (type: Any, default: 64)
20 | ```
21 |
22 |
23 |
24 | Usage example:
25 |
26 | ```
27 | python mlp_cli2.py --model.learning_rate 0.1 --model.hidden_units "[100, 200]"
28 | ```
29 |
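30 | The same settings can also be collected in a YAML file (a minimal sketch; `config.yaml` is a hypothetical file name):
31 | 
32 | ```yaml
33 | model:
34 |   learning_rate: 0.1
35 |   hidden_units: [100, 200]
36 | data:
37 |   batch_size: 64
38 | ```
39 | 
40 | ```
41 | python mlp_cli2.py --config config.yaml
42 | ```
43 | 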
--------------------------------------------------------------------------------
/templates/lightning-cli/cli-configurable/mlp_cli2.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | from lightning.pytorch.callbacks import ModelCheckpoint
4 | from lightning.pytorch.utilities.cli import LightningCLI
5 | from shared_utilities import CustomDataModule, LightningModel2
6 | from watermark import watermark
7 |
8 | if __name__ == "__main__":
9 |
10 | print(watermark(packages="torch,lightning"))
11 |
12 | print(f"The provided arguments are {sys.argv[1:]}")
13 |
14 | cli = LightningCLI(
15 | model_class=LightningModel2,
16 | datamodule_class=CustomDataModule,
17 | run=False,
18 | save_config_overwrite=True,
19 | seed_everything_default=123,
20 | trainer_defaults={
21 | "max_epochs": 10,
22 | "callbacks": [ModelCheckpoint(monitor="val_acc")],
23 | },
24 | )
25 |
26 | print(cli.model.hidden_units)
27 |
28 | lightning_model = LightningModel2(
29 | model=None,
30 | hidden_units=cli.model.hidden_units,
31 | learning_rate=cli.model.learning_rate,
32 | )
33 |
34 | cli.trainer.fit(lightning_model, datamodule=cli.datamodule)
35 | cli.trainer.test(lightning_model, datamodule=cli.datamodule)
36 |
--------------------------------------------------------------------------------
/templates/lightning-cli/cli-configurable/shared_utilities.py:
--------------------------------------------------------------------------------
1 | import lightning as L
2 | import numpy as np
3 | import torch
4 | import torch.nn.functional as F
5 | import torchmetrics
6 | from sklearn.datasets import make_classification
7 | from sklearn.model_selection import train_test_split
8 | from torch.utils.data import DataLoader, Dataset
9 |
10 |
11 | class PyTorchMLP2(torch.nn.Module):
12 | def __init__(self, num_features, hidden_units, num_classes):
13 | super().__init__()
14 |
15 | # Initialize MLP layers
16 | all_layers = []
17 | for hidden_unit in hidden_units:
18 | layer = torch.nn.Linear(num_features, hidden_unit)
19 | all_layers.append(layer)
20 | all_layers.append(torch.nn.ReLU())
21 | num_features = hidden_unit
22 |
23 | output_layer = torch.nn.Linear(
24 | in_features=hidden_units[-1], out_features=num_classes
25 | )
26 |
27 | all_layers.append(output_layer)
28 | self.layers = torch.nn.Sequential(*all_layers)
29 |
30 | def forward(self, x):
31 | x = torch.flatten(x, start_dim=1)
32 | logits = self.layers(x)
33 | return logits
34 |
35 |
36 | class LightningModel2(L.LightningModule):
37 | def __init__(self, model=None, hidden_units=None, learning_rate=None):
38 | super().__init__()
39 |
40 | self.learning_rate = learning_rate
41 | self.hidden_units = hidden_units
42 |
43 |         # Fall back to the default MLP when no model is passed in
44 |         self.model = model if model is not None else PyTorchMLP2(
45 |             num_features=100, hidden_units=hidden_units, num_classes=2
46 |         )
47 |
48 | self.save_hyperparameters(ignore=["model"])
49 |
50 | self.train_acc = torchmetrics.Accuracy()
51 | self.val_acc = torchmetrics.Accuracy()
52 | self.test_acc = torchmetrics.Accuracy()
53 |
54 | def forward(self, x):
55 | return self.model(x)
56 |
57 | def _shared_step(self, batch):
58 | features, true_labels = batch
59 | logits = self(features)
60 |
61 | loss = F.cross_entropy(logits, true_labels)
62 | predicted_labels = torch.argmax(logits, dim=1)
63 | return loss, true_labels, predicted_labels
64 |
65 | def training_step(self, batch, batch_idx):
66 | loss, true_labels, predicted_labels = self._shared_step(batch)
67 |
68 | self.log("train_loss", loss)
69 | self.train_acc(predicted_labels, true_labels)
70 | self.log(
71 | "train_acc", self.train_acc, prog_bar=True, on_epoch=True, on_step=False
72 | )
73 | return loss
74 |
75 | def validation_step(self, batch, batch_idx):
76 | loss, true_labels, predicted_labels = self._shared_step(batch)
77 |
78 | self.log("val_loss", loss, prog_bar=True)
79 | self.val_acc(predicted_labels, true_labels)
80 | self.log("val_acc", self.val_acc, prog_bar=True)
81 |
82 | def test_step(self, batch, batch_idx):
83 | loss, true_labels, predicted_labels = self._shared_step(batch)
84 | self.test_acc(predicted_labels, true_labels)
85 | self.log("test_acc", self.test_acc)
86 |
87 | def configure_optimizers(self):
88 | optimizer = torch.optim.SGD(self.parameters(), lr=self.learning_rate)
89 | return optimizer
90 |
91 |
92 | class CustomDataset(Dataset):
93 | def __init__(self, feature_array, label_array, transform=None):
94 |
95 | self.x = feature_array
96 | self.y = label_array
97 | self.transform = transform
98 |
99 | def __getitem__(self, index):
100 | x = self.x[index]
101 | y = self.y[index]
102 |
103 | if self.transform is not None:
104 | x = self.transform(x)
105 |
106 | return x, y
107 |
108 | def __len__(self):
109 | return self.y.shape[0]
110 |
111 |
112 | class CustomDataModule(L.LightningDataModule):
113 | def __init__(self, data_dir="./mnist", batch_size=64):
114 | super().__init__()
115 | self.data_dir = data_dir
116 | self.batch_size = batch_size
117 |
118 | def prepare_data(self):
119 | # download
120 | pass
121 |
122 | def setup(self, stage: str):
123 |
124 | X, y = make_classification(
125 | n_samples=20000,
126 | n_features=100,
127 | n_informative=10,
128 | n_redundant=40,
129 | n_repeated=25,
130 | n_clusters_per_class=5,
131 | flip_y=0.05,
132 | class_sep=0.5,
133 | random_state=123,
134 | )
135 |
136 | X_train, X_test, y_train, y_test = train_test_split(
137 | X, y, test_size=0.2, random_state=123
138 | )
139 |
140 | X_train, X_val, y_train, y_val = train_test_split(
141 | X_train, y_train, test_size=0.1, random_state=123
142 | )
143 |
144 | self.train_dataset = CustomDataset(
145 | feature_array=X_train.astype(np.float32),
146 | label_array=y_train.astype(np.int64),
147 | )
148 |
149 | self.val_dataset = CustomDataset(
150 | feature_array=X_val.astype(np.float32), label_array=y_val.astype(np.int64)
151 | )
152 |
153 | self.test_dataset = CustomDataset(
154 | feature_array=X_test.astype(np.float32), label_array=y_test.astype(np.int64)
155 | )
156 |
157 | def train_dataloader(self):
158 | train_loader = DataLoader(
159 | dataset=self.train_dataset,
160 |             batch_size=self.batch_size,
161 | shuffle=True,
162 | drop_last=True,
163 | num_workers=0,
164 | )
165 | return train_loader
166 |
167 | def val_dataloader(self):
168 | val_loader = DataLoader(
169 | dataset=self.val_dataset,
170 |             batch_size=self.batch_size,
171 | shuffle=False,
172 | num_workers=0,
173 | )
174 | return val_loader
175 |
176 | def test_dataloader(self):
177 | test_loader = DataLoader(
178 |             dataset=self.test_dataset, batch_size=self.batch_size, shuffle=False, num_workers=0
179 | )
180 | return test_loader
181 |
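182 | 
183 | # Quick shape check (a sketch; run this module directly):
184 | # if __name__ == "__main__":
185 | #     mlp = PyTorchMLP2(num_features=100, hidden_units=[100, 200], num_classes=2)
186 | #     out = mlp(torch.randn(4, 100))
187 | #     print(out.shape)  # expected: torch.Size([4, 2])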
--------------------------------------------------------------------------------
/templates/lightning-cli/cli-simple/README.md:
--------------------------------------------------------------------------------
1 | Check usage via:
2 |
3 | ```
4 | python mlp_cli.py --help
5 | ```
6 |
7 |
8 |
9 |
10 |
11 | ```
12 | :
13 | --model CONFIG Path to a configuration file.
14 | --model.model MODEL (type: Optional[Any], default: null)
15 | --model.learning_rate LEARNING_RATE
16 | (type: Optional[Any], default: null)
17 |
18 | :
19 | --data CONFIG Path to a configuration file.
20 | --data.data_dir DATA_DIR
21 | (type: Any, default: ./mnist)
22 | --data.batch_size BATCH_SIZE
23 | (type: Any, default: 64)
24 | ```
25 |
26 |
27 |
28 | Usage example:
29 |
30 | ```
31 | python mlp_cli.py --model.learning_rate 0.1
32 | ```
33 |
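34 | LightningCLI also provides a `--print_config` flag that dumps the fully resolved settings (a handy starting point for a config file):
35 | 
36 | ```
37 | python mlp_cli.py --print_config
38 | ```
39 | 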
--------------------------------------------------------------------------------
/templates/lightning-cli/cli-simple/mlp_cli.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | from lightning.pytorch.callbacks import ModelCheckpoint
4 | from lightning.pytorch.utilities.cli import LightningCLI
5 | from shared_utilities import CustomDataModule, LightningModel, PyTorchMLP
6 | from watermark import watermark
7 |
8 | if __name__ == "__main__":
9 |
10 | print(watermark(packages="torch,lightning"))
11 |
12 | print(f"The provided arguments are {sys.argv[1:]}")
13 |
14 | cli = LightningCLI(
15 | model_class=LightningModel,
16 | datamodule_class=CustomDataModule,
17 | run=False,
18 | save_config_overwrite=True,
19 | seed_everything_default=123,
20 | trainer_defaults={
21 | "max_epochs": 10,
22 | "callbacks": [ModelCheckpoint(monitor="val_acc")],
23 | },
24 | )
25 |
26 | pytorch_model = PyTorchMLP(num_features=100, num_classes=2)
27 | lightning_model = LightningModel(
28 | model=pytorch_model, learning_rate=cli.model.learning_rate
29 | )
30 |
31 | cli.trainer.fit(lightning_model, datamodule=cli.datamodule)
32 | cli.trainer.test(lightning_model, datamodule=cli.datamodule)
33 |
--------------------------------------------------------------------------------
/templates/lightning-cli/cli-simple/shared_utilities.py:
--------------------------------------------------------------------------------
1 | import lightning as L
2 | import numpy as np
3 | import torch
4 | import torch.nn.functional as F
5 | import torchmetrics
6 | from sklearn.datasets import make_classification
7 | from sklearn.model_selection import train_test_split
8 | from torch.utils.data import DataLoader, Dataset
9 |
10 |
11 | class PyTorchMLP(torch.nn.Module):
12 | def __init__(self, num_features, num_classes):
13 | super().__init__()
14 |
15 | self.all_layers = torch.nn.Sequential(
16 | # 1st hidden layer
17 | torch.nn.Linear(num_features, 100),
18 | torch.nn.ReLU(),
19 | # 2nd hidden layer
20 | torch.nn.Linear(100, 50),
21 | torch.nn.ReLU(),
22 | # output layer
23 | torch.nn.Linear(50, num_classes),
24 | )
25 |
26 | def forward(self, x):
27 | x = torch.flatten(x, start_dim=1)
28 | logits = self.all_layers(x)
29 | return logits
30 |
31 |
32 | class LightningModel(L.LightningModule):
33 | def __init__(self, model=None, learning_rate=None):
34 | super().__init__()
35 |
36 | self.learning_rate = learning_rate
37 | self.model = model
38 |
39 | self.save_hyperparameters(ignore=["model"])
40 |
41 | self.train_acc = torchmetrics.Accuracy()
42 | self.val_acc = torchmetrics.Accuracy()
43 | self.test_acc = torchmetrics.Accuracy()
44 |
45 | def forward(self, x):
46 | return self.model(x)
47 |
48 | def _shared_step(self, batch):
49 | features, true_labels = batch
50 | logits = self(features)
51 |
52 | loss = F.cross_entropy(logits, true_labels)
53 | predicted_labels = torch.argmax(logits, dim=1)
54 | return loss, true_labels, predicted_labels
55 |
56 | def training_step(self, batch, batch_idx):
57 | loss, true_labels, predicted_labels = self._shared_step(batch)
58 |
59 | self.log("train_loss", loss)
60 | self.train_acc(predicted_labels, true_labels)
61 | self.log(
62 | "train_acc", self.train_acc, prog_bar=True, on_epoch=True, on_step=False
63 | )
64 | return loss
65 |
66 | def validation_step(self, batch, batch_idx):
67 | loss, true_labels, predicted_labels = self._shared_step(batch)
68 |
69 | self.log("val_loss", loss, prog_bar=True)
70 | self.val_acc(predicted_labels, true_labels)
71 | self.log("val_acc", self.val_acc, prog_bar=True)
72 |
73 | def test_step(self, batch, batch_idx):
74 | loss, true_labels, predicted_labels = self._shared_step(batch)
75 | self.test_acc(predicted_labels, true_labels)
76 | self.log("test_acc", self.test_acc)
77 |
78 | def configure_optimizers(self):
79 | optimizer = torch.optim.SGD(self.parameters(), lr=self.learning_rate)
80 | return optimizer
81 |
82 |
83 | class CustomDataset(Dataset):
84 | def __init__(self, feature_array, label_array, transform=None):
85 |
86 | self.x = feature_array
87 | self.y = label_array
88 | self.transform = transform
89 |
90 | def __getitem__(self, index):
91 | x = self.x[index]
92 | y = self.y[index]
93 |
94 | if self.transform is not None:
95 | x = self.transform(x)
96 |
97 | return x, y
98 |
99 | def __len__(self):
100 | return self.y.shape[0]
101 |
102 |
103 | class CustomDataModule(L.LightningDataModule):
104 | def __init__(self, data_dir="./mnist", batch_size=64):
105 | super().__init__()
106 | self.data_dir = data_dir
107 | self.batch_size = batch_size
108 |
109 | def prepare_data(self):
110 | # download
111 | pass
112 |
113 | def setup(self, stage: str):
114 |
115 | X, y = make_classification(
116 | n_samples=20000,
117 | n_features=100,
118 | n_informative=10,
119 | n_redundant=40,
120 | n_repeated=25,
121 | n_clusters_per_class=5,
122 | flip_y=0.05,
123 | class_sep=0.5,
124 | random_state=123,
125 | )
126 |
127 | X_train, X_test, y_train, y_test = train_test_split(
128 | X, y, test_size=0.2, random_state=123
129 | )
130 |
131 | X_train, X_val, y_train, y_val = train_test_split(
132 | X_train, y_train, test_size=0.1, random_state=123
133 | )
134 |
135 | self.train_dataset = CustomDataset(
136 | feature_array=X_train.astype(np.float32),
137 | label_array=y_train.astype(np.int64),
138 | )
139 |
140 | self.val_dataset = CustomDataset(
141 | feature_array=X_val.astype(np.float32), label_array=y_val.astype(np.int64)
142 | )
143 |
144 | self.test_dataset = CustomDataset(
145 | feature_array=X_test.astype(np.float32), label_array=y_test.astype(np.int64)
146 | )
147 |
148 | def train_dataloader(self):
149 | train_loader = DataLoader(
150 | dataset=self.train_dataset,
151 |             batch_size=self.batch_size,
152 | shuffle=True,
153 | drop_last=True,
154 | num_workers=0,
155 | )
156 | return train_loader
157 |
158 | def val_dataloader(self):
159 | val_loader = DataLoader(
160 | dataset=self.val_dataset,
161 |             batch_size=self.batch_size,
162 | shuffle=False,
163 | num_workers=0,
164 | )
165 | return val_loader
166 |
167 | def test_dataloader(self):
168 | test_loader = DataLoader(
169 |             dataset=self.test_dataset, batch_size=self.batch_size, shuffle=False, num_workers=0
170 | )
171 | return test_loader
172 |
--------------------------------------------------------------------------------
/templates/modern-early-stop-with-checkpointing/logs/my-model/version_0/checkpoints/epoch=8-step=4050.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/machine-learning-notes/8c8e867930d000131e463a00c864ce42e8661cad/templates/modern-early-stop-with-checkpointing/logs/my-model/version_0/checkpoints/epoch=8-step=4050.ckpt
--------------------------------------------------------------------------------
/templates/modern-early-stop-with-checkpointing/logs/my-model/version_0/checkpoints/last.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/machine-learning-notes/8c8e867930d000131e463a00c864ce42e8661cad/templates/modern-early-stop-with-checkpointing/logs/my-model/version_0/checkpoints/last.ckpt
--------------------------------------------------------------------------------
/templates/modern-early-stop-with-checkpointing/logs/my-model/version_0/hparams.yaml:
--------------------------------------------------------------------------------
1 | {}
2 |
--------------------------------------------------------------------------------
/templates/modern-early-stop-with-checkpointing/logs/my-model/version_0/metrics.csv:
--------------------------------------------------------------------------------
1 | train_loss,epoch,step,val_loss,val_acc,train_acc,test_acc
2 | 0.7030411958694458,0,49,,,,
3 | 0.5976124405860901,0,99,,,,
4 | 0.561132550239563,0,149,,,,
5 | 0.5131489038467407,0,199,,,,
6 | 0.3952806293964386,0,249,,,,
7 | 0.5731342434883118,0,299,,,,
8 | 0.6261370182037354,0,349,,,,
9 | 0.4576362371444702,0,399,,,,
10 | 0.4602738320827484,0,449,,,,
11 | ,0,449,0.5030142068862915,0.7637500166893005,,
12 | ,0,449,,,0.7206944227218628,
13 | 0.5635536313056946,1,499,,,,
14 | 0.40492358803749084,1,549,,,,
15 | 0.6099315285682678,1,599,,,,
16 | 0.25897836685180664,1,649,,,,
17 | 0.4928973317146301,1,699,,,,
18 | 0.4685414731502533,1,749,,,,
19 | 0.2871425449848175,1,799,,,,
20 | 0.7240744233131409,1,849,,,,
21 | 0.6184459924697876,1,899,,,,
22 | ,1,899,0.4767351448535919,0.7762500047683716,,
23 | ,1,899,,,0.7831249833106995,
24 | 0.28222334384918213,2,949,,,,
25 | 0.46554091572761536,2,999,,,,
26 | 0.2788914144039154,2,1049,,,,
27 | 0.5021936297416687,2,1099,,,,
28 | 0.39235246181488037,2,1149,,,,
29 | 0.4701291620731354,2,1199,,,,
30 | 0.6642038226127625,2,1249,,,,
31 | 0.3132133483886719,2,1299,,,,
32 | 0.4558600187301636,2,1349,,,,
33 | ,2,1349,0.4215010106563568,0.8218749761581421,,
34 | ,2,1349,,,0.8137500286102295,
35 | 0.5070080757141113,3,1399,,,,
36 | 0.3764300048351288,3,1449,,,,
37 | 0.41579946875572205,3,1499,,,,
38 | 0.4973910450935364,3,1549,,,,
39 | 0.38687312602996826,3,1599,,,,
40 | 0.38586685061454773,3,1649,,,,
41 | 0.3792935609817505,3,1699,,,,
42 | 0.2482021301984787,3,1749,,,,
43 | 0.3135770857334137,3,1799,,,,
44 | ,3,1799,0.43951261043548584,0.8118749856948853,,
45 | ,3,1799,,,0.8353472352027893,
46 | 0.31604233384132385,4,1849,,,,
47 | 0.2924385368824005,4,1899,,,,
48 | 0.5909687280654907,4,1949,,,,
49 | 0.43162015080451965,4,1999,,,,
50 | 0.1551673710346222,4,2049,,,,
51 | 0.294137567281723,4,2099,,,,
52 | 0.27724581956863403,4,2149,,,,
53 | 0.30173832178115845,4,2199,,,,
54 | 0.3373233675956726,4,2249,,,,
55 | ,4,2249,0.3982531428337097,0.8399999737739563,,
56 | ,4,2249,,,0.8496527671813965,
57 | 0.37676140666007996,5,2299,,,,
58 | 0.24763153493404388,5,2349,,,,
59 | 0.3966788947582245,5,2399,,,,
60 | 0.31472867727279663,5,2449,,,,
61 | 0.43135133385658264,5,2499,,,,
62 | 0.20865577459335327,5,2549,,,,
63 | 0.4858931601047516,5,2599,,,,
64 | 0.34753215312957764,5,2649,,,,
65 | 0.31440043449401855,5,2699,,,,
66 | ,5,2699,0.3687181770801544,0.8531249761581421,,
67 | ,5,2699,,,0.8579166531562805,
68 | 0.38662850856781006,6,2749,,,,
69 | 0.3259159028530121,6,2799,,,,
70 | 0.46409744024276733,6,2849,,,,
71 | 0.39982515573501587,6,2899,,,,
72 | 0.12523581087589264,6,2949,,,,
73 | 0.2844661772251129,6,2999,,,,
74 | 0.22201985120773315,6,3049,,,,
75 | 0.18588170409202576,6,3099,,,,
76 | 0.3063857853412628,6,3149,,,,
77 | ,6,3149,0.3941511809825897,0.8487499952316284,,
78 | ,6,3149,,,0.867638885974884,
79 | 0.20423810184001923,7,3199,,,,
80 | 0.33459576964378357,7,3249,,,,
81 | 0.23248085379600525,7,3299,,,,
82 | 0.20521828532218933,7,3349,,,,
83 | 0.35759085416793823,7,3399,,,,
84 | 0.15906117856502533,7,3449,,,,
85 | 0.32100117206573486,7,3499,,,,
86 | 0.2697495222091675,7,3549,,,,
87 | 0.22270238399505615,7,3599,,,,
88 | ,7,3599,0.3673551082611084,0.862500011920929,,
89 | ,7,3599,,,0.8736805319786072,
90 | 0.25052112340927124,8,3649,,,,
91 | 0.31102049350738525,8,3699,,,,
92 | 0.29404163360595703,8,3749,,,,
93 | 0.15693902969360352,8,3799,,,,
94 | 0.23918089270591736,8,3849,,,,
95 | 0.15603046119213104,8,3899,,,,
96 | 0.15565559267997742,8,3949,,,,
97 | 0.3508184850215912,8,3999,,,,
98 | 0.20335736870765686,8,4049,,,,
99 | ,8,4049,0.370413213968277,0.8756250143051147,,
100 | ,8,4049,,,0.8795138597488403,
101 | 0.22157014906406403,9,4099,,,,
102 | 0.18203894793987274,9,4149,,,,
103 | 0.21458816528320312,9,4199,,,,
104 | 0.18833862245082855,9,4249,,,,
105 | 0.290866881608963,9,4299,,,,
106 | 0.2798851728439331,9,4349,,,,
107 | 0.2465393990278244,9,4399,,,,
108 | 0.2927503287792206,9,4449,,,,
109 | 0.19159245491027832,9,4499,,,,
110 | ,9,4499,0.38742703199386597,0.8575000166893005,,
111 | ,9,4499,,,0.8877778053283691,
112 | ,10,4500,,,,0.8557500243186951
113 | ,10,4500,,,,0.8622499704360962
114 | ,10,4500,,,,0.8557500243186951
115 |
--------------------------------------------------------------------------------
/templates/modern-early-stop-with-checkpointing/shared_utilities.py:
--------------------------------------------------------------------------------
1 | import lightning as L
2 | import numpy as np
3 | import torch
4 | import torch.nn.functional as F
5 | import torchmetrics
6 | from sklearn.datasets import make_classification
7 | from sklearn.model_selection import train_test_split
8 | from torch.utils.data import DataLoader, Dataset
9 |
10 |
11 | class LightningModel(L.LightningModule):
12 | def __init__(self, model, learning_rate):
13 | super().__init__()
14 |
15 | self.learning_rate = learning_rate
16 | self.model = model
17 |
18 | self.train_acc = torchmetrics.Accuracy(task="multiclass", num_classes=2)
19 | self.val_acc = torchmetrics.Accuracy(task="multiclass", num_classes=2)
20 | self.test_acc = torchmetrics.Accuracy(task="multiclass", num_classes=2)
21 |
22 | def forward(self, x):
23 | return self.model(x)
24 |
25 | def _shared_step(self, batch):
26 | features, true_labels = batch
27 | logits = self(features)
28 |
29 | loss = F.cross_entropy(logits, true_labels)
30 | predicted_labels = torch.argmax(logits, dim=1)
31 | return loss, true_labels, predicted_labels
32 |
33 | def training_step(self, batch, batch_idx):
34 | loss, true_labels, predicted_labels = self._shared_step(batch)
35 |
36 | self.log("train_loss", loss)
37 | self.train_acc(predicted_labels, true_labels)
38 | self.log(
39 | "train_acc", self.train_acc, prog_bar=True, on_epoch=True, on_step=False
40 | )
41 | return loss
42 |
43 | def validation_step(self, batch, batch_idx):
44 | loss, true_labels, predicted_labels = self._shared_step(batch)
45 |
46 | self.log("val_loss", loss, prog_bar=True)
47 | self.val_acc(predicted_labels, true_labels)
48 | self.log("val_acc", self.val_acc, prog_bar=True)
49 |
50 | def test_step(self, batch, batch_idx):
51 | loss, true_labels, predicted_labels = self._shared_step(batch)
52 | self.test_acc(predicted_labels, true_labels)
53 | self.log("test_acc", self.test_acc)
54 |
55 | def configure_optimizers(self):
56 | optimizer = torch.optim.SGD(self.parameters(), lr=self.learning_rate)
57 | return optimizer
58 |
59 |
60 | class CustomDataset(Dataset):
61 | def __init__(self, feature_array, label_array, transform=None):
62 |
63 | self.x = feature_array
64 | self.y = label_array
65 | self.transform = transform
66 |
67 | def __getitem__(self, index):
68 | x = self.x[index]
69 | y = self.y[index]
70 |
71 | if self.transform is not None:
72 | x = self.transform(x)
73 |
74 | return x, y
75 |
76 | def __len__(self):
77 | return self.y.shape[0]
78 |
79 |
80 | class CustomDataModule(L.LightningDataModule):
81 | def __init__(self, data_dir="./mnist", batch_size=64):
82 | super().__init__()
83 | self.data_dir = data_dir
84 | self.batch_size = batch_size
85 |
86 | def prepare_data(self):
87 | # download
88 | pass
89 |
90 | def setup(self, stage: str):
91 |
92 | X, y = make_classification(
93 | n_samples=20000,
94 | n_features=100,
95 | n_informative=10,
96 | n_redundant=40,
97 | n_repeated=25,
98 | n_clusters_per_class=5,
99 | flip_y=0.05,
100 | class_sep=0.5,
101 | random_state=123,
102 | )
103 |
104 | X_train, X_test, y_train, y_test = train_test_split(
105 | X, y, test_size=0.2, random_state=123
106 | )
107 |
108 | X_train, X_val, y_train, y_val = train_test_split(
109 | X_train, y_train, test_size=0.1, random_state=123
110 | )
111 |
112 | self.train_dataset = CustomDataset(
113 | feature_array=X_train.astype(np.float32),
114 | label_array=y_train.astype(np.int64),
115 | )
116 |
117 | self.val_dataset = CustomDataset(
118 | feature_array=X_val.astype(np.float32), label_array=y_val.astype(np.int64)
119 | )
120 |
121 | self.test_dataset = CustomDataset(
122 | feature_array=X_test.astype(np.float32), label_array=y_test.astype(np.int64)
123 | )
124 |
125 | def train_dataloader(self):
126 | train_loader = DataLoader(
127 | dataset=self.train_dataset,
128 |             batch_size=self.batch_size,
129 | shuffle=True,
130 | drop_last=True,
131 | num_workers=0,
132 | )
133 | return train_loader
134 |
135 | def val_dataloader(self):
136 | val_loader = DataLoader(
137 | dataset=self.val_dataset,
138 |             batch_size=self.batch_size,
139 | shuffle=False,
140 | num_workers=0,
141 | )
142 | return val_loader
143 |
144 | def test_dataloader(self):
145 | test_loader = DataLoader(
146 |             dataset=self.test_dataset, batch_size=self.batch_size, shuffle=False, num_workers=0
147 | )
148 | return test_loader
149 |
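150 | 
151 | # Typical Trainer wiring for this early-stopping template (a sketch; the
152 | # training script itself is not part of this folder, only its logs are):
153 | # from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint
154 | # from lightning.pytorch.loggers import CSVLogger
155 | # callbacks = [
156 | #     EarlyStopping(monitor="val_acc", mode="max", patience=3),  # patience is a placeholder
157 | #     ModelCheckpoint(monitor="val_acc", mode="max", save_last=True),  # writes last.ckpt
158 | # ]
159 | # trainer = L.Trainer(max_epochs=100, callbacks=callbacks,
160 | #                     logger=CSVLogger(save_dir="logs", name="my-model"))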
--------------------------------------------------------------------------------
/templates/pl_classifier/README.md:
--------------------------------------------------------------------------------
1 | # Classifier Project Template
2 |
3 |
4 |
5 | This is a classifier template code for re-use. In this specific instance, it's MobileNet v3 (large) on CIFAR-10 (rescaled to ImageNet size, 224x224).
6 |
7 |
8 |
9 | I recommend setting up this project as follows:
10 |
11 |
12 |
13 | ## 1 - Set up a fresh environment
14 |
15 | ```bash
16 | conda create -n clf-template python=3.8
17 | conda activate clf-template
18 | ```
19 |
20 |
21 |
22 | ## 2 - Install project requirements
23 |
24 |
25 | ```bash
26 | pip install -r requirements.txt
27 | ```
28 |
29 |
30 |
31 | ## 3 - Install utility code as a Python package
32 |
33 | This is optional and only required if you want to run the code outside this repository.
34 |
35 | Assuming you are inside this folder, run
36 |
37 | ```bash
38 | pip install -e .
39 | ```
40 |
41 |
42 |
43 | ## 4 - Inspect the Dataset
44 |
45 |
46 |
47 | Run the notebook [./notebooks/4_inspecting-the-dataset.ipynb](./notebooks/4_inspecting-the-dataset.ipynb).
48 |
49 |
50 |
51 | ## 5 - Run the Main Training Script
52 |
53 |
54 | Run the [main.py](main.py) code as follows, e.g., on a server:
55 |
56 | ```bash
57 | python main.py --output_path my-results \
58 |                --mixed_precision true \
59 |                --num_epochs 10 \
60 |                --batch_size 128 \
61 |                --learning_rate 0.0005 \
62 |                --accelerator gpu \
63 |                --num_devices 4 \
64 |                --strategy ddp_spawn \
65 |                --log_accuracy true
66 | ```
67 | 
68 |
69 | - Run this script with different hyperparameter settings.
70 | - You can change `--num_devices` to `"auto"` to utilize all GPUs on the given machine.
71 |
72 |
73 |
74 |
75 |
76 |
77 | ## 6 - Inspect the results
78 |
79 | Run the notebook [./notebooks/6_evaluating-the-results.ipynb](./notebooks/6_evaluating-the-results.ipynb).
80 |
81 |
82 |
83 | ## 7 - Iterate
84 |
85 | - Repeat steps 4-6 with modified datasets, models, and so forth.
86 |
87 |
88 |
89 | ## 8 - Use the Final Model
90 |
91 | - See the [Inference in Production](https://pytorch-lightning.readthedocs.io/en/stable/common/production_inference.html) docs for your use case.
92 |
93 |
94 |
95 |
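96 | Tip: To pin training to specific GPUs, [main.py](main.py) also accepts a comma-separated `--device_numbers` list, e.g.:
97 | 
98 | ```bash
99 | python main.py --output_path my-results --accelerator gpu --device_numbers "0,2"
100 | ```
101 | 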
--------------------------------------------------------------------------------
/templates/pl_classifier/main.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import time
3 |
4 | import lightning as L # pip install lightning
5 | import torch
6 | from lightning.pytorch.callbacks import ModelCheckpoint
7 | from lightning.pytorch.loggers import CSVLogger
8 | from torchvision import transforms
9 | from watermark import watermark # pip install watermark
10 |
11 | from my_classifier_template.dataset import Cifar10DataModule
12 | from my_classifier_template.model import LightningClassifier
13 |
14 |
15 | def parse_cmdline_args(parser=None):
16 |
17 | if parser is None:
18 | parser = argparse.ArgumentParser()
19 |
20 | parser.add_argument("--accelerator", type=str, default="auto")
21 |
22 | parser.add_argument("--batch_size", type=int, default=32)
23 |
24 | parser.add_argument("--data_path", type=str, default="./data")
25 |
26 | parser.add_argument("--learning_rate", type=float, default=0.0005)
27 |
28 | parser.add_argument(
29 | "--log_accuracy", type=str, choices=("true", "false"), default="true"
30 | )
31 |
32 | parser.add_argument(
33 | "--mixed_precision", type=str, choices=("true", "false"), default="true"
34 | )
35 |
36 | parser.add_argument("--num_epochs", type=int, default=10)
37 |
38 | parser.add_argument("--num_workers", type=int, default=3)
39 |
40 | parser.add_argument("--output_path", type=str, required=True)
41 |
42 | parser.add_argument(
43 | "--pretrained", type=str, choices=("true", "false"), default="false"
44 | )
45 |
46 | parser.add_argument("--num_devices", nargs="+", default="auto")
47 |
48 | parser.add_argument("--device_numbers", type=str, default="")
49 |
50 | parser.add_argument("--random_seed", type=int, default=-1)
51 |
52 | parser.add_argument("--strategy", type=str, default="")
53 |
54 | 
55 | args = parser.parse_args()
56 |
57 | if not args.strategy:
58 | args.strategy = None
59 |
60 |     # "auto" (the default) lets Lightning choose; otherwise take the first value
61 |     args.devices = "auto" if args.num_devices == "auto" else int(args.num_devices[0])
62 | if args.device_numbers:
63 | args.devices = [int(i) for i in args.device_numbers.split(",")]
64 |
65 | d = {"true": True, "false": False}
66 |
67 | args.log_accuracy = d[args.log_accuracy]
68 | args.pretrained = d[args.pretrained]
69 | args.mixed_precision = d[args.mixed_precision]
70 | if args.mixed_precision:
71 | args.mixed_precision = 16
72 | else:
73 | args.mixed_precision = 32
74 |
75 | return args
76 |
77 |
78 | if __name__ == "__main__":
79 |
80 | print(watermark())
81 | print(watermark(packages="torch,pytorch_lightning"))
82 |
83 | parser = argparse.ArgumentParser()
84 | args = parse_cmdline_args(parser)
85 |
86 | torch.manual_seed(args.random_seed)
87 |
88 | custom_train_transform = transforms.Compose(
89 | [
90 | transforms.Resize((256, 256)),
91 | transforms.RandomCrop((224, 224)),
92 | transforms.ToTensor(),
93 | ]
94 | )
95 |
96 | custom_test_transform = transforms.Compose(
97 | [
98 | transforms.Resize((256, 256)),
99 | transforms.CenterCrop((224, 224)),
100 | transforms.ToTensor(),
101 | ]
102 | )
103 |
104 | data_module = Cifar10DataModule(
105 | batch_size=args.batch_size,
106 | data_path=args.data_path,
107 | num_workers=args.num_workers,
108 | train_transform=custom_train_transform,
109 | test_transform=custom_test_transform,
110 | )
111 |
112 | pytorch_model = torch.hub.load(
113 | "pytorch/vision:v0.11.0", "mobilenet_v3_large", pretrained=args.pretrained
114 | )
115 |
116 |     # Replace the final classifier layer: 10 output units for the CIFAR-10 classes
117 |     pytorch_model.classifier[-1] = torch.nn.Linear(
118 |         in_features=1280, out_features=10)  # 1280 in-features, as in the original head
119 |
120 | lightning_model = LightningClassifier(
121 | pytorch_model, learning_rate=args.learning_rate, log_accuracy=args.log_accuracy
122 | )
123 |
124 | if args.log_accuracy:
125 | callbacks = [
126 | ModelCheckpoint(
127 | save_top_k=1, mode="max", monitor="valid_acc"
128 | ) # save top 1 model
129 | ]
130 | else:
131 | callbacks = [
132 | ModelCheckpoint(
133 | save_top_k=1, mode="min", monitor="valid_loss"
134 | ) # save top 1 model
135 | ]
136 |
137 | logger = CSVLogger(save_dir=args.output_path, name="my-model")
138 |
139 | trainer = L.Trainer(
140 | max_epochs=args.num_epochs,
141 | callbacks=callbacks,
142 | accelerator=args.accelerator,
143 | devices=args.devices,
144 | logger=logger,
145 | strategy=args.strategy,
146 | precision=args.mixed_precision,
147 | deterministic=False,
148 | log_every_n_steps=10,
149 | )
150 |
151 | start_time = time.time()
152 | trainer.fit(model=lightning_model, datamodule=data_module)
153 |
154 | train_time = time.time()
155 | runtime = (train_time - start_time) / 60
156 | print(f"Training took {runtime:.2f} min.")
157 |
158 | # setup data on host machine
159 | data_module.prepare_data()
160 | data_module.setup()
161 |
162 | before = time.time()
163 | val_acc = trainer.test(dataloaders=data_module.val_dataloader())
164 | runtime = (time.time() - before) / 60
165 | print(f"Inference on the validation set took {runtime:.2f} min.")
166 |
167 | runtime = (time.time() - start_time) / 60
168 | print(f"The total runtime was {runtime:.2f} min.")
169 |
170 | print("Validation accuracy:", val_acc)
171 |
172 | print("Trainer log dir:", trainer.logger.log_dir)
173 |
174 | path = trainer.checkpoint_callback.best_model_path
175 | print("Best model path:", path)
176 |
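177 | 
178 | # To restore the best model later (a sketch): because "model" is excluded from
179 | # save_hyperparameters, the PyTorch module has to be passed in again, e.g.,
180 | # LightningClassifier.load_from_checkpoint(path, model=pytorch_model)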
--------------------------------------------------------------------------------
/templates/pl_classifier/my_classifier_template/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/machine-learning-notes/8c8e867930d000131e463a00c864ce42e8661cad/templates/pl_classifier/my_classifier_template/__init__.py
--------------------------------------------------------------------------------
/templates/pl_classifier/my_classifier_template/dataset.py:
--------------------------------------------------------------------------------
1 | import pytorch_lightning as pl
2 | from torch.utils.data import DataLoader
3 | from torch.utils.data.dataset import random_split
4 | from torchvision import datasets, transforms
5 |
6 |
7 | class Cifar10DataModule(pl.LightningDataModule):
8 | def __init__(
9 | self,
10 | batch_size,
11 | train_transform=None,
12 | test_transform=None,
13 | num_workers=4,
14 | data_path="./",
15 | ):
16 | super().__init__()
17 | self.data_path = data_path
18 | self.batch_size = batch_size
19 | self.num_workers = num_workers
20 | self.custom_train_transform = train_transform
21 | self.custom_test_transform = test_transform
22 |
23 | def prepare_data(self):
24 | datasets.CIFAR10(root=self.data_path, download=True)
25 | return
26 |
27 | def setup(self, stage=None):
28 |
29 | if self.custom_train_transform is None:
30 | self.train_transform = transforms.Compose(
31 | [
32 | transforms.Resize((70, 70)),
33 | transforms.RandomCrop((64, 64)),
34 | transforms.ToTensor(),
35 | ]
36 | )
37 | else:
38 | self.train_transform = self.custom_train_transform
39 |
40 |         if self.custom_test_transform is None:
41 | self.test_transform = transforms.Compose(
42 | [
43 | transforms.Resize((70, 70)),
44 | transforms.CenterCrop((64, 64)),
45 | transforms.ToTensor(),
46 | ]
47 | )
48 | else:
49 | self.test_transform = self.custom_test_transform
50 |
51 | train = datasets.CIFAR10(
52 | root=self.data_path,
53 | train=True,
54 | transform=self.train_transform,
55 | download=False,
56 | )
57 |
58 | self.test = datasets.CIFAR10(
59 | root=self.data_path,
60 | train=False,
61 | transform=self.test_transform,
62 | download=False,
63 | )
64 |
65 | self.train, self.valid = random_split(train, lengths=[45000, 5000])
66 |
67 | def train_dataloader(self):
68 | train_loader = DataLoader(
69 | dataset=self.train,
70 | batch_size=self.batch_size,
71 | drop_last=True,
72 | shuffle=True,
73 | persistent_workers=True,
74 | num_workers=self.num_workers,
75 | )
76 | return train_loader
77 |
78 | def val_dataloader(self):
79 | valid_loader = DataLoader(
80 | dataset=self.valid,
81 | batch_size=self.batch_size,
82 | drop_last=False,
83 | persistent_workers=True,
84 | shuffle=False,
85 | num_workers=self.num_workers,
86 | )
87 | return valid_loader
88 |
89 | def test_dataloader(self):
90 | test_loader = DataLoader(
91 | dataset=self.test,
92 | batch_size=self.batch_size,
93 | drop_last=False,
94 | persistent_workers=True,
95 | shuffle=False,
96 | num_workers=self.num_workers,
97 | )
98 | return test_loader
99 |
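100 | 
101 | # Standalone usage (a sketch):
102 | # dm = Cifar10DataModule(batch_size=32, data_path="./data")
103 | # dm.prepare_data()
104 | # dm.setup()
105 | # features, labels = next(iter(dm.train_dataloader()))
106 | # print(features.shape)  # torch.Size([32, 3, 64, 64]) with the default transforms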
--------------------------------------------------------------------------------
/templates/pl_classifier/my_classifier_template/model.py:
--------------------------------------------------------------------------------
1 | import pytorch_lightning as pl
2 | import torch
3 | import torchmetrics
4 |
5 |
6 | # LightningModule that receives a PyTorch model as input
7 | class LightningClassifier(pl.LightningModule):
8 | def __init__(self, model, learning_rate, log_accuracy):
9 | super().__init__()
10 |
11 | self.log_accuracy = log_accuracy
12 |
13 | # Note that the other __init__ parameters will be available as
14 | # self.hparams.argname after calling self.save_hyperparameters below
15 |
16 | # The inherited PyTorch module
17 | self.model = model
18 | if hasattr(model, "dropout_proba"):
19 | self.dropout_proba = model.dropout_proba
20 |
21 | # Save settings and hyperparameters to the log directory
22 | # but skip the model parameters
23 | self.save_hyperparameters(ignore=["model"])
24 |
25 | # Set up attributes for computing the accuracy
26 | self.train_acc = torchmetrics.Accuracy()
27 | self.valid_acc = torchmetrics.Accuracy()
28 | self.test_acc = torchmetrics.Accuracy()
29 |
30 | # Defining the forward method is only necessary
31 | # if you want to use a Trainer's .predict() method (optional)
32 | def forward(self, x):
33 | return self.model(x)
34 |
35 | # A common forward step to compute the loss and labels
36 | # this is used for training, validation, and testing below
37 | def _shared_step(self, batch):
38 | features, true_labels = batch
39 | logits = self(features)
40 | loss = torch.nn.functional.cross_entropy(logits, true_labels)
41 | predicted_labels = torch.argmax(logits, dim=1)
42 |
43 | return loss, true_labels, predicted_labels
44 |
45 | def training_step(self, batch, batch_idx):
46 | loss, true_labels, predicted_labels = self._shared_step(batch)
47 | self.log("train_loss", loss)
48 |
49 | # Do another forward pass in .eval() mode to compute accuracy
50 |         # while accounting for Dropout, BatchNorm, etc. behavior
51 | # during evaluation (inference)
52 | self.model.eval()
53 | with torch.no_grad():
54 | _, true_labels, predicted_labels = self._shared_step(batch)
55 |
56 | if self.log_accuracy:
57 | self.train_acc(predicted_labels, true_labels)
58 | self.log("train_acc", self.train_acc, on_epoch=True, on_step=False)
59 | self.model.train()
60 |
61 |         return loss  # this is passed to the optimizer for training
62 |
63 | def validation_step(self, batch, batch_idx):
64 | loss, true_labels, predicted_labels = self._shared_step(batch)
65 | self.log("valid_loss", loss)
66 | self.valid_acc(predicted_labels, true_labels)
67 |
68 | if self.log_accuracy:
69 | self.log(
70 | "valid_acc",
71 | self.valid_acc,
72 | on_epoch=True,
73 | on_step=False,
74 | prog_bar=True,
75 | )
76 |
77 | def test_step(self, batch, batch_idx):
78 | loss, true_labels, predicted_labels = self._shared_step(batch)
79 | self.test_acc(predicted_labels, true_labels)
80 | self.log("test_acc", self.test_acc, on_epoch=True, on_step=False)
81 |
82 | def configure_optimizers(self):
83 | optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)
84 | return optimizer
85 |
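86 | 
87 | # Because forward() is defined above, the Trainer's .predict() method can be
88 | # used (a sketch; assumes a trained `lightning_model` and a `data_module`):
89 | # trainer = pl.Trainer(accelerator="auto", devices=1)
90 | # batched_logits = trainer.predict(lightning_model, dataloaders=data_module.test_dataloader())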
--------------------------------------------------------------------------------
/templates/pl_classifier/my_classifier_template/plotting.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | import torch
4 |
5 |
6 | def show_failures(
7 | model,
8 | data_loader,
9 | unnormalizer=None,
10 | class_dict=None,
11 | nrows=3,
12 | ncols=5,
13 | figsize=None,
14 | ):
15 |
16 | failure_features = []
17 | failure_pred_labels = []
18 | failure_true_labels = []
19 |
20 | for batch_idx, (features, targets) in enumerate(data_loader):
21 |
22 | with torch.no_grad():
23 |             # move data to the model's device here if needed, e.g.,
24 |             # features, targets = features.to(device), targets.to(device)
25 | logits = model(features)
26 | predictions = torch.argmax(logits, dim=1)
27 |
28 | for i in range(features.shape[0]):
29 | if targets[i] != predictions[i]:
30 | failure_features.append(features[i])
31 | failure_pred_labels.append(predictions[i])
32 | failure_true_labels.append(targets[i])
33 |
34 | if len(failure_true_labels) >= nrows * ncols:
35 | break
36 |
37 | features = torch.stack(failure_features, dim=0)
38 | targets = torch.tensor(failure_true_labels)
39 | predictions = torch.tensor(failure_pred_labels)
40 |
41 | fig, axes = plt.subplots(
42 | nrows=nrows, ncols=ncols, sharex=True, sharey=True, figsize=figsize
43 | )
44 |
45 | if unnormalizer is not None:
46 | for idx in range(features.shape[0]):
47 | features[idx] = unnormalizer(features[idx])
48 | nhwc_img = np.transpose(features, axes=(0, 2, 3, 1))
49 |
50 | if nhwc_img.shape[-1] == 1:
51 | nhw_img = np.squeeze(nhwc_img.numpy(), axis=3)
52 |
53 | for idx, ax in enumerate(axes.ravel()):
54 | ax.imshow(nhw_img[idx], cmap="binary")
55 | if class_dict is not None:
56 | ax.title.set_text(
57 | f"P: {class_dict[predictions[idx].item()]}"
58 | f"\nT: {class_dict[targets[idx].item()]}"
59 | )
60 | else:
61 | ax.title.set_text(f"P: {predictions[idx]} | T: {targets[idx]}")
62 | ax.axison = False
63 |
64 | else:
65 |
66 | for idx, ax in enumerate(axes.ravel()):
67 | ax.imshow(nhwc_img[idx])
68 | if class_dict is not None:
69 | ax.title.set_text(
70 | f"P: {class_dict[predictions[idx].item()]}"
71 | f"\nT: {class_dict[targets[idx].item()]}"
72 | )
73 | else:
74 | ax.title.set_text(f"P: {predictions[idx]} | T: {targets[idx]}")
75 | ax.axison = False
76 | return fig, axes
77 |
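78 | 
79 | # Example call (a sketch; assumes a trained `model` and a set-up `data_module`):
80 | # class_dict = dict(enumerate(data_module.test.classes))
81 | # fig, axes = show_failures(model, data_module.test_dataloader(), class_dict=class_dict)
82 | # plt.show()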
--------------------------------------------------------------------------------
/templates/pl_classifier/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py==1.0.0
2 | aiohttp==3.8.1
3 | aiosignal==1.2.0
4 | appnope==0.1.3
5 | asttokens==2.0.5
6 | async-timeout==4.0.2
7 | attrs==21.4.0
8 | backcall==0.2.0
9 | cachetools==5.1.0
10 | certifi==2022.5.18.1
11 | charset-normalizer==2.0.12
12 | decorator==5.1.1
13 | executing==0.8.3
14 | frozenlist==1.3.0
15 | fsspec==2022.5.0
16 | google-auth==2.6.6
17 | google-auth-oauthlib==0.4.6
18 | grpcio==1.46.3
19 | idna==3.3
20 | importlib-metadata==4.11.4
21 | ipython==8.3.0
22 | jedi==0.18.1
23 | Markdown==3.3.7
24 | matplotlib-inline==0.1.3
25 | mlxtend==0.19.0
26 | multidict==6.0.2
27 | numpy==1.22.4
28 | oauthlib==3.2.0
29 | packaging==21.3
30 | parso==0.8.3
31 | pexpect==4.8.0
32 | pickleshare==0.7.5
33 | Pillow==9.1.1
34 | prompt-toolkit==3.0.29
35 | protobuf==3.20.1
36 | ptyprocess==0.7.0
37 | pure-eval==0.2.2
38 | pyasn1==0.4.8
39 | pyasn1-modules==0.2.8
40 | pyDeprecate==0.3.2
41 | Pygments==2.12.0
42 | pyparsing==3.0.9
43 | pytorch-lightning==1.6.3
44 | PyYAML==6.0
45 | requests==2.27.1
46 | requests-oauthlib==1.3.1
47 | rsa==4.8
48 | six==1.16.0
49 | stack-data==0.2.0
50 | tensorboard==2.9.0
51 | tensorboard-data-server==0.6.1
52 | tensorboard-plugin-wit==1.8.1
53 | torch==1.11.0
54 | torchaudio==0.11.0
55 | torchmetrics==0.8.2
56 | torchvision==0.12.0
57 | tqdm==4.64.0
58 | traitlets==5.2.1.post0
59 | typing_extensions==4.2.0
60 | urllib3==1.26.9
61 | watermark==2.3.0
62 | wcwidth==0.2.5
63 | Werkzeug==2.1.2
64 | yarl==1.7.2
65 | zipp==3.8.0
66 |
--------------------------------------------------------------------------------
/templates/pl_classifier/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 |
3 |
4 | setuptools.setup(
5 | name='my_classifier_template',
6 | version='0.1',
7 | author='sebastian',
8 | packages=setuptools.find_packages(),
9 | )
--------------------------------------------------------------------------------