├── .gitignore ├── LICENSE ├── README.md ├── baselines └── rule-based-text-classifier_vader.ipynb ├── benchmark ├── pandas-pyarrow │ ├── pandas2-pyarrow.ipynb │ └── results.png ├── pytorch-lightning-m1-gpu │ ├── README.md │ ├── main.py │ ├── my_classifier_template │ │ ├── __init__.py │ │ ├── dataset.py │ │ └── model.py │ ├── requirements.txt │ └── setup.py └── pytorch-m1-gpu │ ├── README.md │ ├── lenet-mnist-results │ ├── 1080ti.txt │ ├── 2080ti.txt │ ├── 3090.txt │ ├── intel-cpu.txt │ ├── m1-cpu.txt │ ├── m1-gpu.txt │ ├── m1pro-cpu.txt │ └── m1pro-gpu.txt │ ├── lenet-mnist.py │ ├── mlp-mnist.py │ ├── mlp-results │ ├── m1-cpu-mlp.txt │ ├── m1-gpu-mlp.txt │ ├── mlp-1080ti.txt │ ├── mlp-2080ti.txt │ ├── mlp-3090.txt │ ├── mlp-intel-cpu.txt │ ├── mlp-m1max-gpu_torch1.13.0.dev20220522.txt │ ├── mlp-m1pro-cpu_torch1.12.0.dev20220518.txt │ ├── mlp-m1pro-cpu_torch1.13.0.dev20220522.txt │ ├── mlp-m1pro-gpu_torch1.12.0.dev20220518.txt │ └── mlp-m1pro-gpu_torch1.13.0.dev20220522.txt │ ├── vgg16-cifar10-results │ ├── 1080ti.txt │ ├── 2080ti.txt │ ├── 3090.txt │ ├── intel-cpu.txt │ ├── m1-plain-gpu_torch 1.12.0.dev20220518.txt │ ├── m1-plain-gpu_torch-1.13.0.dev20220522.txt │ ├── m1max-gpu_torch-1.12.0.dev20220518.txt │ ├── m1max-gpu_torch-1.13.0.dev20220522.txt │ ├── m1pro-cpu_torch 1.12.0.dev20220518.txt │ ├── m1pro-cpu_torch1.13.0.dev20220522.txt │ ├── m1pro-gpu_torch 1.12.0.dev20220518.txt │ ├── m1pro-gpu_torch1.13.0.dev20220522.txt │ ├── m1ultra-cpu_torch-1.13.0.dev20220522.txt │ ├── m1ultra-gpu_torch-1.13.0.dev20220522.txt │ ├── titanv-cpu.txt │ └── titanv.txt │ └── vgg16-cifar10.py ├── categorical-features ├── data │ └── iris_mod.csv ├── gradient-boosting-with-categorical-feat.ipynb ├── sklearn-categorical-numerical-mix.ipynb ├── sklearn-onehot-encoding-mixedtype-df.ipynb ├── sklearn-ordinal-encoding-mixedtype-df.ipynb └── sklearn-permutation-importance.ipynb ├── cloud-resources └── xgboost-lightning-gpu │ ├── README.md │ ├── my_xgboost_classifier.py │ └── xgboost-cloud-gpu.py ├── demos ├── basic-pytorch-cnn-for-3-ele-pytorch-video.ipynb ├── data │ └── cat │ │ └── cat.jpeg ├── imagenet_int_to_label.py └── torchvision-efficientnet-v2.ipynb ├── evaluation ├── ci-for-ml │ ├── ci-simulation-repeated.ipynb │ ├── ci-simulation-repeated │ │ ├── 1_normal_approx.py │ │ ├── 2.1_bootstrap_t.py │ │ ├── 2.2_bootstrap_percentile.py │ │ ├── 2.3_bootstrap_632.py │ │ ├── 3_bootstrap_test.py │ │ └── get_dataset.py │ ├── ci-simulation.ipynb │ └── confidence-intervals-for-ml.ipynb └── lightning-jupyter-tensorboard │ ├── notebook.ipynb │ └── shared_utilities.py ├── hyperparameter-tuning-methods ├── 01.1-gridsearch-decisiontree-example.ipynb ├── 01.2-gridsearch-stacking-example.ipynb ├── 02.1-randomsearch-decisiontree-example.ipynb ├── 02.2-randomsearch-stacking-example.ipynb ├── 03.1-hyperopt-decisiontree-example.ipynb ├── 03.2-hyperopt-xgboost-example.ipynb ├── 04.1-optuna-decisiontree-example.ipynb ├── 04.2-optuna-xgboost-example.ipynb ├── 04.3-optuna-lightgbm-example.ipynb ├── 05.1-successive-halving-decisiontree.ipynb ├── 05.2-successive-halving-stacking.ipynb ├── 06.1-genetic-opt.ipynb ├── 07.1-orion_wip.ipynb ├── figures │ └── orion-recommendations.png ├── lightning-hpo-optuna │ ├── README.md │ ├── mlp_cli2.py │ ├── shared_utilities.py │ └── sweeper.py └── sklearn-parameter-sampler.ipynb ├── learning-rates └── scheduler-comparison │ ├── 1-baseline.ipynb │ ├── 2-step-decay.ipynb │ ├── 3-cosine-restarts.ipynb │ ├── 4-cosine-epoch-decay.ipynb │ ├── 5-cosine-batch-decay.ipynb │ ├── 
6-cosine-batch-decay-warmstart.ipynb │ ├── overview.png │ └── shared_utilities.py ├── losses └── pytorch-loss-functions │ ├── binary-cross-entropy-in-pytorch.ipynb │ └── vgg16-smile-classifier │ ├── dataset.py │ ├── model.py │ ├── vgg16-bceloss.ipynb │ └── vgg16-bcewithlogitsloss.ipynb ├── math └── Four-matrix-multiplications.ipynb ├── regression └── pytorch-regression-model.ipynb └── templates ├── lightning-cli ├── cli-configurable │ ├── README.md │ ├── mlp_cli2.py │ └── shared_utilities.py └── cli-simple │ ├── README.md │ ├── mlp_cli.py │ └── shared_utilities.py ├── modern-early-stop-with-checkpointing ├── checkpointing.ipynb ├── logs │ └── my-model │ │ └── version_0 │ │ ├── checkpoints │ │ ├── epoch=8-step=4050.ckpt │ │ └── last.ckpt │ │ ├── hparams.yaml │ │ └── metrics.csv └── shared_utilities.py └── pl_classifier ├── README.md ├── main.py ├── my_classifier_template ├── __init__.py ├── dataset.py ├── model.py └── plotting.py ├── notebooks ├── 4_inspecting-the-dataset.ipynb └── 6_evaluating-the-results.ipynb ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | benchmark/pytorch-m1-gpu/data 2 | 3 | # macOS 4 | .DS_Store 5 | 6 | # Notebooks 7 | .ipynb_checkpoints 8 | 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | pip-wheel-metadata/ 32 | share/python-wheels/ 33 | *.egg-info/ 34 | .installed.cfg 35 | *.egg 36 | MANIFEST 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Unit test / coverage reports 49 | htmlcov/ 50 | .tox/ 51 | .nox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *.cover 58 | *.py,cover 59 | .hypothesis/ 60 | .pytest_cache/ 61 | 62 | # Translations 63 | *.mo 64 | *.pot 65 | 66 | # Django stuff: 67 | *.log 68 | local_settings.py 69 | db.sqlite3 70 | db.sqlite3-journal 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webassets-cache 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # Sphinx documentation 80 | docs/_build/ 81 | 82 | # PyBuilder 83 | target/ 84 | 85 | # Jupyter Notebook 86 | .ipynb_checkpoints 87 | 88 | # IPython 89 | profile_default/ 90 | ipython_config.py 91 | 92 | # pyenv 93 | .python-version 94 | 95 | # pipenv 96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 99 | # install all needed dependencies. 100 | #Pipfile.lock 101 | 102 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 103 | __pypackages__/ 104 | 105 | # Celery stuff 106 | celerybeat-schedule 107 | celerybeat.pid 108 | 109 | # SageMath parsed files 110 | *.sage.py 111 | 112 | # Environments 113 | .env 114 | .venv 115 | env/ 116 | venv/ 117 | ENV/ 118 | env.bak/ 119 | venv.bak/ 120 | 121 | # Spyder project settings 122 | .spyderproject 123 | .spyproject 124 | 125 | # Rope project settings 126 | .ropeproject 127 | 128 | # mkdocs documentation 129 | /site 130 | 131 | # mypy 132 | .mypy_cache/ 133 | .dmypy.json 134 | dmypy.json 135 | 136 | # Pyre type checker 137 | .pyre/ 138 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2022, Sebastian Raschka 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # machine-learning-notes 2 | Collection of useful machine learning codes and snippets (originally intended for my personal use) 3 | -------------------------------------------------------------------------------- /benchmark/pandas-pyarrow/results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/machine-learning-notes/8c8e867930d000131e463a00c864ce42e8661cad/benchmark/pandas-pyarrow/results.png -------------------------------------------------------------------------------- /benchmark/pytorch-lightning-m1-gpu/README.md: -------------------------------------------------------------------------------- 1 | This is some simple benchmark code for evaluating MobileNet v3 (large) on CIFAR-10 (rescaled to ImageNet size, 224x224). 
2 | 3 | 4 | 5 | You can set up the experiments as follows: 6 | 7 | 8 | 9 | ## 1 Set up a fresh environment 10 | 11 | ``` 12 | conda create -n clf-template python=3.8 13 | conda activate clf-template 14 | ``` 15 | 16 | 17 | 18 | ## 2 Install requirements 19 | 20 | 21 | ``` 22 | pip install -r requirements.txt 23 | ``` 24 | 25 | 26 | 27 | Recommended: upgrade PyTorch and PyTorch Lightning to the latest versions, e.g., 28 | 29 | ``` 30 | pip install torch --upgrade 31 | pip install pytorch_lightning --upgrade 32 | ``` 33 | 34 | 35 | 36 | 37 | 38 | ## 3 Install as Python Package 39 | 40 | This is optional and only required if you want to run the code outside this repository. 41 | 42 | Assuming you are inside this folder, run 43 | 44 | ``` 45 | pip install -e . 46 | ``` 47 | 48 | 49 | 50 | ## 4 Install Nightly Releases with M1 GPU support 51 | 52 | 53 | TBD 54 | 55 | 56 | 57 | # Benchmark results 58 | 59 | 60 | 61 | You can run the following commands to replicate the benchmarks. 62 | 63 | 64 | 65 | ## GTX 1080Ti 66 | 67 | On a workstation with 4 x GTX 1080Ti cards and an Intel Xeon E5-2650 CPU (12 cores). 68 | 69 | 70 | 71 | 4 GPUs 72 | 73 | ``` 74 | python main.py --output_path results \ 75 | --mixed_precision false \ 76 | --num_epochs 3 \ 77 | --batch_size 256 \ 78 | --num_devices 4 \ 79 | --log_accuracy false \ 80 | --accelerator gpu \ 81 | --strategy ddp_spawn 82 | ``` 83 | 84 | Training time: 2.20 min 85 | Inference time (test set): 0.32 min 86 | 87 | --- 88 | 89 | 1 GPU 90 | 91 | ``` 92 | python main.py --output_path results \ 93 | --mixed_precision false \ 94 | --num_epochs 3 \ 95 | --batch_size 128 \ 96 | --num_devices 1 \ 97 | --log_accuracy false \ 98 | --accelerator gpu 99 | ``` 100 | 101 | Training time: 6.47 min 102 | Inference time (test set): 0.11 min 103 | 104 | --- 105 | 106 | Multi-CPU with `ddp_spawn` 107 | 108 | ``` 109 | python main.py --output_path results \ 110 | --mixed_precision false \ 111 | --num_epochs 3 \ 112 | --batch_size 256 \ 113 | --num_devices auto \ 114 | --log_accuracy false \ 115 | --accelerator cpu \ 116 | --strategy ddp_spawn 117 | ``` 118 | 119 | Training time: 120 | Inference time (test set): 121 | 122 | --- 123 | 124 | 1 CPU 125 | 126 | ``` 127 | python main.py --output_path results \ 128 | --mixed_precision false \ 129 | --num_epochs 3 \ 130 | --batch_size 256 \ 131 | --log_accuracy false \ 132 | --num_devices 1 \ 133 | --accelerator cpu 134 | ``` 135 | 136 | Training time: 137 | Inference time (test set): 138 | 139 | --- 140 | 141 | 142 | ## RTX 2080Ti 143 | 144 | 4 GPUs 145 | 146 | ``` 147 | python main.py --output_path results \ 148 | --mixed_precision false \ 149 | --num_epochs 3 \ 150 | --batch_size 128 \ 151 | --device_numbers 1,2,3,5 \ 152 | --log_accuracy false \ 153 | --accelerator gpu \ 154 | --strategy ddp_spawn 155 | ``` 156 | 157 | Training time: 1.56 min 158 | Inference time (test set): 0.38 min 159 | 160 | --- 161 | 162 | 4 GPUs, mixed precision 163 | 164 | ``` 165 | python main.py --output_path results \ 166 | --mixed_precision true \ 167 | --num_epochs 3 \ 168 | --batch_size 128 \ 169 | --device_numbers 1,2,3,5 \ 170 | --log_accuracy false \ 171 | --accelerator gpu \ 172 | --strategy ddp_spawn 173 | ``` 174 | 175 | Training time: 1.42 min 176 | Inference time (test set): 0.44 min 177 | 178 | --- 179 | 180 | 1 GPU, mixed precision 181 | 182 | ``` 183 | python main.py --output_path results \ 184 | --mixed_precision true \ 185 | --num_epochs 3 \ 186 | --batch_size 128 \ 187 | --num_devices 1 \ 188 | --log_accuracy false \ 189 | --accelerator gpu \ 190 | --strategy ddp_spawn 191 | ``` 192 | 193 | 194 |
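The M1 results in the section below are still pending. As a rough sketch (untested here, and assuming a PyTorch nightly build with MPS support plus a PyTorch Lightning version that accepts `mps` as an accelerator value; see section 4 above), a single-device M1 GPU run would mirror the 1-GPU commands:

```
python main.py --output_path results \
    --mixed_precision false \
    --num_epochs 3 \
    --batch_size 128 \
    --num_devices 1 \
    --log_accuracy false \
    --accelerator mps
```

`--mixed_precision false` is used here since 16-bit mixed-precision support on MPS was still limited when these benchmarks were run.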
195 | ## M1 Pro 196 | 197 | TBD 198 | -------------------------------------------------------------------------------- /benchmark/pytorch-lightning-m1-gpu/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pytorch_lightning as pl 3 | from pytorch_lightning.callbacks import ModelCheckpoint 4 | from pytorch_lightning.loggers import CSVLogger 5 | import time 6 | import torch 7 | from torchvision import transforms 8 | from watermark import watermark 9 | 10 | from my_classifier_template.dataset import Cifar10DataModule 11 | from my_classifier_template.model import LightningClassifier 12 | 13 | 14 | def parse_cmdline_args(parser=None): 15 | 16 | if parser is None: 17 | parser = argparse.ArgumentParser() 18 | 19 | parser.add_argument('--accelerator', 20 | type=str, 21 | default="auto") 22 | 23 | parser.add_argument('--batch_size', 24 | type=int, 25 | default=32) 26 | 27 | parser.add_argument('--data_path', 28 | type=str, 29 | default='./data') 30 | 31 | parser.add_argument('--learning_rate', 32 | type=float, 33 | default=0.0005) 34 | 35 | parser.add_argument('--log_accuracy', 36 | type=str, 37 | choices=("true", "false"), 38 | default="true") 39 | 40 | parser.add_argument('--mixed_precision', 41 | type=str, 42 | choices=("true", "false"), 43 | default="true") 44 | 45 | parser.add_argument('--num_epochs', 46 | type=int, 47 | default=10) 48 | 49 | parser.add_argument('--num_workers', 50 | type=int, 51 | default=3) 52 | 53 | parser.add_argument('--output_path', 54 | type=str, 55 | required=True) 56 | 57 | parser.add_argument('--pretrained', 58 | type=str, 59 | choices=("true", "false"), 60 | default="false") 61 | 62 | parser.add_argument('--num_devices', 63 | nargs="+", 64 | default="auto") 65 | 66 | parser.add_argument('--device_numbers', 67 | type=str, 68 | default="") 69 | 70 | parser.add_argument('--random_seed', 71 | type=int, 72 | default=-1) 73 | 74 | parser.add_argument('--strategy', 75 | type=str, 76 | default="") 77 | 78 | parser.set_defaults(feature=True) 79 | args = parser.parse_args() 80 | 81 | if not args.strategy: 82 | args.strategy = None 83 | 84 | # `--num_devices` arrives as a list (nargs="+") or as the default string "auto" 85 | args.devices = "auto" if args.num_devices in ("auto", ["auto"]) else int(args.num_devices[0]) 86 | if args.device_numbers: 87 | args.devices = [int(i) for i in args.device_numbers.split(',')] 88 | 89 | d = {'true': True, 90 | 'false': False} 91 | 92 | args.log_accuracy = d[args.log_accuracy] 93 | args.pretrained = d[args.pretrained] 94 | args.mixed_precision = d[args.mixed_precision] 95 | if args.mixed_precision: 96 | args.mixed_precision = 16 97 | else: 98 | args.mixed_precision = 32 99 | 100 | return args 101 | 102 | 103 | if __name__ == "__main__": 104 | 105 | print(watermark()) 106 | print(watermark(packages="torch,pytorch_lightning")) 107 | 108 | parser = argparse.ArgumentParser() 109 | args = parse_cmdline_args(parser) 110 | 111 | torch.manual_seed(args.random_seed) 112 | 113 | custom_train_transform = transforms.Compose( 114 | [ 115 | transforms.Resize((256, 256)), 116 | transforms.RandomCrop((224, 224)), 117 | transforms.ToTensor(), 118 | ] 119 | ) 120 | 121 | custom_test_transform = transforms.Compose( 122 | [ 123 | transforms.Resize((256, 256)), 124 | transforms.CenterCrop((224, 224)), 125 | transforms.ToTensor(), 126 | ] 127 | ) 128 | 129 | data_module = Cifar10DataModule( 130 | batch_size=args.batch_size, 131 | data_path=args.data_path, 132 | num_workers=args.num_workers, 133 | train_transform=custom_train_transform, 134 | test_transform=custom_test_transform) 135 | 136 |
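    # Load MobileNet v3 (large) via torch.hub; with `--pretrained true` this
    # downloads the ImageNet weights. The final classifier layer is replaced
    # right below so that the 1280-dim features map to CIFAR-10's 10 classes.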
pytorch_model = torch.hub.load( 137 | 'pytorch/vision:v0.11.0', 138 | 'mobilenet_v3_large', 139 | pretrained=args.pretrained) 140 | 141 | pytorch_model.classifier[-1] = torch.nn.Linear( 142 | in_features=1280, out_features=10 # as in original 143 | ) # number of class labels in Cifar-10) 144 | 145 | lightning_model = LightningClassifier( 146 | pytorch_model, learning_rate=args.learning_rate, log_accuracy=args.log_accuracy) 147 | 148 | if args.log_accuracy: 149 | callbacks = [ 150 | ModelCheckpoint( 151 | save_top_k=1, mode="max", monitor="valid_acc" 152 | ) # save top 1 model 153 | ] 154 | else: 155 | callbacks = [ 156 | ModelCheckpoint( 157 | save_top_k=1, mode="min", monitor="valid_loss" 158 | ) # save top 1 model 159 | ] 160 | 161 | logger = CSVLogger(save_dir=args.output_path, name="my-model") 162 | 163 | trainer = pl.Trainer( 164 | max_epochs=args.num_epochs, 165 | callbacks=callbacks, 166 | accelerator=args.accelerator, 167 | devices=args.devices, 168 | logger=logger, 169 | strategy=args.strategy, 170 | precision=args.mixed_precision, 171 | deterministic=False, 172 | log_every_n_steps=10, 173 | ) 174 | 175 | start_time = time.time() 176 | trainer.fit(model=lightning_model, datamodule=data_module) 177 | 178 | train_time = time.time() 179 | runtime = (train_time - start_time) / 60 180 | print(f"Training took {runtime:.2f} min.") 181 | 182 | # setup data on host machine 183 | data_module.prepare_data() 184 | data_module.setup() 185 | 186 | before = time.time() 187 | val_acc = trainer.test(dataloaders=data_module.val_dataloader()) 188 | runtime = (time.time() - before) / 60 189 | print(f"Inference on the validation set took {runtime:.2f} min.") 190 | 191 | before = time.time() 192 | test_acc = trainer.test(dataloaders=data_module.test_dataloader()) 193 | runtime = (time.time() - before) / 60 194 | print(f"Inference on the test set took {runtime:.2f} min.") 195 | 196 | runtime = (time.time() - start_time) / 60 197 | print(f"The total runtime was {runtime:.2f} min.") 198 | 199 | print("Validation accuracy:", val_acc) 200 | print("Test accuracy:", test_acc) 201 | -------------------------------------------------------------------------------- /benchmark/pytorch-lightning-m1-gpu/my_classifier_template/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/machine-learning-notes/8c8e867930d000131e463a00c864ce42e8661cad/benchmark/pytorch-lightning-m1-gpu/my_classifier_template/__init__.py -------------------------------------------------------------------------------- /benchmark/pytorch-lightning-m1-gpu/my_classifier_template/dataset.py: -------------------------------------------------------------------------------- 1 | import pytorch_lightning as pl 2 | from torchvision import datasets 3 | from torch.utils.data.dataset import random_split 4 | from torch.utils.data import DataLoader 5 | from torchvision import transforms 6 | 7 | 8 | class Cifar10DataModule(pl.LightningDataModule): 9 | def __init__(self, batch_size, train_transform=None, test_transform=None, num_workers=4, data_path="./"): 10 | super().__init__() 11 | self.data_path = data_path 12 | self.batch_size = batch_size 13 | self.num_workers = num_workers 14 | self.custom_train_transform = train_transform 15 | self.custom_test_transform = test_transform 16 | 17 | def prepare_data(self): 18 | datasets.CIFAR10(root=self.data_path, download=True) 19 | return 20 | 21 | def setup(self, stage=None): 22 | 23 | if self.custom_train_transform is None: 24 | 
self.train_transform = transforms.Compose( 25 | [ 26 | transforms.Resize((70, 70)), 27 | transforms.RandomCrop((64, 64)), 28 | transforms.ToTensor(), 29 | ] 30 | ) 31 | else: 32 | self.train_transform = self.custom_train_transform 33 | 34 | if self.custom_test_transform is None: 35 | self.test_transform = transforms.Compose( 36 | [ 37 | transforms.Resize((70, 70)), 38 | transforms.CenterCrop((64, 64)), 39 | transforms.ToTensor(), 40 | ] 41 | ) 42 | else: 43 | self.test_transform = self.custom_test_transform 44 | 45 | train = datasets.CIFAR10( 46 | root=self.data_path, 47 | train=True, 48 | transform=self.train_transform, 49 | download=False, 50 | ) 51 | 52 | self.test = datasets.CIFAR10( 53 | root=self.data_path, 54 | train=False, 55 | transform=self.test_transform, 56 | download=False, 57 | ) 58 | 59 | self.train, self.valid = random_split(train, lengths=[45000, 5000]) 60 | 61 | def train_dataloader(self): 62 | train_loader = DataLoader( 63 | dataset=self.train, 64 | batch_size=self.batch_size, 65 | drop_last=True, 66 | shuffle=True, 67 | persistent_workers=True, 68 | num_workers=self.num_workers, 69 | ) 70 | return train_loader 71 | 72 | def val_dataloader(self): 73 | valid_loader = DataLoader( 74 | dataset=self.valid, 75 | batch_size=self.batch_size, 76 | drop_last=False, 77 | persistent_workers=True, 78 | shuffle=False, 79 | num_workers=self.num_workers, 80 | ) 81 | return valid_loader 82 | 83 | def test_dataloader(self): 84 | test_loader = DataLoader( 85 | dataset=self.test, 86 | batch_size=self.batch_size, 87 | drop_last=False, 88 | persistent_workers=True, 89 | shuffle=False, 90 | num_workers=self.num_workers, 91 | ) 92 | return test_loader -------------------------------------------------------------------------------- /benchmark/pytorch-lightning-m1-gpu/my_classifier_template/model.py: -------------------------------------------------------------------------------- 1 | 2 | import pytorch_lightning as pl 3 | import torch 4 | import torchmetrics 5 | 6 | 7 | # LightningModule that receives a PyTorch model as input 8 | class LightningClassifier(pl.LightningModule): 9 | def __init__(self, model, learning_rate, log_accuracy): 10 | super().__init__() 11 | 12 | self.log_accuracy = log_accuracy 13 | 14 | # Note that the other __init__ parameters will be available as 15 | # self.hparams.argname after calling self.save_hyperparameters below 16 | 17 | # The wrapped PyTorch model 18 | self.model = model 19 | if hasattr(model, "dropout_proba"): 20 | self.dropout_proba = model.dropout_proba 21 | 22 | # Save settings and hyperparameters to the log directory 23 | # but skip the model parameters 24 | self.save_hyperparameters(ignore=["model"]) 25 | 26 | # Set up attributes for computing the accuracy 27 | self.train_acc = torchmetrics.Accuracy() 28 | self.valid_acc = torchmetrics.Accuracy() 29 | self.test_acc = torchmetrics.Accuracy() 30 | 31 | # Defining the forward method is only necessary 32 | # if you want to use a Trainer's .predict() method (optional) 33 | def forward(self, x): 34 | return self.model(x) 35 | 36 | # A common forward step to compute the loss and labels 37 | # this is used for training, validation, and testing below 38 | def _shared_step(self, batch): 39 | features, true_labels = batch 40 | logits = self(features) 41 | loss = torch.nn.functional.cross_entropy(logits, true_labels) 42 | predicted_labels = torch.argmax(logits, dim=1) 43 | 44 | return loss, true_labels, predicted_labels 45 | 46 | def training_step(self, batch, batch_idx): 47 | loss, true_labels, predicted_labels
= self._shared_step(batch) 48 | self.log("train_loss", loss) 49 | 50 | # Do another forward pass in .eval() mode to compute accuracy 51 | # while accounting for Dropout, BatchNorm etc. behavior 52 | # during evaluation (inference) 53 | self.model.eval() 54 | with torch.no_grad(): 55 | _, true_labels, predicted_labels = self._shared_step(batch) 56 | 57 | if self.log_accuracy: 58 | self.train_acc(predicted_labels, true_labels) 59 | self.log("train_acc", self.train_acc, on_epoch=True, on_step=False) 60 | self.model.train() 61 | 62 | return loss # this is passed to the optimizer for training 63 | 64 | def validation_step(self, batch, batch_idx): 65 | loss, true_labels, predicted_labels = self._shared_step(batch) 66 | self.log("valid_loss", loss) 67 | self.valid_acc(predicted_labels, true_labels) 68 | 69 | if self.log_accuracy: 70 | self.log( 71 | "valid_acc", 72 | self.valid_acc, 73 | on_epoch=True, 74 | on_step=False, 75 | prog_bar=True, 76 | ) 77 | 78 | def test_step(self, batch, batch_idx): 79 | loss, true_labels, predicted_labels = self._shared_step(batch) 80 | self.test_acc(predicted_labels, true_labels) 81 | self.log("test_acc", self.test_acc, on_epoch=True, on_step=False) 82 | 83 | def configure_optimizers(self): 84 | optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate) 85 | return optimizer -------------------------------------------------------------------------------- /benchmark/pytorch-lightning-m1-gpu/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==1.0.0 2 | aiohttp==3.8.1 3 | aiosignal==1.2.0 4 | appnope==0.1.3 5 | asttokens==2.0.5 6 | async-timeout==4.0.2 7 | attrs==21.4.0 8 | backcall==0.2.0 9 | cachetools==5.1.0 10 | certifi==2022.5.18.1 11 | charset-normalizer==2.0.12 12 | decorator==5.1.1 13 | executing==0.8.3 14 | frozenlist==1.3.0 15 | fsspec==2022.5.0 16 | google-auth==2.6.6 17 | google-auth-oauthlib==0.4.6 18 | grpcio==1.46.3 19 | idna==3.3 20 | importlib-metadata==4.11.4 21 | ipython==8.3.0 22 | jedi==0.18.1 23 | Markdown==3.3.7 24 | matplotlib-inline==0.1.3 25 | multidict==6.0.2 26 | numpy==1.22.4 27 | oauthlib==3.2.0 28 | packaging==21.3 29 | parso==0.8.3 30 | pexpect==4.8.0 31 | pickleshare==0.7.5 32 | Pillow==9.1.1 33 | prompt-toolkit==3.0.29 34 | protobuf==3.20.1 35 | ptyprocess==0.7.0 36 | pure-eval==0.2.2 37 | pyasn1==0.4.8 38 | pyasn1-modules==0.2.8 39 | pyDeprecate==0.3.2 40 | Pygments==2.12.0 41 | pyparsing==3.0.9 42 | pytorch-lightning==1.6.3 43 | PyYAML==6.0 44 | requests==2.27.1 45 | requests-oauthlib==1.3.1 46 | rsa==4.8 47 | six==1.16.0 48 | stack-data==0.2.0 49 | tensorboard==2.9.0 50 | tensorboard-data-server==0.6.1 51 | tensorboard-plugin-wit==1.8.1 52 | torch==1.11.0 53 | torchaudio==0.11.0 54 | torchmetrics==0.8.2 55 | torchvision==0.12.0 56 | tqdm==4.64.0 57 | traitlets==5.2.1.post0 58 | typing_extensions==4.2.0 59 | urllib3==1.26.9 60 | watermark==2.3.0 61 | wcwidth==0.2.5 62 | Werkzeug==2.1.2 63 | yarl==1.7.2 64 | zipp==3.8.0 65 | -------------------------------------------------------------------------------- /benchmark/pytorch-lightning-m1-gpu/setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | 4 | setuptools.setup( 5 | name='my_classifier_template', 6 | version='0.1', 7 | author='sebastian', 8 | packages=setuptools.find_packages(), 9 | ) -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/README.md:
-------------------------------------------------------------------------------- 1 | You can run these scripts as follows: 2 | 3 | 4 | 5 | - CPU: `python lenet-mnist.py --device "cpu"` 6 | - NVIDIA GPU: `python lenet-mnist.py --device "cuda"` 7 | - Apple M1: `python lenet-mnist.py --device "mps"` 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/lenet-mnist-results/1080ti.txt: -------------------------------------------------------------------------------- 1 | torch 1.10.0 2 | device cuda:1 3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.3098 4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.2646 5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.1437 6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.1010 7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.0734 8 | Time / epoch without evaluation: 0.44 min 9 | Epoch: 001/001 | Train: 97.32% | Validation: 97.77% | Best Validation (Ep. 001): 97.77% 10 | Time elapsed: 0.94 min 11 | Total Training Time: 0.94 min 12 | Test accuracy 97.40% 13 | Total Time: 0.97 min 14 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/lenet-mnist-results/2080ti.txt: -------------------------------------------------------------------------------- 1 | torch 1.11.0+cu102 2 | device cuda:1 3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.3098 4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.2646 5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.1437 6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.1011 7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.0731 8 | Time / epoch without evaluation: 0.44 min 9 | Epoch: 001/001 | Train: 97.32% | Validation: 97.77% | Best Validation (Ep. 001): 97.77% 10 | Time elapsed: 0.91 min 11 | Total Training Time: 0.91 min 12 | Test accuracy 97.40% 13 | Total Time: 0.94 min 14 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/lenet-mnist-results/3090.txt: -------------------------------------------------------------------------------- 1 | torch 1.11.0 2 | device cuda 3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.3098 4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.2646 5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.1438 6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.1010 7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.0733 8 | Time / epoch without evaluation: 0.18 min 9 | Epoch: 001/001 | Train: 97.32% | Validation: 97.75% | Best Validation (Ep. 001): 97.75% 10 | Time elapsed: 0.37 min 11 | Total Training Time: 0.37 min 12 | Test accuracy 97.40% 13 | Total Time: 0.38 min 14 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/lenet-mnist-results/intel-cpu.txt: -------------------------------------------------------------------------------- 1 | torch 1.10.0 2 | device cpu 3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.3098 4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.2646 5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.1437 6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.1009 7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.0732 8 | Time / epoch without evaluation: 0.41 min 9 | Epoch: 001/001 | Train: 97.32% | Validation: 97.77% | Best Validation (Ep. 
001): 97.77% 10 | Time elapsed: 0.86 min 11 | Total Training Time: 0.86 min 12 | Test accuracy 97.39% 13 | Total Time: 0.89 min 14 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/lenet-mnist-results/m1-cpu.txt: -------------------------------------------------------------------------------- 1 | torch 1.11.0 2 | device cpu 3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.3098 4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.2646 5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.1437 6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.1009 7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.0732 8 | Time / epoch without evaluation: 0.18 min 9 | Epoch: 001/001 | Train: 97.33% | Validation: 97.78% | Best Validation (Ep. 001): 97.78% 10 | Time elapsed: 0.29 min 11 | Total Training Time: 0.29 min 12 | Test accuracy 97.42% 13 | Total Time: 0.31 min 14 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/lenet-mnist-results/m1-gpu.txt: -------------------------------------------------------------------------------- 1 | torch 1.12.0.dev20220518 2 | device mps 3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.3098 4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.2646 5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.1437 6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.1010 7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.0730 8 | Time / epoch without evaluation: 0.92 min 9 | Epoch: 001/001 | Train: 0.00% | Validation: 0.00% | Best Validation (Ep. 001): 0.00% 10 | Time elapsed: 1.13 min 11 | Total Training Time: 1.13 min 12 | Test accuracy 0.00% 13 | Total Time: 1.17 min 14 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/lenet-mnist-results/m1pro-cpu.txt: -------------------------------------------------------------------------------- 1 | torch 1.12.0.dev20220518 2 | device cpu 3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.3098 4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.2646 5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.1437 6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.1009 7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.0733 8 | Time / epoch without evaluation: 0.08 min 9 | Epoch: 001/001 | Train: 97.32% | Validation: 97.77% | Best Validation (Ep. 001): 97.77% 10 | Time elapsed: 0.13 min 11 | Total Training Time: 0.13 min 12 | Test accuracy 97.41% 13 | Total Time: 0.15 min 14 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/lenet-mnist-results/m1pro-gpu.txt: -------------------------------------------------------------------------------- 1 | torch 1.12.0.dev20220518 2 | device mps 3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.3098 4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.2646 5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.1437 6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.1010 7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.0730 8 | Time / epoch without evaluation: 0.58 min 9 | Epoch: 001/001 | Train: 0.00% | Validation: 0.00% | Best Validation (Ep. 
001): 0.00% 10 | Time elapsed: 0.75 min 11 | Total Training Time: 0.75 min 12 | Test accuracy 0.00% 13 | Total Time: 0.78 min 14 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/lenet-mnist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import argparse 5 | import os 6 | import random 7 | import time 8 | 9 | import numpy as np 10 | import torch 11 | import torch.nn as nn 12 | import torchvision 13 | from torch.utils.data import DataLoader, SubsetRandomSampler 14 | from torchvision import datasets, transforms 15 | 16 | 17 | def set_all_seeds(seed): 18 | os.environ["PL_GLOBAL_SEED"] = str(seed) 19 | random.seed(seed) 20 | np.random.seed(seed) 21 | torch.manual_seed(seed) 22 | torch.cuda.manual_seed_all(seed) 23 | 24 | 25 | def compute_accuracy(model, data_loader, device): 26 | model.eval() 27 | with torch.no_grad(): 28 | correct_pred, num_examples = 0, 0 29 | for i, (features, targets) in enumerate(data_loader): 30 | 31 | features = features.to(device) 32 | targets = targets.to(device) 33 | 34 | logits = model(features) 35 | _, predicted_labels = torch.max(logits, 1) 36 | num_examples += targets.size(0) 37 | correct_pred += (predicted_labels.cpu() == targets.cpu()).sum() 38 | return correct_pred.float() / num_examples * 100 39 | 40 | 41 | def train_classifier_simple_v2( 42 | model, 43 | num_epochs, 44 | train_loader, 45 | valid_loader, 46 | test_loader, 47 | optimizer, 48 | device, 49 | logging_interval=50, 50 | best_model_save_path=None, 51 | scheduler=None, 52 | skip_train_acc=False, 53 | scheduler_on="valid_acc", 54 | ): 55 | 56 | start_time = time.time() 57 | minibatch_loss_list, train_acc_list, valid_acc_list = [], [], [] 58 | best_valid_acc, best_epoch = -float("inf"), 0 59 | 60 | for epoch in range(num_epochs): 61 | 62 | epoch_start_time = time.time() 63 | model.train() 64 | for batch_idx, (features, targets) in enumerate(train_loader): 65 | 66 | features = features.to(device) 67 | targets = targets.to(device) 68 | 69 | # ## FORWARD AND BACK PROP 70 | logits = model(features) 71 | loss = torch.nn.functional.cross_entropy(logits, targets) 72 | optimizer.zero_grad() 73 | 74 | loss.backward() 75 | 76 | # ## UPDATE MODEL PARAMETERS 77 | optimizer.step() 78 | 79 | # ## LOGGING 80 | minibatch_loss_list.append(loss.item()) 81 | if not batch_idx % logging_interval: 82 | print( 83 | f"Epoch: {epoch+1:03d}/{num_epochs:03d} " 84 | f"| Batch {batch_idx:04d}/{len(train_loader):04d} " 85 | f"| Loss: {loss:.4f}" 86 | ) 87 | 88 | model.eval() 89 | 90 | elapsed = (time.time() - epoch_start_time) / 60 91 | print(f"Time / epoch without evaluation: {elapsed:.2f} min") 92 | with torch.no_grad(): # save memory during inference 93 | if not skip_train_acc: 94 | train_acc = compute_accuracy(model, train_loader, device=device).item() 95 | else: 96 | train_acc = float("nan") 97 | valid_acc = compute_accuracy(model, valid_loader, device=device).item() 98 | train_acc_list.append(train_acc) 99 | valid_acc_list.append(valid_acc) 100 | 101 | if valid_acc > best_valid_acc: 102 | best_valid_acc, best_epoch = valid_acc, epoch + 1 103 | if best_model_save_path: 104 | torch.save(model.state_dict(), best_model_save_path) 105 | 106 | print( 107 | f"Epoch: {epoch+1:03d}/{num_epochs:03d} " 108 | f"| Train: {train_acc :.2f}% " 109 | f"| Validation: {valid_acc :.2f}% " 110 | f"| Best Validation " 111 | f"(Ep. 
{best_epoch:03d}): {best_valid_acc :.2f}%" 112 | ) 113 | 114 | elapsed = (time.time() - start_time) / 60 115 | print(f"Time elapsed: {elapsed:.2f} min") 116 | 117 | if scheduler is not None: 118 | 119 | if scheduler_on == "valid_acc": 120 | scheduler.step(valid_acc_list[-1]) 121 | elif scheduler_on == "minibatch_loss": 122 | scheduler.step(minibatch_loss_list[-1]) 123 | else: 124 | raise ValueError("Invalid `scheduler_on` choice.") 125 | 126 | elapsed = (time.time() - start_time) / 60 127 | print(f"Total Training Time: {elapsed:.2f} min") 128 | 129 | test_acc = compute_accuracy(model, test_loader, device=device) 130 | print(f"Test accuracy {test_acc :.2f}%") 131 | 132 | elapsed = (time.time() - start_time) / 60 133 | print(f"Total Time: {elapsed:.2f} min") 134 | 135 | return minibatch_loss_list, train_acc_list, valid_acc_list 136 | 137 | 138 | def get_dataloaders_mnist( 139 | batch_size, 140 | num_workers=0, 141 | validation_fraction=None, 142 | train_transforms=None, 143 | test_transforms=None, 144 | ): 145 | 146 | if train_transforms is None: 147 | train_transforms = transforms.ToTensor() 148 | 149 | if test_transforms is None: 150 | test_transforms = transforms.ToTensor() 151 | 152 | train_dataset = datasets.MNIST( 153 | root="data", train=True, transform=train_transforms, download=True 154 | ) 155 | 156 | valid_dataset = datasets.MNIST(root="data", train=True, transform=test_transforms) 157 | 158 | test_dataset = datasets.MNIST(root="data", train=False, transform=test_transforms) 159 | 160 | if validation_fraction is not None: 161 | num = int(validation_fraction * 60000) 162 | train_indices = torch.arange(0, 60000 - num) 163 | valid_indices = torch.arange(60000 - num, 60000) 164 | 165 | train_sampler = SubsetRandomSampler(train_indices) 166 | valid_sampler = SubsetRandomSampler(valid_indices) 167 | 168 | valid_loader = DataLoader( 169 | dataset=valid_dataset, 170 | batch_size=batch_size, 171 | num_workers=num_workers, 172 | sampler=valid_sampler, 173 | ) 174 | 175 | train_loader = DataLoader( 176 | dataset=train_dataset, 177 | batch_size=batch_size, 178 | num_workers=num_workers, 179 | drop_last=True, 180 | sampler=train_sampler, 181 | ) 182 | 183 | else: 184 | train_loader = DataLoader( 185 | dataset=train_dataset, 186 | batch_size=batch_size, 187 | num_workers=num_workers, 188 | drop_last=True, 189 | shuffle=True, 190 | ) 191 | 192 | test_loader = DataLoader( 193 | dataset=test_dataset, 194 | batch_size=batch_size, 195 | num_workers=num_workers, 196 | shuffle=False, 197 | ) 198 | 199 | if validation_fraction is None: 200 | return train_loader, test_loader 201 | else: 202 | return train_loader, valid_loader, test_loader 203 | 204 | 205 | class LeNet5(nn.Module): 206 | def __init__(self, num_classes, grayscale=False): 207 | super(LeNet5, self).__init__() 208 | 209 | self.grayscale = grayscale 210 | self.num_classes = num_classes 211 | 212 | if self.grayscale: 213 | in_channels = 1 214 | else: 215 | in_channels = 3 216 | 217 | self.features = nn.Sequential( 218 | nn.Conv2d(in_channels, 6, kernel_size=5), 219 | nn.Tanh(), 220 | nn.MaxPool2d(kernel_size=2), 221 | nn.Conv2d(6, 16, kernel_size=5), 222 | nn.Tanh(), 223 | nn.MaxPool2d(kernel_size=2), 224 | ) 225 | 226 | self.classifier = nn.Sequential( 227 | nn.Linear(16 * 5 * 5, 120), 228 | nn.Tanh(), 229 | nn.Linear(120, 84), 230 | nn.Tanh(), 231 | nn.Linear(84, num_classes), 232 | ) 233 | 234 | def forward(self, x): 235 | x = self.features(x) 236 | x = torch.flatten(x, 1) 237 | logits = self.classifier(x) 238 | return logits 239 | 240 | 
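# Script entry point: trains LeNet-5 on MNIST for a single epoch and evaluates it.
# Note that the 28x28 MNIST images are resized to 32x32 below, since the
# classifier head's 16*5*5 input assumes LeNet-5's original 32x32 input size.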
241 | if __name__ == "__main__": 242 | 243 | parser = argparse.ArgumentParser() 244 | parser.add_argument( 245 | "--device", type=str, required=True, help="Which GPU device to use." 246 | ) 247 | 248 | args = parser.parse_args() 249 | 250 | RANDOM_SEED = 123 251 | BATCH_SIZE = 128 252 | NUM_EPOCHS = 1 253 | DEVICE = torch.device(args.device) 254 | 255 | print("torch", torch.__version__) 256 | print("device", DEVICE) 257 | 258 | train_transforms = torchvision.transforms.Compose( 259 | [ 260 | torchvision.transforms.Resize((32, 32)), 261 | torchvision.transforms.ToTensor(), 262 | torchvision.transforms.Normalize((0.5), (0.5)), 263 | ] 264 | ) 265 | 266 | test_transforms = torchvision.transforms.Compose( 267 | [ 268 | torchvision.transforms.Resize((32, 32)), 269 | torchvision.transforms.ToTensor(), 270 | torchvision.transforms.Normalize((0.5), (0.5)), 271 | ] 272 | ) 273 | 274 | train_loader, valid_loader, test_loader = get_dataloaders_mnist( 275 | batch_size=BATCH_SIZE, 276 | validation_fraction=0.1, 277 | train_transforms=train_transforms, 278 | test_transforms=test_transforms, 279 | num_workers=2, 280 | ) 281 | 282 | torch.manual_seed(RANDOM_SEED) 283 | 284 | model = LeNet5(num_classes=10, grayscale=True) 285 | 286 | model = model.to(DEVICE) 287 | 288 | optimizer = torch.optim.Adam(model.parameters(), lr=0.001) 289 | 290 | minibatch_loss_list, train_acc_list, valid_acc_list = train_classifier_simple_v2( 291 | model=model, 292 | num_epochs=NUM_EPOCHS, 293 | train_loader=train_loader, 294 | valid_loader=valid_loader, 295 | test_loader=test_loader, 296 | optimizer=optimizer, 297 | best_model_save_path=None, 298 | device=DEVICE, 299 | scheduler_on="valid_acc", 300 | logging_interval=100, 301 | ) 302 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/mlp-mnist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import argparse 5 | import os 6 | import random 7 | import time 8 | 9 | import numpy as np 10 | import torch 11 | import torch.nn as nn 12 | import torchvision 13 | from torch.utils.data import DataLoader, SubsetRandomSampler 14 | from torchvision import datasets, transforms 15 | 16 | 17 | def set_all_seeds(seed): 18 | os.environ["PL_GLOBAL_SEED"] = str(seed) 19 | random.seed(seed) 20 | np.random.seed(seed) 21 | torch.manual_seed(seed) 22 | torch.cuda.manual_seed_all(seed) 23 | 24 | 25 | def compute_accuracy(model, data_loader, device): 26 | model.eval() 27 | with torch.no_grad(): 28 | correct_pred, num_examples = 0, 0 29 | for i, (features, targets) in enumerate(data_loader): 30 | 31 | features = features.to(device) 32 | targets = targets.to(device) 33 | 34 | logits = model(features) 35 | _, predicted_labels = torch.max(logits, 1) 36 | num_examples += targets.size(0) 37 | correct_pred += (predicted_labels.cpu() == targets.cpu()).sum() 38 | return correct_pred.float() / num_examples * 100 39 | 40 | 41 | def train_classifier_simple_v2( 42 | model, 43 | num_epochs, 44 | train_loader, 45 | valid_loader, 46 | test_loader, 47 | optimizer, 48 | device, 49 | logging_interval=50, 50 | best_model_save_path=None, 51 | scheduler=None, 52 | skip_train_acc=False, 53 | scheduler_on="valid_acc", 54 | ): 55 | 56 | start_time = time.time() 57 | minibatch_loss_list, train_acc_list, valid_acc_list = [], [], [] 58 | best_valid_acc, best_epoch = -float("inf"), 0 59 | 60 | for epoch in range(num_epochs): 61 | 62 | epoch_start_time = time.time() 63 | model.train() 64 
| for batch_idx, (features, targets) in enumerate(train_loader): 65 | 66 | features = features.to(device) 67 | targets = targets.to(device) 68 | 69 | # ## FORWARD AND BACK PROP 70 | logits = model(features) 71 | loss = torch.nn.functional.cross_entropy(logits, targets) 72 | optimizer.zero_grad() 73 | 74 | loss.backward() 75 | 76 | # ## UPDATE MODEL PARAMETERS 77 | optimizer.step() 78 | 79 | # ## LOGGING 80 | minibatch_loss_list.append(loss.item()) 81 | if not batch_idx % logging_interval: 82 | print( 83 | f"Epoch: {epoch+1:03d}/{num_epochs:03d} " 84 | f"| Batch {batch_idx:04d}/{len(train_loader):04d} " 85 | f"| Loss: {loss:.4f}" 86 | ) 87 | 88 | model.eval() 89 | 90 | elapsed = (time.time() - epoch_start_time) / 60 91 | print(f"Time / epoch without evaluation: {elapsed:.2f} min") 92 | with torch.no_grad(): # save memory during inference 93 | if not skip_train_acc: 94 | train_acc = compute_accuracy(model, train_loader, device=device).item() 95 | else: 96 | train_acc = float("nan") 97 | valid_acc = compute_accuracy(model, valid_loader, device=device).item() 98 | train_acc_list.append(train_acc) 99 | valid_acc_list.append(valid_acc) 100 | 101 | if valid_acc > best_valid_acc: 102 | best_valid_acc, best_epoch = valid_acc, epoch + 1 103 | if best_model_save_path: 104 | torch.save(model.state_dict(), best_model_save_path) 105 | 106 | print( 107 | f"Epoch: {epoch+1:03d}/{num_epochs:03d} " 108 | f"| Train: {train_acc :.2f}% " 109 | f"| Validation: {valid_acc :.2f}% " 110 | f"| Best Validation " 111 | f"(Ep. {best_epoch:03d}): {best_valid_acc :.2f}%" 112 | ) 113 | 114 | elapsed = (time.time() - start_time) / 60 115 | print(f"Time elapsed: {elapsed:.2f} min") 116 | 117 | if scheduler is not None: 118 | 119 | if scheduler_on == "valid_acc": 120 | scheduler.step(valid_acc_list[-1]) 121 | elif scheduler_on == "minibatch_loss": 122 | scheduler.step(minibatch_loss_list[-1]) 123 | else: 124 | raise ValueError("Invalid `scheduler_on` choice.") 125 | 126 | elapsed = (time.time() - start_time) / 60 127 | print(f"Total Training Time: {elapsed:.2f} min") 128 | 129 | test_acc = compute_accuracy(model, test_loader, device=device) 130 | print(f"Test accuracy {test_acc :.2f}%") 131 | 132 | elapsed = (time.time() - start_time) / 60 133 | print(f"Total Time: {elapsed:.2f} min") 134 | 135 | return minibatch_loss_list, train_acc_list, valid_acc_list 136 | 137 | 138 | def get_dataloaders_mnist( 139 | batch_size, 140 | num_workers=0, 141 | validation_fraction=None, 142 | train_transforms=None, 143 | test_transforms=None, 144 | ): 145 | 146 | if train_transforms is None: 147 | train_transforms = transforms.ToTensor() 148 | 149 | if test_transforms is None: 150 | test_transforms = transforms.ToTensor() 151 | 152 | train_dataset = datasets.MNIST( 153 | root="data", train=True, transform=train_transforms, download=True 154 | ) 155 | 156 | valid_dataset = datasets.MNIST(root="data", train=True, transform=test_transforms) 157 | 158 | test_dataset = datasets.MNIST(root="data", train=False, transform=test_transforms) 159 | 160 | if validation_fraction is not None: 161 | num = int(validation_fraction * 60000) 162 | train_indices = torch.arange(0, 60000 - num) 163 | valid_indices = torch.arange(60000 - num, 60000) 164 | 165 | train_sampler = SubsetRandomSampler(train_indices) 166 | valid_sampler = SubsetRandomSampler(valid_indices) 167 | 168 | valid_loader = DataLoader( 169 | dataset=valid_dataset, 170 | batch_size=batch_size, 171 | num_workers=num_workers, 172 | sampler=valid_sampler, 173 | ) 174 | 175 | train_loader = 
DataLoader( 176 | dataset=train_dataset, 177 | batch_size=batch_size, 178 | num_workers=num_workers, 179 | drop_last=True, 180 | sampler=train_sampler, 181 | ) 182 | 183 | else: 184 | train_loader = DataLoader( 185 | dataset=train_dataset, 186 | batch_size=batch_size, 187 | num_workers=num_workers, 188 | drop_last=True, 189 | shuffle=True, 190 | ) 191 | 192 | test_loader = DataLoader( 193 | dataset=test_dataset, 194 | batch_size=batch_size, 195 | num_workers=num_workers, 196 | shuffle=False, 197 | ) 198 | 199 | if validation_fraction is None: 200 | return train_loader, test_loader 201 | else: 202 | return train_loader, valid_loader, test_loader 203 | 204 | 205 | class PyTorchModel(torch.nn.Module): 206 | def __init__(self, input_size, hidden_units, num_classes): 207 | super().__init__() 208 | 209 | # Initialize MLP layers 210 | all_layers = [] 211 | for hidden_unit in hidden_units: 212 | layer = torch.nn.Linear(input_size, hidden_unit, bias=False) 213 | all_layers.append(layer) 214 | all_layers.append(torch.nn.ReLU()) 215 | input_size = hidden_unit 216 | 217 | output_layer = torch.nn.Linear( 218 | in_features=hidden_units[-1], 219 | out_features=num_classes) 220 | 221 | all_layers.append(output_layer) 222 | self.layers = torch.nn.Sequential(*all_layers) 223 | 224 | def forward(self, x): 225 | x = torch.flatten(x, start_dim=1) # to make it work for image inputs 226 | x = self.layers(x) 227 | return x # x are the model's logits 228 | 229 | 230 | if __name__ == "__main__": 231 | 232 | parser = argparse.ArgumentParser() 233 | parser.add_argument( 234 | "--device", type=str, required=True, help="Which GPU device to use." 235 | ) 236 | 237 | args = parser.parse_args() 238 | 239 | RANDOM_SEED = 123 240 | BATCH_SIZE = 128 241 | NUM_EPOCHS = 1 242 | DEVICE = torch.device(args.device) 243 | 244 | print("torch", torch.__version__) 245 | print("device", DEVICE) 246 | 247 | train_transforms = torchvision.transforms.Compose( 248 | [ 249 | torchvision.transforms.Resize((28, 28)), 250 | torchvision.transforms.ToTensor(), 251 | torchvision.transforms.Normalize((0.5), (0.5)), 252 | ] 253 | ) 254 | 255 | test_transforms = torchvision.transforms.Compose( 256 | [ 257 | torchvision.transforms.Resize((28, 28)), 258 | torchvision.transforms.ToTensor(), 259 | torchvision.transforms.Normalize((0.5), (0.5)), 260 | ] 261 | ) 262 | 263 | train_loader, valid_loader, test_loader = get_dataloaders_mnist( 264 | batch_size=BATCH_SIZE, 265 | validation_fraction=0.1, 266 | train_transforms=train_transforms, 267 | test_transforms=test_transforms, 268 | num_workers=2, 269 | ) 270 | 271 | torch.manual_seed(RANDOM_SEED) 272 | 273 | model = PyTorchModel(input_size=784, hidden_units=(256, 128, 64), num_classes=10) 274 | 275 | model = model.to(DEVICE) 276 | 277 | optimizer = torch.optim.Adam(model.parameters(), lr=0.001) 278 | 279 | minibatch_loss_list, train_acc_list, valid_acc_list = train_classifier_simple_v2( 280 | model=model, 281 | num_epochs=NUM_EPOCHS, 282 | train_loader=train_loader, 283 | valid_loader=valid_loader, 284 | test_loader=test_loader, 285 | optimizer=optimizer, 286 | best_model_save_path=None, 287 | device=DEVICE, 288 | scheduler_on="valid_acc", 289 | logging_interval=100, 290 | ) 291 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/mlp-results/m1-cpu-mlp.txt: -------------------------------------------------------------------------------- 1 | torch 1.12.0.dev20220518 2 | device cpu 3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.2962 4 | Epoch: 001/001 
| Batch 0100/0421 | Loss: 0.4212 5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.2405 6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.3460 7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.2137 8 | Time / epoch without evaluation: 0.06 min 9 | Epoch: 001/001 | Train: 95.62% | Validation: 96.57% | Best Validation (Ep. 001): 96.57% 10 | Time elapsed: 0.10 min 11 | Total Training Time: 0.10 min 12 | Test accuracy 95.76% 13 | Total Time: 0.11 min 14 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/mlp-results/m1-gpu-mlp.txt: -------------------------------------------------------------------------------- 1 | torch 1.12.0.dev20220518 2 | device mps 3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.2962 4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.4186 5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.2519 6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.3339 7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.1897 8 | Time / epoch without evaluation: 0.34 min 9 | Epoch: 001/001 | Train: 0.00% | Validation: 0.00% | Best Validation (Ep. 001): 0.00% 10 | Time elapsed: 0.46 min 11 | Total Training Time: 0.46 min 12 | Test accuracy 0.00% 13 | Total Time: 0.48 min 14 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/mlp-results/mlp-1080ti.txt: -------------------------------------------------------------------------------- 1 | torch 1.10.0 2 | device cuda:1 3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.2962 4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.4133 5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.2513 6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.3265 7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.1889 8 | Time / epoch without evaluation: 0.40 min 9 | Epoch: 001/001 | Train: 95.44% | Validation: 96.42% | Best Validation (Ep. 001): 96.42% 10 | Time elapsed: 0.86 min 11 | Total Training Time: 0.86 min 12 | Test accuracy 95.43% 13 | Total Time: 0.88 min 14 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/mlp-results/mlp-2080ti.txt: -------------------------------------------------------------------------------- 1 | torch 1.8.1+cu102 2 | device cuda:1 3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.2962 4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.4140 5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.2467 6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.3569 7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.1951 8 | Time / epoch without evaluation: 0.34 min 9 | Epoch: 001/001 | Train: 95.67% | Validation: 96.63% | Best Validation (Ep. 001): 96.63% 10 | Time elapsed: 0.72 min 11 | Total Training Time: 0.72 min 12 | Test accuracy 95.82% 13 | Total Time: 0.74 min 14 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/mlp-results/mlp-3090.txt: -------------------------------------------------------------------------------- 1 | torch 1.11.0 2 | device cuda 3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.3063 4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.3426 5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.2980 6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.3660 7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.3541 8 | Time / epoch without evaluation: 0.17 min 9 | Epoch: 001/001 | Train: 92.08% | Validation: 93.63% | Best Validation (Ep. 
001): 93.63% 10 | Time elapsed: 0.36 min 11 | Total Training Time: 0.36 min 12 | Test accuracy 92.35% 13 | Total Time: 0.37 min 14 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/mlp-results/mlp-intel-cpu.txt: -------------------------------------------------------------------------------- 1 | torch 1.10.0 2 | device cpu 3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.2962 4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.4140 5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.2491 6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.3578 7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.1877 8 | Time / epoch without evaluation: 0.39 min 9 | Epoch: 001/001 | Train: 95.67% | Validation: 96.47% | Best Validation (Ep. 001): 96.47% 10 | Time elapsed: 0.83 min 11 | Total Training Time: 0.83 min 12 | Test accuracy 95.68% 13 | Total Time: 0.85 min 14 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/mlp-results/mlp-m1max-gpu_torch1.13.0.dev20220522.txt: -------------------------------------------------------------------------------- 1 | torch 1.13.0.dev20220522 2 | device mps 3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.3098 4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.2646 5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.1437 6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.1010 7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.0732 8 | Time / epoch without evaluation: 0.09 min 9 | Epoch: 001/001 | Train: 97.33% | Validation: 97.77% | Best Validation (Ep. 001): 97.77% 10 | Time elapsed: 0.14 min 11 | Total Training Time: 0.14 min 12 | Test accuracy 97.39% 13 | Total Time: 0.16 min -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/mlp-results/mlp-m1pro-cpu_torch1.12.0.dev20220518.txt: -------------------------------------------------------------------------------- 1 | torch 1.12.0.dev20220518 2 | device cpu 3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.2962 4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.4175 5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.2491 6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.3253 7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.2008 8 | Time / epoch without evaluation: 0.06 min 9 | Epoch: 001/001 | Train: 95.71% | Validation: 96.43% | Best Validation (Ep. 001): 96.43% 10 | Time elapsed: 0.11 min 11 | Total Training Time: 0.11 min 12 | Test accuracy 95.57% 13 | Total Time: 0.13 min 14 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/mlp-results/mlp-m1pro-cpu_torch1.13.0.dev20220522.txt: -------------------------------------------------------------------------------- 1 | torch 1.13.0.dev20220522 2 | device cpu 3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.3063 4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.3429 5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.3083 6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.3685 7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.3482 8 | Time / epoch without evaluation: 0.04 min 9 | Epoch: 001/001 | Train: 91.43% | Validation: 93.38% | Best Validation (Ep. 
001): 93.38% 10 | Time elapsed: 0.08 min 11 | Total Training Time: 0.08 min 12 | Test accuracy 91.99% 13 | Total Time: 0.10 min 14 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/mlp-results/mlp-m1pro-gpu_torch1.12.0.dev20220518.txt: -------------------------------------------------------------------------------- 1 | torch 1.12.0.dev20220518 2 | device mps 3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.2962 4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.4186 5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.2519 6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.3339 7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.1897 8 | Time / epoch without evaluation: 0.22 min 9 | Epoch: 001/001 | Train: 0.00% | Validation: 0.00% | Best Validation (Ep. 001): 0.00% 10 | Time elapsed: 0.34 min 11 | Total Training Time: 0.34 min 12 | Test accuracy 0.00% 13 | Total Time: 0.36 min 14 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/mlp-results/mlp-m1pro-gpu_torch1.13.0.dev20220522.txt: -------------------------------------------------------------------------------- 1 | torch 1.13.0.dev20220522 2 | device mps 3 | Epoch: 001/001 | Batch 0000/0421 | Loss: 2.3063 4 | Epoch: 001/001 | Batch 0100/0421 | Loss: 0.3431 5 | Epoch: 001/001 | Batch 0200/0421 | Loss: 0.3089 6 | Epoch: 001/001 | Batch 0300/0421 | Loss: 0.3688 7 | Epoch: 001/001 | Batch 0400/0421 | Loss: 0.3544 8 | Time / epoch without evaluation: 0.07 min 9 | Epoch: 001/001 | Train: 0.00% | Validation: 0.00% | Best Validation (Ep. 001): 0.00% 10 | Time elapsed: 0.11 min 11 | Total Training Time: 0.11 min 12 | Test accuracy 0.00% 13 | Total Time: 0.13 min 14 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/vgg16-cifar10-results/1080ti.txt: -------------------------------------------------------------------------------- 1 | torch 1.10.0 2 | device cuda:1 3 | Files already downloaded and verified 4 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.4711 5 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.2623 6 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.2856 7 | Epoch: 001/001 | Batch 0300/1406 | Loss: 1.9907 8 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.1907 9 | Epoch: 001/001 | Batch 0500/1406 | Loss: 1.9049 10 | Epoch: 001/001 | Batch 0600/1406 | Loss: 1.8330 11 | Epoch: 001/001 | Batch 0700/1406 | Loss: 1.9606 12 | Epoch: 001/001 | Batch 0800/1406 | Loss: 2.0498 13 | Epoch: 001/001 | Batch 0900/1406 | Loss: 1.7697 14 | Epoch: 001/001 | Batch 1000/1406 | Loss: 1.8773 15 | Epoch: 001/001 | Batch 1100/1406 | Loss: 1.7820 16 | Epoch: 001/001 | Batch 1200/1406 | Loss: 1.6580 17 | Epoch: 001/001 | Batch 1300/1406 | Loss: 2.2616 18 | Epoch: 001/001 | Batch 1400/1406 | Loss: 1.8776 19 | Time / epoch without evaluation: 7.65 min 20 | Epoch: 001/001 | Train: 32.51% | Validation: 33.90% | Best Validation (Ep. 
001): 33.90% 21 | Time elapsed: 10.21 min 22 | Total Training Time: 10.21 min 23 | Test accuracy 33.03% 24 | Total Time: 10.72 min 25 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/vgg16-cifar10-results/2080ti.txt: -------------------------------------------------------------------------------- 1 | torch 1.11.0+cu102 2 | device cuda:1 3 | Files already downloaded and verified 4 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.4484 5 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.1690 6 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.1240 7 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.1157 8 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.3050 9 | Epoch: 001/001 | Batch 0500/1406 | Loss: 2.0078 10 | Epoch: 001/001 | Batch 0600/1406 | Loss: 1.8894 11 | Epoch: 001/001 | Batch 0700/1406 | Loss: 1.7767 12 | Epoch: 001/001 | Batch 0800/1406 | Loss: 2.0213 13 | Epoch: 001/001 | Batch 0900/1406 | Loss: 1.8129 14 | Epoch: 001/001 | Batch 1000/1406 | Loss: 1.8300 15 | Epoch: 001/001 | Batch 1100/1406 | Loss: 2.1053 16 | Epoch: 001/001 | Batch 1200/1406 | Loss: 1.9753 17 | Epoch: 001/001 | Batch 1300/1406 | Loss: 1.6367 18 | Epoch: 001/001 | Batch 1400/1406 | Loss: 1.7172 19 | Time / epoch without evaluation: 5.75 min 20 | Epoch: 001/001 | Train: 36.69% | Validation: 36.24% | Best Validation (Ep. 001): 36.24% 21 | Time elapsed: 7.64 min 22 | Total Training Time: 7.64 min 23 | Test accuracy 37.88% 24 | Total Time: 8.01 min 25 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/vgg16-cifar10-results/3090.txt: -------------------------------------------------------------------------------- 1 | torch 1.11.0 2 | device cuda 3 | Files already downloaded and verified 4 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.3290 5 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.2226 6 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.0688 7 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.1872 8 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.0353 9 | Epoch: 001/001 | Batch 0500/1406 | Loss: 1.9326 10 | Epoch: 001/001 | Batch 0600/1406 | Loss: 2.0525 11 | Epoch: 001/001 | Batch 0700/1406 | Loss: 1.8289 12 | Epoch: 001/001 | Batch 0800/1406 | Loss: 1.7227 13 | Epoch: 001/001 | Batch 0900/1406 | Loss: 1.8406 14 | Epoch: 001/001 | Batch 1000/1406 | Loss: 1.8262 15 | Epoch: 001/001 | Batch 1100/1406 | Loss: 1.8862 16 | Epoch: 001/001 | Batch 1200/1406 | Loss: 1.9222 17 | Epoch: 001/001 | Batch 1300/1406 | Loss: 1.8546 18 | Epoch: 001/001 | Batch 1400/1406 | Loss: 1.5524 19 | Time / epoch without evaluation: 3.36 min 20 | Epoch: 001/001 | Train: 37.17% | Validation: 37.74% | Best Validation (Ep. 
001): 37.74% 21 | Time elapsed: 4.61 min 22 | Total Training Time: 4.61 min 23 | Test accuracy 37.47% 24 | Total Time: 4.85 min 25 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/vgg16-cifar10-results/intel-cpu.txt: -------------------------------------------------------------------------------- 1 | torch 1.10.0 2 | device cpu 3 | Files already downloaded and verified 4 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.4879 5 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.4600 6 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.3141 7 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.2136 8 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.2040 9 | Epoch: 001/001 | Batch 0500/1406 | Loss: 2.4007 10 | Epoch: 001/001 | Batch 0600/1406 | Loss: 1.9307 11 | Epoch: 001/001 | Batch 0700/1406 | Loss: 2.0611 12 | Epoch: 001/001 | Batch 0800/1406 | Loss: 2.0015 13 | Epoch: 001/001 | Batch 0900/1406 | Loss: 1.7914 14 | Epoch: 001/001 | Batch 1000/1406 | Loss: 2.1395 15 | Epoch: 001/001 | Batch 1100/1406 | Loss: 2.0222 16 | Epoch: 001/001 | Batch 1200/1406 | Loss: 2.2381 17 | Epoch: 001/001 | Batch 1300/1406 | Loss: 1.9949 18 | Epoch: 001/001 | Batch 1400/1406 | Loss: 1.6678 19 | Time / epoch without evaluation: 150.83 min 20 | Epoch: 001/001 | Train: 33.93% | Validation: 34.20% | Best Validation (Ep. 001): 34.20% 21 | Time elapsed: 198.80 min 22 | Total Training Time: 198.80 min 23 | Test accuracy 34.25% 24 | Total Time: 208.66 min 25 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/vgg16-cifar10-results/m1-plain-gpu_torch 1.12.0.dev20220518.txt: -------------------------------------------------------------------------------- 1 | torch 1.12.0.dev20220518 2 | device mps 3 | Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz 4 | Extracting data/cifar-10-python.tar.gz to data 5 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.3051 6 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.3035 7 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.2989 8 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.3011 9 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.3024 10 | Epoch: 001/001 | Batch 0500/1406 | Loss: 2.3046 11 | Epoch: 001/001 | Batch 0600/1406 | Loss: 2.3044 12 | Epoch: 001/001 | Batch 0700/1406 | Loss: 2.3038 13 | Epoch: 001/001 | Batch 0800/1406 | Loss: 2.3017 14 | Epoch: 001/001 | Batch 0900/1406 | Loss: 2.3042 15 | Epoch: 001/001 | Batch 1000/1406 | Loss: 2.3032 16 | Epoch: 001/001 | Batch 1100/1406 | Loss: 2.3023 17 | Epoch: 001/001 | Batch 1200/1406 | Loss: 2.3020 18 | Epoch: 001/001 | Batch 1300/1406 | Loss: 2.3029 19 | Epoch: 001/001 | Batch 1400/1406 | Loss: 2.3038 20 | Time / epoch without evaluation: 113.27 min 21 | Epoch: 001/001 | Train: 0.00% | Validation: 0.00% | Best Validation (Ep. 
001): 0.00% 22 | Time elapsed: 140.80 min 23 | Total Training Time: 140.80 min 24 | Test accuracy 0.00% 25 | Total Time: 146.50 min 26 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/vgg16-cifar10-results/m1-plain-gpu_torch-1.13.0.dev20220522.txt: -------------------------------------------------------------------------------- 1 | torch 1.13.0.dev20220522 2 | device mps 3 | Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz 4 | Extracting data/cifar-10-python.tar.gz to data 5 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.3012 6 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.3048 7 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.3054 8 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.3006 9 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.3014 10 | Epoch: 001/001 | Batch 0500/1406 | Loss: 2.3046 11 | Epoch: 001/001 | Batch 0600/1406 | Loss: 2.3033 12 | Epoch: 001/001 | Batch 0700/1406 | Loss: 2.3026 13 | Epoch: 001/001 | Batch 0800/1406 | Loss: 2.3020 14 | Epoch: 001/001 | Batch 0900/1406 | Loss: 2.3052 15 | Epoch: 001/001 | Batch 1000/1406 | Loss: 2.3041 16 | Epoch: 001/001 | Batch 1100/1406 | Loss: 2.3035 17 | Epoch: 001/001 | Batch 1200/1406 | Loss: 2.3024 18 | Epoch: 001/001 | Batch 1300/1406 | Loss: 2.3021 19 | Epoch: 001/001 | Batch 1400/1406 | Loss: 2.3046 20 | Time / epoch without evaluation: 84.00 min 21 | Epoch: 001/001 | Train: 10.03% | Validation: 9.76% | Best Validation (Ep. 001): 9.76% 22 | Time elapsed: 104.41 min 23 | Total Training Time: 104.41 min 24 | Test accuracy 10.00% 25 | Total Time: 108.71 min 26 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/vgg16-cifar10-results/m1max-gpu_torch-1.12.0.dev20220518.txt: -------------------------------------------------------------------------------- 1 | torch 1.12.0.dev20220518 2 | device mps 3 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.3857 4 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.4062 5 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.1027 6 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.0253 7 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.1160 8 | Epoch: 001/001 | Batch 0500/1406 | Loss: 1.9523 9 | Epoch: 001/001 | Batch 0600/1406 | Loss: 1.9365 10 | Epoch: 001/001 | Batch 0700/1406 | Loss: 2.3179 11 | Epoch: 001/001 | Batch 0800/1406 | Loss: 1.9971 12 | Epoch: 001/001 | Batch 0900/1406 | Loss: 1.7516 13 | Epoch: 001/001 | Batch 1000/1406 | Loss: 1.8922 14 | Epoch: 001/001 | Batch 1100/1406 | Loss: 1.8546 15 | Epoch: 001/001 | Batch 1200/1406 | Loss: 1.7630 16 | Epoch: 001/001 | Batch 1300/1406 | Loss: 1.8767 17 | Epoch: 001/001 | Batch 1400/1406 | Loss: 1.5391 18 | Time / epoch without evaluation: 42.28 min 19 | Epoch: 001/001 | Train: 0.00% | Validation: 0.00% | Best Validation (Ep. 
001): 0.00% 20 | Time elapsed: 48.54 min 21 | Total Training Time: 48.54 min 22 | Test accuracy 0.00% 23 | Total Time: 49.99 min -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/vgg16-cifar10-results/m1max-gpu_torch-1.13.0.dev20220522.txt: -------------------------------------------------------------------------------- 1 | torch 1.13.0.dev20220522 2 | device mps 3 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.6720 4 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.3715 5 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.3356 6 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.0791 7 | Epoch: 001/001 | Batch 0400/1406 | Loss: 1.9815 8 | Epoch: 001/001 | Batch 0500/1406 | Loss: 2.0724 9 | Epoch: 001/001 | Batch 0600/1406 | Loss: 1.9088 10 | Epoch: 001/001 | Batch 0700/1406 | Loss: 2.1451 11 | Epoch: 001/001 | Batch 0800/1406 | Loss: 2.2497 12 | Epoch: 001/001 | Batch 0900/1406 | Loss: 2.1637 13 | Epoch: 001/001 | Batch 1000/1406 | Loss: 2.2672 14 | Epoch: 001/001 | Batch 1100/1406 | Loss: 1.8210 15 | Epoch: 001/001 | Batch 1200/1406 | Loss: 1.7867 16 | Epoch: 001/001 | Batch 1300/1406 | Loss: 1.8080 17 | Epoch: 001/001 | Batch 1400/1406 | Loss: 1.6069 18 | Time / epoch without evaluation: 31.54 min 19 | Epoch: 001/001 | Train: 32.69% | Validation: 32.92% | Best Validation (Ep. 001): 32.92% 20 | Time elapsed: 38.46 min 21 | Total Training Time: 38.46 min 22 | Test accuracy 32.59% 23 | Total Time: 40.00 min -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/vgg16-cifar10-results/m1pro-cpu_torch 1.12.0.dev20220518.txt: -------------------------------------------------------------------------------- 1 | torch 1.12.0.dev20220518 2 | device cpu 3 | Files already downloaded and verified 4 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.5936 5 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.6695 6 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.0085 7 | Epoch: 001/001 | Batch 0300/1406 | Loss: 1.8780 8 | Epoch: 001/001 | Batch 0400/1406 | Loss: 1.9127 9 | Epoch: 001/001 | Batch 0500/1406 | Loss: 2.0184 10 | Epoch: 001/001 | Batch 0600/1406 | Loss: 1.8296 11 | Epoch: 001/001 | Batch 0700/1406 | Loss: 1.8775 12 | Epoch: 001/001 | Batch 0800/1406 | Loss: 1.9802 13 | Epoch: 001/001 | Batch 0900/1406 | Loss: 1.8182 14 | Epoch: 001/001 | Batch 1000/1406 | Loss: 1.8658 15 | Epoch: 001/001 | Batch 1100/1406 | Loss: 1.9554 16 | Epoch: 001/001 | Batch 1200/1406 | Loss: 2.1732 17 | Epoch: 001/001 | Batch 1300/1406 | Loss: 1.7894 18 | Epoch: 001/001 | Batch 1400/1406 | Loss: 1.6485 19 | Time / epoch without evaluation: 107.72 min 20 | Epoch: 001/001 | Train: 37.90% | Validation: 38.52% | Best Validation (Ep. 
001): 38.52% 21 | Time elapsed: 149.85 min 22 | Total Training Time: 149.85 min 23 | Test accuracy 39.17% 24 | Total Time: 158.48 min 25 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/vgg16-cifar10-results/m1pro-cpu_torch1.13.0.dev20220522.txt: -------------------------------------------------------------------------------- 1 | torch 1.13.0.dev20220522 2 | device cpu 3 | Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz 4 | Extracting data/cifar-10-python.tar.gz to data 5 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.6914 6 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.6882 7 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.0695 8 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.0680 9 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.0316 10 | Epoch: 001/001 | Batch 0500/1406 | Loss: 2.0238 11 | Epoch: 001/001 | Batch 0600/1406 | Loss: 2.2194 12 | Epoch: 001/001 | Batch 0700/1406 | Loss: 1.8091 13 | Epoch: 001/001 | Batch 0800/1406 | Loss: 2.0532 14 | Epoch: 001/001 | Batch 0900/1406 | Loss: 2.1239 15 | Epoch: 001/001 | Batch 1000/1406 | Loss: 1.8187 16 | Epoch: 001/001 | Batch 1100/1406 | Loss: 1.8362 17 | Epoch: 001/001 | Batch 1200/1406 | Loss: 2.0197 18 | Epoch: 001/001 | Batch 1300/1406 | Loss: 2.0013 19 | Epoch: 001/001 | Batch 1400/1406 | Loss: 1.6868 20 | Time / epoch without evaluation: 110.48 min 21 | Epoch: 001/001 | Train: 30.79% | Validation: 31.50% | Best Validation (Ep. 001): 31.50% 22 | Time elapsed: 152.91 min 23 | Total Training Time: 152.91 min 24 | Test accuracy 31.56% 25 | Total Time: 161.42 min 26 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/vgg16-cifar10-results/m1pro-gpu_torch 1.12.0.dev20220518.txt: -------------------------------------------------------------------------------- 1 | torch 1.12.0.dev20220518 2 | device mps 3 | Files already downloaded and verified 4 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.2904 5 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.3904 6 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.3327 7 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.3666 8 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.3475 9 | Epoch: 001/001 | Batch 0500/1406 | Loss: 1.7922 10 | Epoch: 001/001 | Batch 0600/1406 | Loss: 1.8935 11 | Epoch: 001/001 | Batch 0700/1406 | Loss: 1.8857 12 | Epoch: 001/001 | Batch 0800/1406 | Loss: 2.5368 13 | Epoch: 001/001 | Batch 0900/1406 | Loss: 1.8395 14 | Epoch: 001/001 | Batch 1000/1406 | Loss: 1.8795 15 | Epoch: 001/001 | Batch 1100/1406 | Loss: 1.9586 16 | Epoch: 001/001 | Batch 1200/1406 | Loss: 2.0501 17 | Epoch: 001/001 | Batch 1300/1406 | Loss: inf 18 | Epoch: 001/001 | Batch 1400/1406 | Loss: inf 19 | Time / epoch without evaluation: 59.74 min 20 | Epoch: 001/001 | Train: 0.00% | Validation: 0.00% | Best Validation (Ep. 
001): 0.00% 21 | Time elapsed: 69.53 min 22 | Total Training Time: 69.53 min 23 | Test accuracy 0.00% 24 | Total Time: 71.63 min 25 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/vgg16-cifar10-results/m1pro-gpu_torch1.13.0.dev20220522.txt: -------------------------------------------------------------------------------- 1 | torch 1.13.0.dev20220522 2 | device mps 3 | Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz 4 | Extracting data/cifar-10-python.tar.gz to data 5 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.3672 6 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.8752 7 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.1686 8 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.1362 9 | Epoch: 001/001 | Batch 0400/1406 | Loss: 1.9429 10 | Epoch: 001/001 | Batch 0500/1406 | Loss: 1.9907 11 | Epoch: 001/001 | Batch 0600/1406 | Loss: 2.0271 12 | Epoch: 001/001 | Batch 0700/1406 | Loss: 1.9571 13 | Epoch: 001/001 | Batch 0800/1406 | Loss: 2.0105 14 | Epoch: 001/001 | Batch 0900/1406 | Loss: 1.8068 15 | Epoch: 001/001 | Batch 1000/1406 | Loss: 1.8195 16 | Epoch: 001/001 | Batch 1100/1406 | Loss: 1.6118 17 | Epoch: 001/001 | Batch 1200/1406 | Loss: 1.6810 18 | Epoch: 001/001 | Batch 1300/1406 | Loss: 1.7971 19 | Epoch: 001/001 | Batch 1400/1406 | Loss: 1.6352 20 | Time / epoch without evaluation: 42.12 min 21 | Epoch: 001/001 | Train: 0.00% | Validation: 0.00% | Best Validation (Ep. 001): 0.00% 22 | Time elapsed: 49.75 min 23 | Total Training Time: 49.75 min 24 | Test accuracy 0.00% 25 | Total Time: 51.44 min 26 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/vgg16-cifar10-results/m1ultra-cpu_torch-1.13.0.dev20220522.txt: -------------------------------------------------------------------------------- 1 | torch 1.13.0.dev20220522 2 | device cpu 3 | Files already downloaded and verified 4 | Using cache found in /Users/alex/.cache/torch/hub/pytorch_vision_v0.11.0 5 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.2517 6 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.5809 7 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.3804 8 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.2363 9 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.3646 10 | Epoch: 001/001 | Batch 0500/1406 | Loss: 2.2934 11 | Epoch: 001/001 | Batch 0600/1406 | Loss: 2.2161 12 | Epoch: 001/001 | Batch 0700/1406 | Loss: 2.2401 13 | Epoch: 001/001 | Batch 0800/1406 | Loss: 2.3203 14 | Epoch: 001/001 | Batch 0900/1406 | Loss: 2.2977 15 | Epoch: 001/001 | Batch 1000/1406 | Loss: 2.4127 16 | Epoch: 001/001 | Batch 1100/1406 | Loss: 2.2535 17 | Epoch: 001/001 | Batch 1200/1406 | Loss: 2.4046 18 | Epoch: 001/001 | Batch 1300/1406 | Loss: 2.3649 19 | Epoch: 001/001 | Batch 1400/1406 | Loss: 2.1960 20 | Time / epoch without evaluation: 109.27 min 21 | Epoch: 001/001 | Train: 12.44% | Validation: 12.04% | Best Validation (Ep. 
001): 12.04% 22 | Time elapsed: 150.19 min 23 | Total Training Time: 150.19 min 24 | 25 | Test accuracy 12.48% 26 | Total Time: 158.49 min -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/vgg16-cifar10-results/m1ultra-gpu_torch-1.13.0.dev20220522.txt: -------------------------------------------------------------------------------- 1 | torch 1.13.0.dev20220522 2 | device mps 3 | Files already downloaded and verified 4 | Using cache found in /Users/alex/.cache/torch/hub/pytorch_vision_v0.11.0 5 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.4455 6 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.4203 7 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.2958 8 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.1548 9 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.0212 10 | Epoch: 001/001 | Batch 0500/1406 | Loss: 2.1466 11 | Epoch: 001/001 | Batch 0600/1406 | Loss: 2.0805 12 | Epoch: 001/001 | Batch 0700/1406 | Loss: 1.9645 13 | Epoch: 001/001 | Batch 0800/1406 | Loss: 2.1037 14 | Epoch: 001/001 | Batch 0900/1406 | Loss: 2.1161 15 | Epoch: 001/001 | Batch 1000/1406 | Loss: 2.2875 16 | Epoch: 001/001 | Batch 1100/1406 | Loss: 2.0838 17 | Epoch: 001/001 | Batch 1200/1406 | Loss: 2.2671 18 | Epoch: 001/001 | Batch 1300/1406 | Loss: 1.9454 19 | Epoch: 001/001 | Batch 1400/1406 | Loss: 1.9380 20 | Time / epoch without evaluation: 13.41 min 21 | Epoch: 001/001 | Train: 21.51% | Validation: 22.00% | Best Validation (Ep. 001): 22.00% 22 | Time elapsed: 15.80 min 23 | Total Training Time: 15.80 min 24 | Test accuracy 21.05% 25 | Total Time: 16.45 min -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/vgg16-cifar10-results/titanv-cpu.txt: -------------------------------------------------------------------------------- 1 | torch 1.13.0.dev20220522+cu102 2 | device cpu 3 | Files already downloaded and verified 4 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.4952 5 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.5676 6 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.0315 7 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.0713 8 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.1783 9 | Epoch: 001/001 | Batch 0500/1406 | Loss: 2.0708 10 | Epoch: 001/001 | Batch 0600/1406 | Loss: 1.8028 11 | Epoch: 001/001 | Batch 0700/1406 | Loss: 2.0366 12 | Epoch: 001/001 | Batch 0800/1406 | Loss: 2.2274 13 | Epoch: 001/001 | Batch 0900/1406 | Loss: 1.9104 14 | Epoch: 001/001 | Batch 1000/1406 | Loss: 2.0756 15 | Epoch: 001/001 | Batch 1100/1406 | Loss: 2.0831 16 | Epoch: 001/001 | Batch 1200/1406 | Loss: 1.7274 17 | Epoch: 001/001 | Batch 1300/1406 | Loss: 2.0498 18 | Epoch: 001/001 | Batch 1400/1406 | Loss: 1.8579 19 | Time / epoch without evaluation: 230.39 min 20 | Epoch: 001/001 | Train: 32.73% | Validation: 34.06% | Best Validation (Ep. 
001): 34.06% 21 | Time elapsed: 292.35 min 22 | Total Training Time: 292.35 min 23 | Test accuracy 33.60% 24 | Total Time: 305.01 min 25 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/vgg16-cifar10-results/titanv.txt: -------------------------------------------------------------------------------- 1 | torch 1.13.0.dev20220522+cu102 2 | device cuda 3 | Files already downloaded and verified 4 | Epoch: 001/001 | Batch 0000/1406 | Loss: 2.7397 5 | Epoch: 001/001 | Batch 0100/1406 | Loss: 2.4220 6 | Epoch: 001/001 | Batch 0200/1406 | Loss: 2.2438 7 | Epoch: 001/001 | Batch 0300/1406 | Loss: 2.1947 8 | Epoch: 001/001 | Batch 0400/1406 | Loss: 2.4192 9 | Epoch: 001/001 | Batch 0500/1406 | Loss: 2.1813 10 | Epoch: 001/001 | Batch 0600/1406 | Loss: 2.1236 11 | Epoch: 001/001 | Batch 0700/1406 | Loss: 2.0670 12 | Epoch: 001/001 | Batch 0800/1406 | Loss: 1.9411 13 | Epoch: 001/001 | Batch 0900/1406 | Loss: 2.1265 14 | Epoch: 001/001 | Batch 1000/1406 | Loss: 2.1767 15 | Epoch: 001/001 | Batch 1100/1406 | Loss: 1.9055 16 | Epoch: 001/001 | Batch 1200/1406 | Loss: 1.7954 17 | Epoch: 001/001 | Batch 1300/1406 | Loss: 1.9439 18 | Epoch: 001/001 | Batch 1400/1406 | Loss: 2.3842 19 | Time / epoch without evaluation: 5.03 min 20 | Epoch: 001/001 | Train: 21.43% | Validation: 21.82% | Best Validation (Ep. 001): 21.82% 21 | Time elapsed: 6.73 min 22 | Total Training Time: 6.73 min 23 | Test accuracy 21.72% 24 | Total Time: 7.07 min 25 | -------------------------------------------------------------------------------- /benchmark/pytorch-m1-gpu/vgg16-cifar10.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import argparse 5 | import os 6 | import random 7 | import time 8 | 9 | import numpy as np 10 | import torch 11 | import torchvision 12 | from torch.utils.data import DataLoader 13 | from torch.utils.data import SubsetRandomSampler 14 | from torchvision import datasets, transforms 15 | 16 | 17 | def set_all_seeds(seed): 18 | os.environ["PL_GLOBAL_SEED"] = str(seed) 19 | random.seed(seed) 20 | np.random.seed(seed) 21 | torch.manual_seed(seed) 22 | torch.cuda.manual_seed_all(seed) 23 | 24 | 25 | def compute_accuracy(model, data_loader, device): 26 | model.eval() 27 | with torch.no_grad(): 28 | correct_pred, num_examples = 0, 0 29 | for i, (features, targets) in enumerate(data_loader): 30 | 31 | features = features.to(device) 32 | targets = targets.to(device) 33 | 34 | logits = model(features) 35 | _, predicted_labels = torch.max(logits, 1) 36 | num_examples += targets.size(0) 37 | correct_pred += (predicted_labels.cpu() == targets.cpu()).sum() 38 | return correct_pred.float() / num_examples * 100 39 | 40 | 41 | def train_classifier_simple_v2( 42 | model, 43 | num_epochs, 44 | train_loader, 45 | valid_loader, 46 | test_loader, 47 | optimizer, 48 | device, 49 | logging_interval=50, 50 | best_model_save_path=None, 51 | scheduler=None, 52 | skip_train_acc=False, 53 | scheduler_on="valid_acc", 54 | ): 55 | 56 | start_time = time.time() 57 | minibatch_loss_list, train_acc_list, valid_acc_list = [], [], [] 58 | best_valid_acc, best_epoch = -float("inf"), 0 59 | 60 | for epoch in range(num_epochs): 61 | 62 | epoch_start_time = time.time() 63 | model.train() 64 | for batch_idx, (features, targets) in enumerate(train_loader): 65 | 66 | features = features.to(device) 67 | targets = targets.to(device) 68 | 69 | # ## FORWARD AND BACK PROP 70 | logits = model(features) 71 | 
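# NOTE: F.cross_entropy takes the raw logits and integer class targets; it applies log-softmax and NLL internally, so the model itself needs no softmax output layer.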
loss = torch.nn.functional.cross_entropy(logits, targets) 72 | optimizer.zero_grad() 73 | 74 | loss.backward() 75 | 76 | # ## UPDATE MODEL PARAMETERS 77 | optimizer.step() 78 | 79 | # ## LOGGING 80 | minibatch_loss_list.append(loss.item()) 81 | if not batch_idx % logging_interval: 82 | print( 83 | f"Epoch: {epoch+1:03d}/{num_epochs:03d} " 84 | f"| Batch {batch_idx:04d}/{len(train_loader):04d} " 85 | f"| Loss: {loss:.4f}" 86 | ) 87 | 88 | model.eval() 89 | 90 | elapsed = (time.time() - epoch_start_time) / 60 91 | print(f"Time / epoch without evaluation: {elapsed:.2f} min") 92 | with torch.no_grad(): # save memory during inference 93 | if not skip_train_acc: 94 | train_acc = compute_accuracy(model, train_loader, device=device).item() 95 | else: 96 | train_acc = float("nan") 97 | valid_acc = compute_accuracy(model, valid_loader, device=device).item() 98 | train_acc_list.append(train_acc) 99 | valid_acc_list.append(valid_acc) 100 | 101 | if valid_acc > best_valid_acc: 102 | best_valid_acc, best_epoch = valid_acc, epoch + 1 103 | if best_model_save_path: 104 | torch.save(model.state_dict(), best_model_save_path) 105 | 106 | print( 107 | f"Epoch: {epoch+1:03d}/{num_epochs:03d} " 108 | f"| Train: {train_acc :.2f}% " 109 | f"| Validation: {valid_acc :.2f}% " 110 | f"| Best Validation " 111 | f"(Ep. {best_epoch:03d}): {best_valid_acc :.2f}%" 112 | ) 113 | 114 | elapsed = (time.time() - start_time) / 60 115 | print(f"Time elapsed: {elapsed:.2f} min") 116 | 117 | if scheduler is not None: 118 | 119 | if scheduler_on == "valid_acc": 120 | scheduler.step(valid_acc_list[-1]) 121 | elif scheduler_on == "minibatch_loss": 122 | scheduler.step(minibatch_loss_list[-1]) 123 | else: 124 | raise ValueError("Invalid `scheduler_on` choice.") 125 | 126 | elapsed = (time.time() - start_time) / 60 127 | print(f"Total Training Time: {elapsed:.2f} min") 128 | 129 | test_acc = compute_accuracy(model, test_loader, device=device) 130 | print(f"Test accuracy {test_acc :.2f}%") 131 | 132 | elapsed = (time.time() - start_time) / 60 133 | print(f"Total Time: {elapsed:.2f} min") 134 | 135 | return minibatch_loss_list, train_acc_list, valid_acc_list 136 | 137 | 138 | def get_dataloaders_cifar10( 139 | batch_size, 140 | num_workers=0, 141 | validation_fraction=None, 142 | train_transforms=None, 143 | test_transforms=None, 144 | ): 145 | 146 | if train_transforms is None: 147 | train_transforms = transforms.ToTensor() 148 | 149 | if test_transforms is None: 150 | test_transforms = transforms.ToTensor() 151 | 152 | train_dataset = datasets.CIFAR10( 153 | root="data", train=True, transform=train_transforms, download=True 154 | ) 155 | 156 | valid_dataset = datasets.CIFAR10(root="data", train=True, transform=test_transforms) 157 | 158 | test_dataset = datasets.CIFAR10(root="data", train=False, transform=test_transforms) 159 | 160 | if validation_fraction is not None: 161 | num = int(validation_fraction * 50000) 162 | train_indices = torch.arange(0, 50000 - num) 163 | valid_indices = torch.arange(50000 - num, 50000) 164 | 165 | train_sampler = SubsetRandomSampler(train_indices) 166 | valid_sampler = SubsetRandomSampler(valid_indices) 167 | 168 | valid_loader = DataLoader( 169 | dataset=valid_dataset, 170 | batch_size=batch_size, 171 | num_workers=num_workers, 172 | sampler=valid_sampler, 173 | ) 174 | 175 | train_loader = DataLoader( 176 | dataset=train_dataset, 177 | batch_size=batch_size, 178 | num_workers=num_workers, 179 | drop_last=True, 180 | sampler=train_sampler, 181 | ) 182 | 183 | else: 184 | train_loader = DataLoader( 185 
| dataset=train_dataset, 186 | batch_size=batch_size, 187 | num_workers=num_workers, 188 | drop_last=True, 189 | shuffle=True, 190 | ) 191 | 192 | test_loader = DataLoader( 193 | dataset=test_dataset, 194 | batch_size=batch_size, 195 | num_workers=num_workers, 196 | shuffle=False, 197 | ) 198 | 199 | if validation_fraction is None: 200 | return train_loader, test_loader 201 | else: 202 | return train_loader, valid_loader, test_loader 203 | 204 | 205 | if __name__ == "__main__": 206 | 207 | parser = argparse.ArgumentParser() 208 | parser.add_argument( 209 | "--device", type=str, required=True, help="Which GPU device to use." 210 | ) 211 | 212 | args = parser.parse_args() 213 | 214 | RANDOM_SEED = 123 215 | BATCH_SIZE = 32 216 | NUM_EPOCHS = 1 217 | DEVICE = torch.device(args.device) 218 | 219 | print('torch', torch.__version__) 220 | print('device', DEVICE) 221 | 222 | train_transforms = torchvision.transforms.Compose( 223 | [ 224 | torchvision.transforms.Resize((256, 256)), 225 | torchvision.transforms.RandomCrop((224, 224)), 226 | torchvision.transforms.ToTensor(), 227 | torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), 228 | ] 229 | ) 230 | 231 | test_transforms = torchvision.transforms.Compose( 232 | [ 233 | torchvision.transforms.Resize((256, 256)), 234 | torchvision.transforms.CenterCrop((224, 224)), 235 | torchvision.transforms.ToTensor(), 236 | torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), 237 | ] 238 | ) 239 | 240 | train_loader, valid_loader, test_loader = get_dataloaders_cifar10( 241 | batch_size=BATCH_SIZE, 242 | validation_fraction=0.1, 243 | train_transforms=train_transforms, 244 | test_transforms=test_transforms, 245 | num_workers=2, 246 | ) 247 | 248 | model = torch.hub.load( 249 | "pytorch/vision:v0.11.0", "vgg16_bn", pretrained=False 250 | ) 251 | 252 | model.classifier[-1] = torch.nn.Linear( 253 | in_features=4096, out_features=10 # as in original 254 | ) # number of class labels in Cifar-10) 255 | 256 | model = model.to(DEVICE) 257 | 258 | optimizer = torch.optim.Adam(model.parameters(), lr=0.0005) 259 | 260 | minibatch_loss_list, train_acc_list, valid_acc_list = train_classifier_simple_v2( 261 | model=model, 262 | num_epochs=NUM_EPOCHS, 263 | train_loader=train_loader, 264 | valid_loader=valid_loader, 265 | test_loader=test_loader, 266 | optimizer=optimizer, 267 | best_model_save_path=None, 268 | device=DEVICE, 269 | scheduler_on="valid_acc", 270 | logging_interval=100, 271 | ) 272 | -------------------------------------------------------------------------------- /categorical-features/data/iris_mod.csv: -------------------------------------------------------------------------------- 1 | Id,SepalLength[cm],SepalWidth[cm],PetalLength[cm],PetalWidth[cm],Color_IMadeThisUp,Species 2 | 1,5.1,3.5,1.4,0.2,red,Iris-setosa 3 | 2,4.9,3,1.4,0.2,red,Iris-setosa 4 | 3,4.7,3.2,1.3,0.2,red,Iris-setosa 5 | 4,4.6,3.1,1.5,0.2,red,Iris-setosa 6 | 5,5,3.6,1.4,0.2,red,Iris-setosa 7 | 6,5.4,3.9,1.7,0.4,red,Iris-setosa 8 | 7,4.6,3.4,1.4,0.3,red,Iris-setosa 9 | 8,5,3.4,1.5,0.2,blue,Iris-setosa 10 | 9,4.4,2.9,1.4,0.2,red,Iris-setosa 11 | 10,4.9,3.1,1.5,0.1,red,Iris-setosa 12 | 11,5.4,3.7,1.5,0.2,blue,Iris-setosa 13 | 12,4.8,3.4,1.6,0.2,red,Iris-setosa 14 | 13,4.8,3,1.4,0.1,red,Iris-setosa 15 | 14,4.3,3,1.1,0.1,red,Iris-setosa 16 | 15,5.8,4,1.2,0.2,red,Iris-setosa 17 | 16,5.7,4.4,1.5,0.4,red,Iris-setosa 18 | 17,5.4,3.9,1.3,0.4,red,Iris-setosa 19 | 18,5.1,3.5,1.4,0.3,red,Iris-setosa 20 | 19,5.7,3.8,1.7,0.3,red,Iris-setosa 21 | 
20,5.1,3.8,1.5,0.3,blue,Iris-setosa 22 | 21,5.4,3.4,1.7,0.2,red,Iris-setosa 23 | 22,5.1,3.7,1.5,0.4,red,Iris-setosa 24 | 23,4.6,3.6,1,0.2,red,Iris-setosa 25 | 24,5.1,3.3,1.7,0.5,blue,Iris-setosa 26 | 25,4.8,3.4,1.9,0.2,red,Iris-setosa 27 | 26,5,3,1.6,0.2,red,Iris-setosa 28 | 27,5,3.4,1.6,0.4,red,Iris-setosa 29 | 28,5.2,3.5,1.5,0.2,red,Iris-setosa 30 | 29,5.2,3.4,1.4,0.2,red,Iris-setosa 31 | 30,4.7,3.2,1.6,0.2,violet,Iris-setosa 32 | 31,4.8,3.1,1.6,0.2,red,Iris-setosa 33 | 32,5.4,3.4,1.5,0.4,red,Iris-setosa 34 | 33,5.2,4.1,1.5,0.1,red,Iris-setosa 35 | 34,5.5,4.2,1.4,0.2,red,Iris-setosa 36 | 35,4.9,3.1,1.5,0.1,red,Iris-setosa 37 | 36,5,3.2,1.2,0.2,violet,Iris-setosa 38 | 37,5.5,3.5,1.3,0.2,red,Iris-setosa 39 | 38,4.9,3.1,1.5,0.1,red,Iris-setosa 40 | 39,4.4,3,1.3,0.2,red,Iris-setosa 41 | 40,5.1,3.4,1.5,0.2,red,Iris-setosa 42 | 41,5,3.5,1.3,0.3,red,Iris-setosa 43 | 42,4.5,2.3,1.3,0.3,red,Iris-setosa 44 | 43,4.4,3.2,1.3,0.2,red,Iris-setosa 45 | 44,5,3.5,1.6,0.6,red,Iris-setosa 46 | 45,5.1,3.8,1.9,0.4,red,Iris-setosa 47 | 46,4.8,3,1.4,0.3,red,Iris-setosa 48 | 47,5.1,3.8,1.6,0.2,red,Iris-setosa 49 | 48,4.6,3.2,1.4,0.2,red,Iris-setosa 50 | 49,5.3,3.7,1.5,0.2,red,Iris-setosa 51 | 50,5,3.3,1.4,0.2,red,Iris-setosa 52 | 51,7,3.2,4.7,1.4,blue,Iris-versicolor 53 | 52,6.4,3.2,4.5,1.5,blue,Iris-versicolor 54 | 53,6.9,3.1,4.9,1.5,blue,Iris-versicolor 55 | 54,5.5,2.3,4,1.3,blue,Iris-versicolor 56 | 55,6.5,2.8,4.6,1.5,blue,Iris-versicolor 57 | 56,5.7,2.8,4.5,1.3,blue,Iris-versicolor 58 | 57,6.3,3.3,4.7,1.6,blue,Iris-versicolor 59 | 58,4.9,2.4,3.3,1,blue,Iris-versicolor 60 | 59,6.6,2.9,4.6,1.3,blue,Iris-versicolor 61 | 60,5.2,2.7,3.9,1.4,blue,Iris-versicolor 62 | 61,5,2,3.5,1,blue,Iris-versicolor 63 | 62,5.9,3,4.2,1.5,blue,Iris-versicolor 64 | 63,6,2.2,4,1,blue,Iris-versicolor 65 | 64,6.1,2.9,4.7,1.4,blue,Iris-versicolor 66 | 65,5.6,2.9,3.6,1.3,blue,Iris-versicolor 67 | 66,6.7,3.1,4.4,1.4,red,Iris-versicolor 68 | 67,5.6,3,4.5,1.5,blue,Iris-versicolor 69 | 68,5.8,2.7,4.1,1,blue,Iris-versicolor 70 | 69,6.2,2.2,4.5,1.5,blue,Iris-versicolor 71 | 70,5.6,2.5,3.9,1.1,violet,Iris-versicolor 72 | 71,5.9,3.2,4.8,1.8,blue,Iris-versicolor 73 | 72,6.1,2.8,4,1.3,blue,Iris-versicolor 74 | 73,6.3,2.5,4.9,1.5,blue,Iris-versicolor 75 | 74,6.1,2.8,4.7,1.2,blue,Iris-versicolor 76 | 75,6.4,2.9,4.3,1.3,blue,Iris-versicolor 77 | 76,6.6,3,4.4,1.4,blue,Iris-versicolor 78 | 77,6.8,2.8,4.8,1.4,blue,Iris-versicolor 79 | 78,6.7,3,5,1.7,blue,Iris-versicolor 80 | 79,6,2.9,4.5,1.5,blue,Iris-versicolor 81 | 80,5.7,2.6,3.5,1,violet,Iris-versicolor 82 | 81,5.5,2.4,3.8,1.1,blue,Iris-versicolor 83 | 82,5.5,2.4,3.7,1,red,Iris-versicolor 84 | 83,5.8,2.7,3.9,1.2,blue,Iris-versicolor 85 | 84,6,2.7,5.1,1.6,blue,Iris-versicolor 86 | 85,5.4,3,4.5,1.5,blue,Iris-versicolor 87 | 86,6,3.4,4.5,1.6,blue,Iris-versicolor 88 | 87,6.7,3.1,4.7,1.5,blue,Iris-versicolor 89 | 88,6.3,2.3,4.4,1.3,violet,Iris-versicolor 90 | 89,5.6,3,4.1,1.3,blue,Iris-versicolor 91 | 90,5.5,2.5,4,1.3,blue,Iris-versicolor 92 | 91,5.5,2.6,4.4,1.2,blue,Iris-versicolor 93 | 92,6.1,3,4.6,1.4,blue,Iris-versicolor 94 | 93,5.8,2.6,4,1.2,violet,Iris-versicolor 95 | 94,5,2.3,3.3,1,blue,Iris-versicolor 96 | 95,5.6,2.7,4.2,1.3,violet,Iris-versicolor 97 | 96,5.7,3,4.2,1.2,blue,Iris-versicolor 98 | 97,5.7,2.9,4.2,1.3,blue,Iris-versicolor 99 | 98,6.2,2.9,4.3,1.3,blue,Iris-versicolor 100 | 99,5.1,2.5,3,1.1,blue,Iris-versicolor 101 | 100,5.7,2.8,4.1,1.3,blue,Iris-versicolor 102 | 101,6.3,3.3,6,2.5,violet,Iris-virginica 103 | 102,5.8,2.7,5.1,1.9,violet,Iris-virginica 104 | 
103,7.1,3,5.9,2.1,violet,Iris-virginica 105 | 104,6.3,2.9,5.6,1.8,violet,Iris-virginica 106 | 105,6.5,3,5.8,2.2,violet,Iris-virginica 107 | 106,7.6,3,6.6,2.1,violet,Iris-virginica 108 | 107,4.9,2.5,4.5,1.7,violet,Iris-virginica 109 | 108,7.3,2.9,6.3,1.8,violet,Iris-virginica 110 | 109,6.7,2.5,5.8,1.8,violet,Iris-virginica 111 | 110,7.2,3.6,6.1,2.5,violet,Iris-virginica 112 | 111,6.5,3.2,5.1,2,violet,Iris-virginica 113 | 112,6.4,2.7,5.3,1.9,violet,Iris-virginica 114 | 113,6.8,3,5.5,2.1,violet,Iris-virginica 115 | 114,5.7,2.5,5,2,violet,Iris-virginica 116 | 115,5.8,2.8,5.1,2.4,violet,Iris-virginica 117 | 116,6.4,3.2,5.3,2.3,violet,Iris-virginica 118 | 117,6.5,3,5.5,1.8,violet,Iris-virginica 119 | 118,7.7,3.8,6.7,2.2,violet,Iris-virginica 120 | 119,7.7,2.6,6.9,2.3,violet,Iris-virginica 121 | 120,6,2.2,5,1.5,violet,Iris-virginica 122 | 121,6.9,3.2,5.7,2.3,blue,Iris-virginica 123 | 122,5.6,2.8,4.9,2,violet,Iris-virginica 124 | 123,7.7,2.8,6.7,2,violet,Iris-virginica 125 | 124,6.3,2.7,4.9,1.8,violet,Iris-virginica 126 | 125,6.7,3.3,5.7,2.1,blue,Iris-virginica 127 | 126,7.2,3.2,6,1.8,violet,Iris-virginica 128 | 127,6.2,2.8,4.8,1.8,violet,Iris-virginica 129 | 128,6.1,3,4.9,1.8,violet,Iris-virginica 130 | 129,6.4,2.8,5.6,2.1,blue,Iris-virginica 131 | 130,7.2,3,5.8,1.6,violet,Iris-virginica 132 | 131,7.4,2.8,6.1,1.9,violet,Iris-virginica 133 | 132,7.9,3.8,6.4,2,violet,Iris-virginica 134 | 133,6.4,2.8,5.6,2.2,violet,Iris-virginica 135 | 134,6.3,2.8,5.1,1.5,red,Iris-virginica 136 | 135,6.1,2.6,5.6,1.4,violet,Iris-virginica 137 | 136,7.7,3,6.1,2.3,violet,Iris-virginica 138 | 137,6.3,3.4,5.6,2.4,violet,Iris-virginica 139 | 138,6.4,3.1,5.5,1.8,violet,Iris-virginica 140 | 139,6,3,4.8,1.8,blue,Iris-virginica 141 | 140,6.9,3.1,5.4,2.1,violet,Iris-virginica 142 | 141,6.7,3.1,5.6,2.4,violet,Iris-virginica 143 | 142,6.9,3.1,5.1,2.3,violet,Iris-virginica 144 | 143,5.8,2.7,5.1,1.9,violet,Iris-virginica 145 | 144,6.8,3.2,5.9,2.3,violet,Iris-virginica 146 | 145,6.7,3.3,5.7,2.5,violet,Iris-virginica 147 | 146,6.7,3,5.2,2.3,violet,Iris-virginica 148 | 147,6.3,2.5,5,1.9,violet,Iris-virginica 149 | 148,6.5,3,5.2,2,blue,Iris-virginica 150 | 149,6.2,3.4,5.4,2.3,violet,Iris-virginica 151 | 150,5.9,3,5.1,1.8,red,Iris-virginica -------------------------------------------------------------------------------- /categorical-features/sklearn-onehot-encoding-mixedtype-df.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "d180958e-8362-4453-ad21-78ec618bc624", 6 | "metadata": {}, 7 | "source": [ 8 | "# OneHot Encoding in Scikit-Learn with DataFrames of Mixed Column Types" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "11bdfc69-a04f-462e-8c14-c0b66dfd1796", 14 | "metadata": {}, 15 | "source": [ 16 | "## Some Toydata" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "id": "ec0cb03a-9a9a-4cad-9716-40fc29641f9a", 22 | "metadata": {}, 23 | "source": [ 24 | "- Imagine we have some dataset that consists of both numerical and categorical features.\n", 25 | "- And we just want to convert the categorical features into a onehot encoding (while leaving the numerical features untouched)" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "id": "61f31b73-d486-4bc5-876a-86636c1acb86", 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import pandas as pd" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 2, 41 | "id": 
"02e244fd-76b0-430f-a002-4291d7d687e3", 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "data": { 46 | "text/html": [ 47 | "
\n", 48 | "\n", 61 | "\n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | "
numericalcategorical
01.1b
12.1b
23.1b
34.2b
45.1a
56.1a
67.1a
78.1a
81.2c
92.1c
103.1c
114.1c
\n", 132 | "
" 133 | ], 134 | "text/plain": [ 135 | " numerical categorical\n", 136 | "0 1.1 b\n", 137 | "1 2.1 b\n", 138 | "2 3.1 b\n", 139 | "3 4.2 b\n", 140 | "4 5.1 a\n", 141 | "5 6.1 a\n", 142 | "6 7.1 a\n", 143 | "7 8.1 a\n", 144 | "8 1.2 c\n", 145 | "9 2.1 c\n", 146 | "10 3.1 c\n", 147 | "11 4.1 c" 148 | ] 149 | }, 150 | "execution_count": 2, 151 | "metadata": {}, 152 | "output_type": "execute_result" 153 | } 154 | ], 155 | "source": [ 156 | "feature_1 = [\n", 157 | " 1.1, 2.1, 3.1, 4.2,\n", 158 | " 5.1, 6.1, 7.1, 8.1,\n", 159 | " 1.2, 2.1, 3.1, 4.1\n", 160 | "]\n", 161 | "\n", 162 | "feature_2 = [\n", 163 | " 'b', 'b', 'b', 'b',\n", 164 | " 'a', 'a', 'a', 'a',\n", 165 | " 'c', 'c', 'c', 'c'\n", 166 | "]\n", 167 | "\n", 168 | "df = pd.DataFrame({'numerical': feature_1, 'categorical': feature_2})\n", 169 | "df" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "id": "e8989975-b534-4c3f-bb4c-cbed1b7acaa5", 175 | "metadata": {}, 176 | "source": [ 177 | "## Onehot Encoding" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "id": "26d8a4c3-a5b9-449e-ab27-421b93f95d9b", 183 | "metadata": {}, 184 | "source": [ 185 | "- We can use e.g., scikit-learn's [OneHotEncoder](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html) to expand the categorical column into onehot-encoded ones\n", 186 | "- By default, the `OneHotEncoder` will expand all columns into categorical ones (this includes the numerical ones), which is not what we want if we have mixed-type datasets\n", 187 | "- We can use the [ColumnTransformer](https://scikit-learn.org/stable/modules/generated/sklearn.compose.ColumnTransformer.html) to select specific columns we want to transform, though" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 3, 193 | "id": "90ed36b9-a326-42dc-b7e4-aae1c4f95e50", 194 | "metadata": {}, 195 | "outputs": [ 196 | { 197 | "data": { 198 | "text/plain": [ 199 | "array([[1. , 0. , 1.1],\n", 200 | " [1. , 0. , 2.1],\n", 201 | " [1. , 0. , 3.1],\n", 202 | " [1. , 0. , 4.2],\n", 203 | " [0. , 0. , 5.1],\n", 204 | " [0. , 0. , 6.1],\n", 205 | " [0. , 0. , 7.1],\n", 206 | " [0. , 0. , 8.1],\n", 207 | " [0. , 1. , 1.2],\n", 208 | " [0. , 1. , 2.1],\n", 209 | " [0. , 1. , 3.1],\n", 210 | " [0. , 1. 
, 4.1]])" 211 | ] 212 | }, 213 | "execution_count": 3, 214 | "metadata": {}, 215 | "output_type": "execute_result" 216 | } 217 | ], 218 | "source": [ 219 | "import sklearn\n", 220 | "from sklearn.compose import ColumnTransformer\n", 221 | "from sklearn.pipeline import make_pipeline\n", 222 | "from sklearn.preprocessing import OneHotEncoder\n", 223 | "\n", 224 | "\n", 225 | "\n", 226 | "ohe = OneHotEncoder(sparse=False, drop='first', dtype='float')\n", 227 | "\n", 228 | "\n", 229 | "categorical_features = ['categorical']\n", 230 | "\n", 231 | "col_transformer = ColumnTransformer(\n", 232 | " transformers=[\n", 233 | " ('cat', ohe, categorical_features)],\n", 234 | " # include the numerical column(s) via passthrough:\n", 235 | " remainder='passthrough' \n", 236 | ")\n", 237 | "\n", 238 | "col_transformer.fit(df)\n", 239 | "X_t = col_transformer.transform(df)\n", 240 | "X_t" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 4, 246 | "id": "f9110d53-c0af-4929-ad53-a54a7459dbc3", 247 | "metadata": {}, 248 | "outputs": [ 249 | { 250 | "name": "stdout", 251 | "output_type": "stream", 252 | "text": [ 253 | "pandas : 1.4.0\n", 254 | "sklearn: 1.0.2\n", 255 | "\n" 256 | ] 257 | } 258 | ], 259 | "source": [ 260 | "%load_ext watermark\n", 261 | "%watermark --iversions" 262 | ] 263 | } 264 | ], 265 | "metadata": { 266 | "kernelspec": { 267 | "display_name": "Python 3.9.2 64-bit ('base': conda)", 268 | "language": "python", 269 | "name": "python392jvsc74a57bd0249cfc85c6a0073df6bca89c83e3180d730f84f7e1f446fbe710b75104ecfa4f" 270 | }, 271 | "language_info": { 272 | "codemirror_mode": { 273 | "name": "ipython", 274 | "version": 3 275 | }, 276 | "file_extension": ".py", 277 | "mimetype": "text/x-python", 278 | "name": "python", 279 | "nbconvert_exporter": "python", 280 | "pygments_lexer": "ipython3", 281 | "version": "3.9.7" 282 | } 283 | }, 284 | "nbformat": 4, 285 | "nbformat_minor": 5 286 | } 287 | -------------------------------------------------------------------------------- /cloud-resources/xgboost-lightning-gpu/README.md: -------------------------------------------------------------------------------- 1 | # Training an XGBoost Classifier Using Cloud GPUs Without Worrying About Infrastructure 2 | 3 | 4 | 5 | Code accompanying the blog article: [Training an XGBoost Classifier Using Cloud GPUs Without Worrying About Infrastructure](https://sebastianraschka.com/blog/2023/xgboost-gpu.html). 
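The folder contains two entry points: `my_xgboost_classifier.py` trains a plain XGBoost classifier on the scikit-learn digits dataset, and `xgboost-cloud-gpu.py` wraps the same `run_classifier()` function in a Lightning app so that it runs on a cloud GPU. As a minimal sketch (assuming `xgboost`, `scikit-learn`, and `joblib` are installed locally), the classifier can also be invoked directly from Python instead of via the commands below:

```
# Sketch: call the training function directly instead of via the CLI.
from my_xgboost_classifier import run_classifier

# use_gpu=True switches to tree_method='gpu_hist' and requires a CUDA GPU.
run_classifier(save_as="my_model.joblib", use_gpu=False)
```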
6 | 7 | 8 | 9 | Run code as follows: 10 | 11 | 12 | 13 | ``` 14 | # run XGBoost classifier locally 15 | python my_xgboost_classifier.py 16 | 17 | # run XGBoost classifier locally via Lightning (if you have a GPU) 18 | pip install lightning 19 | lightning run app xgboost-cloud-gpu.py --setup 20 | 21 | # run XGBoost in Lightning cloud on a V100 22 | lightning run app xgboost-cloud-gpu.py --cloud 23 | ``` 24 | -------------------------------------------------------------------------------- /cloud-resources/xgboost-lightning-gpu/my_xgboost_classifier.py: -------------------------------------------------------------------------------- 1 | from sklearn import datasets 2 | from sklearn.model_selection import train_test_split 3 | from sklearn.metrics import accuracy_score 4 | from xgboost import XGBClassifier 5 | from joblib import dump 6 | 7 | 8 | def run_classifier(save_as="my_model.joblib", use_gpu=False): 9 | digits = datasets.load_digits() 10 | features, targets = digits.images, digits.target 11 | features = features.reshape(-1, 8*8) 12 | 13 | X_train, X_test, y_train, y_test = train_test_split(features, targets, test_size=0.2, random_state=123) 14 | 15 | if use_gpu: 16 | model = XGBClassifier(tree_method='gpu_hist', gpu_id=0) 17 | else: 18 | model = XGBClassifier() 19 | 20 | model.fit(X_train, y_train) 21 | y_pred = model.predict(X_test) 22 | 23 | accuracy = accuracy_score(y_test, y_pred) 24 | print(f"Accuracy: {accuracy * 100.0:.2f}%") 25 | 26 | dump(model, filename=save_as) 27 | 28 | 29 | if __name__ == "__main__": 30 | run_classifier() 31 | -------------------------------------------------------------------------------- /cloud-resources/xgboost-lightning-gpu/xgboost-cloud-gpu.py: -------------------------------------------------------------------------------- 1 | #!pip install xgboost 2 | #!pip install scikit-learn 3 | 4 | import lightning as L 5 | from lightning.app.storage import Drive 6 | from my_xgboost_classifier import run_classifier 7 | 8 | 9 | class RunCode(L.LightningWork): 10 | def __init__(self): 11 | 12 | # available GPUs and costs: https://lightning.ai/pricing/consumption-rates 13 | super().__init__(cloud_compute=L.CloudCompute("gpu-fast", disk_size=10)) 14 | 15 | # storage for outputs 16 | self.model_storage = Drive("lit://checkpoints") 17 | 18 | def run(self): 19 | # run model code 20 | model_path = "my_model.joblib" 21 | run_classifier(save_as=model_path, use_gpu=True) 22 | self.model_storage.put(model_path) 23 | 24 | 25 | component = RunCode() 26 | app = L.LightningApp(component) -------------------------------------------------------------------------------- /demos/data/cat/cat.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/machine-learning-notes/8c8e867930d000131e463a00c864ce42e8661cad/demos/data/cat/cat.jpeg -------------------------------------------------------------------------------- /evaluation/ci-for-ml/ci-simulation-repeated.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "81dd54a3-d44a-41ce-aed1-9f3eda29482b", 6 | "metadata": {}, 7 | "source": [ 8 | "# CI Simulation Repeated (Supplementary Material)" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "539ff52f-0cce-4cb9-aff5-91705ee69ad4", 14 | "metadata": {}, 15 | "source": [ 16 | "- This notebook supplements the [confidence-intervals-for-ml.ipynb](confidence-intervals-for-ml.ipynb) with a
case study.\n", 17 | "\n", 18 | "- In particular, this notebook repeats the CI simulation from [ci-simulation.ipynb](ci-simulation.ipynb) multiple times to get a sense of which CI method is good at capturing the true model performance.\n", 19 | "\n", 20 | "- Just to make sure that there are no silent bugs due to variable naming & reuse, each experiment is in a separate Python script." 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "id": "2a37522e-fa74-4e44-b398-59b7a975d67f", 26 | "metadata": {}, 27 | "source": [ 28 | "## 1) Normal Approximation Interval Based on the Test Set" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 1, 34 | "id": "b8000642-fb48-499e-828f-24396bdffbec", 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "name": "stdout", 39 | "output_type": "stream", 40 | "text": [ 41 | "95.6% of 95% confidence intervals contain the true accuracy.\n" 42 | ] 43 | } 44 | ], 45 | "source": [ 46 | "!python ci-simulation-repeated/1_normal_approx.py --repetitions 1000" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "id": "75c876a5-817f-451f-81b0-5af229451e94", 52 | "metadata": {}, 53 | "source": [ 54 | "## 2.1) A *t* Confidence Interval from Bootstrap Samples" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 2, 60 | "id": "c5540a38-f241-4429-a049-2519291319c0", 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "name": "stdout", 65 | "output_type": "stream", 66 | "text": [ 67 | "98.5% of 95% confidence intervals contain the true accuracy.\n" 68 | ] 69 | } 70 | ], 71 | "source": [ 72 | "!python ci-simulation-repeated/2.1_bootstrap_t.py --repetitions 1000" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "id": "cbb37329-4482-4dd4-a6eb-ac76f5ada83c", 78 | "metadata": {}, 79 | "source": [ 80 | "## 2.2) Bootstrap Percentile Method" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 3, 86 | "id": "0c6afc91-c960-4959-b7d6-8e212a545281", 87 | "metadata": {}, 88 | "outputs": [ 89 | { 90 | "name": "stdout", 91 | "output_type": "stream", 92 | "text": [ 93 | "98.0% of 95% confidence intervals contain the true accuracy.\n" 94 | ] 95 | } 96 | ], 97 | "source": [ 98 | "!python ci-simulation-repeated/2.2_bootstrap_percentile.py --repetitions 1000" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "id": "32efa9f6-5190-4726-9c95-ec019dae9960", 104 | "metadata": {}, 105 | "source": [ 106 | "## 2.3) .632 Bootstrap" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 4, 112 | "id": "4f520295-dbfc-49a7-b8a4-8dea88d6442e", 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "name": "stdout", 117 | "output_type": "stream", 118 | "text": [ 119 | "83.2% of 95% confidence intervals contain the true accuracy.\n" 120 | ] 121 | } 122 | ], 123 | "source": [ 124 | "!python ci-simulation-repeated/2.3_bootstrap_632.py --repetitions 1000" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "id": "eeb6e55a-445d-4f65-b282-85cc2f6a2303", 130 | "metadata": {}, 131 | "source": [ 132 | "## 3) Bootstrapping the Test Set Predictions" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 5, 138 | "id": "7edb473f-234e-4e4f-b561-45ca5344fcfc", 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "name": "stdout", 143 | "output_type": "stream", 144 | "text": [ 145 | "94.5% of 95% confidence intervals contain the true accuracy.\n" 146 | ] 147 | } 148 | ], 149 | "source": [ 150 | "!python ci-simulation-repeated/3_bootstrap_test.py --repetitions 1000" 151 | ]
152 | } 153 | ], 154 | "metadata": { 155 | "kernelspec": { 156 | "display_name": "Python 3 (ipykernel)", 157 | "language": "python", 158 | "name": "python3" 159 | }, 160 | "language_info": { 161 | "codemirror_mode": { 162 | "name": "ipython", 163 | "version": 3 164 | }, 165 | "file_extension": ".py", 166 | "mimetype": "text/x-python", 167 | "name": "python", 168 | "nbconvert_exporter": "python", 169 | "pygments_lexer": "ipython3", 170 | "version": "3.9.7" 171 | } 172 | }, 173 | "nbformat": 4, 174 | "nbformat_minor": 5 175 | } 176 | -------------------------------------------------------------------------------- /evaluation/ci-for-ml/ci-simulation-repeated/1_normal_approx.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from get_dataset import get_dataset 3 | from sklearn.tree import DecisionTreeClassifier 4 | import scipy.stats 5 | import numpy as np 6 | 7 | 8 | def run_method(num_repetitions): 9 | is_inside_list = [] 10 | 11 | for i in range(num_repetitions): 12 | 13 | X_train, y_train, X_test, y_test, X_huge_test, y_huge_test = get_dataset( 14 | random_seed=i 15 | ) 16 | 17 | clf = DecisionTreeClassifier(random_state=123, max_depth=3) 18 | clf.fit(X_train, y_train) 19 | 20 | acc_test_true = clf.score(X_huge_test, y_huge_test) 21 | 22 | ##################################################### 23 | # Compute CI 24 | ##################################################### 25 | 26 | confidence = 0.95 # Change to your desired confidence level 27 | z_value = scipy.stats.norm.ppf((1 + confidence) / 2.0) 28 | acc_test = clf.score(X_test, y_test) 29 | ci_length = z_value * np.sqrt((acc_test * (1 - acc_test)) / y_test.shape[0]) 30 | 31 | ci_lower = acc_test - ci_length 32 | ci_upper = acc_test + ci_length 33 | 34 | # Check CI 35 | is_inside = acc_test_true >= ci_lower and acc_test_true <= ci_upper 36 | 37 | is_inside_list.append(is_inside) 38 | 39 | return is_inside_list 40 | 41 | 42 | if __name__ == "__main__": 43 | 44 | parser = argparse.ArgumentParser() 45 | parser.add_argument( 46 | "-r", 47 | "--repetitions", 48 | required=True, 49 | type=int, 50 | ) 51 | 52 | args = parser.parse_args() 53 | is_inside_list = run_method(args.repetitions) 54 | 55 | print( 56 | f"{np.mean(is_inside_list)*100}% of 95% confidence" 57 | " intervals contain the true accuracy." 
58 | ) 59 | -------------------------------------------------------------------------------- /evaluation/ci-for-ml/ci-simulation-repeated/2.1_bootstrap_t.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from get_dataset import get_dataset 3 | from sklearn.tree import DecisionTreeClassifier 4 | import scipy.stats 5 | import numpy as np 6 | 7 | 8 | def run_method(num_repetitions): 9 | is_inside_list = [] 10 | 11 | for i in range(num_repetitions): 12 | 13 | X_train, y_train, X_test, y_test, X_huge_test, y_huge_test = get_dataset( 14 | random_seed=i 15 | ) 16 | 17 | clf = DecisionTreeClassifier(random_state=123, max_depth=3) 18 | clf.fit(X_train, y_train) 19 | 20 | acc_test_true = clf.score(X_huge_test, y_huge_test) 21 | 22 | ##################################################### 23 | # Compute CI 24 | ##################################################### 25 | 26 | rng = np.random.RandomState(seed=12345) 27 | idx = np.arange(y_train.shape[0]) 28 | 29 | bootstrap_train_accuracies = [] 30 | bootstrap_rounds = 200 31 | 32 | for i in range(bootstrap_rounds): 33 | 34 | train_idx = rng.choice(idx, size=idx.shape[0], replace=True) 35 | valid_idx = np.setdiff1d(idx, train_idx, assume_unique=False) 36 | 37 | boot_train_X, boot_train_y = X_train[train_idx], y_train[train_idx] 38 | boot_valid_X, boot_valid_y = X_train[valid_idx], y_train[valid_idx] 39 | 40 | clf.fit(boot_train_X, boot_train_y) 41 | acc = clf.score(boot_valid_X, boot_valid_y) 42 | bootstrap_train_accuracies.append(acc) 43 | 44 | bootstrap_train_mean = np.mean(bootstrap_train_accuracies) 45 | 46 | confidence = 0.95 # Change to your desired confidence level 47 | t_value = scipy.stats.t.ppf((1 + confidence) / 2.0, df=bootstrap_rounds - 1) 48 | 49 | se = 0.0 50 | for acc in bootstrap_train_accuracies: 51 | se += (acc - bootstrap_train_mean) ** 2 52 | se = np.sqrt((1.0 / (bootstrap_rounds - 1)) * se) 53 | 54 | ci_length = t_value * se 55 | 56 | ci_lower = bootstrap_train_mean - ci_length 57 | ci_upper = bootstrap_train_mean + ci_length 58 | 59 | # Check CI 60 | is_inside = acc_test_true >= ci_lower and acc_test_true <= ci_upper 61 | 62 | is_inside_list.append(is_inside) 63 | 64 | return is_inside_list 65 | 66 | 67 | if __name__ == "__main__": 68 | 69 | parser = argparse.ArgumentParser() 70 | parser.add_argument( 71 | "-r", 72 | "--repetitions", 73 | required=True, 74 | type=int, 75 | ) 76 | 77 | args = parser.parse_args() 78 | is_inside_list = run_method(args.repetitions) 79 | 80 | print( 81 | f"{np.mean(is_inside_list)*100}% of 95% confidence" 82 | " intervals contain the true accuracy." 
83 | ) 84 | -------------------------------------------------------------------------------- /evaluation/ci-for-ml/ci-simulation-repeated/2.2_bootstrap_percentile.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from get_dataset import get_dataset 3 | from sklearn.tree import DecisionTreeClassifier 4 | import numpy as np 5 | 6 | 7 | def run_method(num_repetitions): 8 | is_inside_list = [] 9 | 10 | for i in range(num_repetitions): 11 | 12 | X_train, y_train, X_test, y_test, X_huge_test, y_huge_test = get_dataset( 13 | random_seed=i 14 | ) 15 | 16 | clf = DecisionTreeClassifier(random_state=123, max_depth=3) 17 | clf.fit(X_train, y_train) 18 | 19 | acc_test_true = clf.score(X_huge_test, y_huge_test) 20 | 21 | ##################################################### 22 | # Compute CI 23 | ##################################################### 24 | 25 | rng = np.random.RandomState(seed=12345) 26 | idx = np.arange(y_train.shape[0]) 27 | 28 | bootstrap_train_accuracies = [] 29 | bootstrap_rounds = 200 30 | 31 | for i in range(bootstrap_rounds): 32 | 33 | train_idx = rng.choice(idx, size=idx.shape[0], replace=True) 34 | valid_idx = np.setdiff1d(idx, train_idx, assume_unique=False) 35 | 36 | boot_train_X, boot_train_y = X_train[train_idx], y_train[train_idx] 37 | boot_valid_X, boot_valid_y = X_train[valid_idx], y_train[valid_idx] 38 | 39 | clf.fit(boot_train_X, boot_train_y) 40 | acc = clf.score(boot_valid_X, boot_valid_y) 41 | bootstrap_train_accuracies.append(acc) 42 | 43 | ci_lower = np.percentile(bootstrap_train_accuracies, 2.5) 44 | ci_upper = np.percentile(bootstrap_train_accuracies, 97.5) 45 | 46 | # Check CI 47 | is_inside = acc_test_true >= ci_lower and acc_test_true <= ci_upper 48 | 49 | is_inside_list.append(is_inside) 50 | 51 | return is_inside_list 52 | 53 | 54 | if __name__ == "__main__": 55 | 56 | parser = argparse.ArgumentParser() 57 | parser.add_argument( 58 | "-r", 59 | "--repetitions", 60 | required=True, 61 | type=int, 62 | ) 63 | 64 | args = parser.parse_args() 65 | is_inside_list = run_method(args.repetitions) 66 | 67 | print( 68 | f"{np.mean(is_inside_list)*100}% of 95% confidence" 69 | " intervals contain the true accuracy." 
70 | ) 71 | -------------------------------------------------------------------------------- /evaluation/ci-for-ml/ci-simulation-repeated/2.3_bootstrap_632.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from get_dataset import get_dataset 3 | from sklearn.tree import DecisionTreeClassifier 4 | import numpy as np 5 | 6 | 7 | def run_method(num_repetitions): 8 | is_inside_list = [] 9 | 10 | for i in range(num_repetitions): 11 | 12 | X_train, y_train, X_test, y_test, X_huge_test, y_huge_test = get_dataset( 13 | random_seed=i 14 | ) 15 | 16 | clf = DecisionTreeClassifier(random_state=123, max_depth=3) 17 | clf.fit(X_train, y_train) 18 | 19 | acc_test_true = clf.score(X_huge_test, y_huge_test) 20 | 21 | ##################################################### 22 | # Compute CI 23 | ##################################################### 24 | 25 | rng = np.random.RandomState(seed=12345) 26 | idx = np.arange(y_train.shape[0]) 27 | 28 | bootstrap_train_accuracies = [] 29 | bootstrap_rounds = 200 30 | weight = 0.632 31 | 32 | for i in range(bootstrap_rounds): 33 | 34 | train_idx = rng.choice(idx, size=idx.shape[0], replace=True) 35 | valid_idx = np.setdiff1d(idx, train_idx, assume_unique=False) 36 | 37 | boot_train_X, boot_train_y = X_train[train_idx], y_train[train_idx] 38 | boot_valid_X, boot_valid_y = X_train[valid_idx], y_train[valid_idx] 39 | 40 | clf.fit(boot_train_X, boot_train_y) 41 | train_acc = clf.score(X_train, y_train) 42 | valid_acc = clf.score(boot_valid_X, boot_valid_y) 43 | acc = weight * valid_acc + (1.0 - weight) * train_acc # .632 estimate: 0.632 * out-of-bag acc + 0.368 * training-set acc 44 | 45 | bootstrap_train_accuracies.append(acc) 46 | 47 | ci_lower = np.percentile(bootstrap_train_accuracies, 2.5) 48 | ci_upper = np.percentile(bootstrap_train_accuracies, 97.5) 49 | 50 | # Check CI 51 | is_inside = acc_test_true >= ci_lower and acc_test_true <= ci_upper 52 | 53 | is_inside_list.append(is_inside) 54 | 55 | return is_inside_list 56 | 57 | 58 | if __name__ == "__main__": 59 | 60 | parser = argparse.ArgumentParser() 61 | parser.add_argument( 62 | "-r", 63 | "--repetitions", 64 | required=True, 65 | type=int, 66 | ) 67 | 68 | args = parser.parse_args() 69 | is_inside_list = run_method(args.repetitions) 70 | 71 | print( 72 | f"{np.mean(is_inside_list)*100}% of 95% confidence" 73 | " intervals contain the true accuracy.
74 | ) 75 | -------------------------------------------------------------------------------- /evaluation/ci-for-ml/ci-simulation-repeated/3_bootstrap_test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from get_dataset import get_dataset 3 | from sklearn.tree import DecisionTreeClassifier 4 | import numpy as np 5 | 6 | 7 | def run_method(num_repetitions): 8 | is_inside_list = [] 9 | 10 | for i in range(num_repetitions): 11 | 12 | X_train, y_train, X_test, y_test, X_huge_test, y_huge_test = get_dataset( 13 | random_seed=i 14 | ) 15 | 16 | clf = DecisionTreeClassifier(random_state=123, max_depth=3) 17 | clf.fit(X_train, y_train) 18 | 19 | acc_test_true = clf.score(X_huge_test, y_huge_test) 20 | 21 | ##################################################### 22 | # Compute CI 23 | ##################################################### 24 | 25 | predictions_test = clf.predict(X_test) 26 | 27 | rng = np.random.RandomState(seed=12345) 28 | idx = np.arange(y_test.shape[0]) 29 | 30 | test_accuracies = [] 31 | 32 | for i in range(200): 33 | 34 | pred_idx = rng.choice(idx, size=idx.shape[0], replace=True) 35 | acc_test_boot = np.mean(predictions_test[pred_idx] == y_test[pred_idx]) 36 | test_accuracies.append(acc_test_boot) 37 | 38 | ci_lower = np.percentile(test_accuracies, 2.5) 39 | ci_upper = np.percentile(test_accuracies, 97.5) 40 | 41 | # Check CI 42 | is_inside = acc_test_true >= ci_lower and acc_test_true <= ci_upper 43 | is_inside_list.append(is_inside) 44 | 45 | return is_inside_list 46 | 47 | 48 | if __name__ == "__main__": 49 | 50 | parser = argparse.ArgumentParser() 51 | parser.add_argument( 52 | "-r", 53 | "--repetitions", 54 | required=True, 55 | type=int, 56 | ) 57 | 58 | args = parser.parse_args() 59 | is_inside_list = run_method(args.repetitions) 60 | 61 | print( 62 | f"{np.mean(is_inside_list)*100}% of 95% confidence" 63 | " intervals contain the true accuracy." 
64 | ) 65 | -------------------------------------------------------------------------------- /evaluation/ci-for-ml/ci-simulation-repeated/get_dataset.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import make_classification 2 | 3 | 4 | def get_dataset(random_seed): 5 | 6 | X, y = make_classification( 7 | n_samples=10_002_000, 8 | n_features=5, 9 | n_redundant=2, 10 | n_classes=2, 11 | n_clusters_per_class=1, 12 | random_state=random_seed, 13 | flip_y=0.25, 14 | ) 15 | 16 | X_train = X[:1_000] 17 | y_train = y[:1_000] 18 | 19 | X_test = X[1_000:2_000] 20 | y_test = y[1_000:2_000] 21 | 22 | X_huge_test = X[2_000:] 23 | y_huge_test = y[2_000:] 24 | 25 | return X_train, y_train, X_test, y_test, X_huge_test, y_huge_test 26 | -------------------------------------------------------------------------------- /evaluation/lightning-jupyter-tensorboard/shared_utilities.py: -------------------------------------------------------------------------------- 1 | import lightning as L 2 | import torch 3 | import torch.nn.functional as F 4 | import torchmetrics 5 | from torch.utils.data import DataLoader 6 | from torch.utils.data.dataset import random_split 7 | from torchvision import datasets, transforms 8 | 9 | 10 | class PyTorchMLP(torch.nn.Module): 11 | def __init__(self, num_features, num_classes): 12 | super().__init__() 13 | 14 | self.all_layers = torch.nn.Sequential( 15 | # 1st hidden layer 16 | torch.nn.Linear(num_features, 100), 17 | torch.nn.BatchNorm1d(100), 18 | torch.nn.ReLU(), 19 | 20 | # 2nd hidden layer 21 | torch.nn.Linear(100, 50), 22 | torch.nn.BatchNorm1d(50), 23 | torch.nn.ReLU(), 24 | 25 | # output layer 26 | torch.nn.Linear(50, num_classes), 27 | ) 28 | 29 | def forward(self, x): 30 | x = torch.flatten(x, start_dim=1) 31 | logits = self.all_layers(x) 32 | return logits 33 | 34 | 35 | class LightningModel(L.LightningModule): 36 | def __init__(self, model, learning_rate): 37 | super().__init__() 38 | 39 | self.learning_rate = learning_rate 40 | self.model = model 41 | 42 | self.save_hyperparameters(ignore=["model"]) 43 | 44 | self.train_acc = torchmetrics.Accuracy() 45 | self.val_acc = torchmetrics.Accuracy() 46 | self.test_acc = torchmetrics.Accuracy() 47 | 48 | def forward(self, x): 49 | return self.model(x) 50 | 51 | def _shared_step(self, batch): 52 | features, true_labels = batch 53 | logits = self(features) 54 | 55 | loss = F.cross_entropy(logits, true_labels) 56 | predicted_labels = torch.argmax(logits, dim=1) 57 | return loss, true_labels, predicted_labels 58 | 59 | def training_step(self, batch, batch_idx): 60 | loss, true_labels, predicted_labels = self._shared_step(batch) 61 | 62 | self.log("loss/train", loss) 63 | self.train_acc(predicted_labels, true_labels) 64 | self.log( 65 | "acc/train", self.train_acc, prog_bar=True, on_epoch=True, on_step=False 66 | ) 67 | return loss 68 | 69 | def validation_step(self, batch, batch_idx): 70 | loss, true_labels, predicted_labels = self._shared_step(batch) 71 | 72 | self.log("loss/val", loss, prog_bar=True) 73 | self.val_acc(predicted_labels, true_labels) 74 | self.log("acc/val", self.val_acc, prog_bar=True) 75 | 76 | def test_step(self, batch, batch_idx): 77 | loss, true_labels, predicted_labels = self._shared_step(batch) 78 | self.test_acc(predicted_labels, true_labels) 79 | self.log("acc/test", self.test_acc) 80 | 81 | def configure_optimizers(self): 82 | optimizer = torch.optim.SGD(self.parameters(), lr=self.learning_rate) 83 | return optimizer 84 | 85 | 86 | class 
MnistDataModule(L.LightningDataModule): 87 | def __init__(self, data_path="./", batch_size=64, num_workers=0): 88 | super().__init__() 89 | self.batch_size = batch_size 90 | self.data_path = data_path 91 | self.num_workers = num_workers 92 | 93 | def prepare_data(self): 94 | datasets.MNIST(root=self.data_path, download=True) 95 | return 96 | 97 | def setup(self, stage=None): 98 | # Note transforms.ToTensor() scales input images 99 | # to 0-1 range 100 | train = datasets.MNIST( 101 | root=self.data_path, 102 | train=True, 103 | transform=transforms.ToTensor(), 104 | download=False, 105 | ) 106 | 107 | self.test = datasets.MNIST( 108 | root=self.data_path, 109 | train=False, 110 | transform=transforms.ToTensor(), 111 | download=False, 112 | ) 113 | 114 | self.train, self.valid = random_split(train, lengths=[55000, 5000]) 115 | 116 | def train_dataloader(self): 117 | train_loader = DataLoader( 118 | dataset=self.train, 119 | batch_size=self.batch_size, 120 | drop_last=True, 121 | shuffle=True, 122 | num_workers=self.num_workers, 123 | ) 124 | return train_loader 125 | 126 | def val_dataloader(self): 127 | valid_loader = DataLoader( 128 | dataset=self.valid, 129 | batch_size=self.batch_size, 130 | drop_last=False, 131 | shuffle=False, 132 | num_workers=self.num_workers, 133 | ) 134 | return valid_loader 135 | 136 | def test_dataloader(self): 137 | test_loader = DataLoader( 138 | dataset=self.test, 139 | batch_size=self.batch_size, 140 | drop_last=False, 141 | shuffle=False, 142 | num_workers=self.num_workers, 143 | ) 144 | return test_loader 145 | 146 | 147 | -------------------------------------------------------------------------------- /hyperparameter-tuning-methods/03.1-hyperopt-decisiontree-example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "39329df3-1f99-4b11-9405-5969d52368a7", 6 | "metadata": {}, 7 | "source": [ 8 | "# Decision Tree & Hyperopt Example" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "c7d5f0ab-33cd-40f2-82e7-fb2747f04f89", 14 | "metadata": {}, 15 | "source": [ 16 | "An example showing how to use the Hyperopt library (http://hyperopt.github.io) for Bayesian hyperparameter optimization via the Tree-structured Parzen Estimator (TPE)." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "id": "7f61a90e-a119-4bd0-af21-38604c5b4eec", 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "name": "stdout", 27 | "output_type": "stream", 28 | "text": [ 29 | "scikit-learn: 1.0\n", 30 | "hyperopt : 0.2.5\n", 31 | "\n" 32 | ] 33 | } 34 | ], 35 | "source": [ 36 | "%load_ext watermark\n", 37 | "%watermark -p scikit-learn,hyperopt" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "id": "1f0489c2-dd9c-4e71-a78c-e01201762b37", 43 | "metadata": {}, 44 | "source": [ 45 | "## Dataset" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 2, 51 | "id": "271b17ff-5ea4-4161-8b7f-20ba8131d666", 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "Train/Valid/Test sizes: 398 80 171\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "from sklearn import model_selection\n", 64 | "from sklearn.model_selection import train_test_split\n", 65 | "from sklearn import datasets\n", 66 | "\n", 67 | "\n", 68 | "data = datasets.load_breast_cancer()\n", 69 | "X, y = data.data, data.target\n", 70 | "\n", 71 | "X_train, X_test, y_train, y_test = \\\n", 72 | " train_test_split(X, y,
test_size=0.3, random_state=1, stratify=y)\n", 73 | "\n", 74 | "X_train_sub, X_valid, y_train_sub, y_valid = \\\n", 75 | " train_test_split(X_train, y_train, test_size=0.2, random_state=1, stratify=y_train)\n", 76 | "\n", 77 | "print('Train/Valid/Test sizes:', y_train.shape[0], y_valid.shape[0], y_test.shape[0])" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "id": "0affc454-9f07-48e6-bcee-e6253d968247", 83 | "metadata": {}, 84 | "source": [ 85 | "## Hyperopt" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 3, 91 | "id": "53282fd6-1292-4b4d-a0b7-980707d61c3c", 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "from hyperopt import Trials, STATUS_OK, tpe, hp, fmin\n", 96 | "import hyperopt.pyll.stochastic" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "id": "5435889f-3cd7-45cd-abb2-632e3b034194", 102 | "metadata": {}, 103 | "source": [ 104 | "Some random sampling examples:" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 4, 110 | "id": "7ca6f8f6-0c78-434a-8121-a83b5708e143", 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "data": { 115 | "text/plain": [ 116 | "1.8925662130833578" 117 | ] 118 | }, 119 | "execution_count": 4, 120 | "metadata": {}, 121 | "output_type": "execute_result" 122 | } 123 | ], 124 | "source": [ 125 | "hyperopt.pyll.stochastic.sample(hp.loguniform('test', 1e-5, 1)) # range e^{low} to e^{high}" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 5, 131 | "id": "b2adc867-2d5a-44bd-8115-195ed53d6a7e", 132 | "metadata": {}, 133 | "outputs": [ 134 | { 135 | "data": { 136 | "text/plain": [ 137 | "1.1" 138 | ] 139 | }, 140 | "execution_count": 5, 141 | "metadata": {}, 142 | "output_type": "execute_result" 143 | } 144 | ], 145 | "source": [ 146 | "hyperopt.pyll.stochastic.sample(hp.qloguniform('test', 1e-5, 1, 0.1)) # rounded to 0.1" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 6, 152 | "id": "9a6bb270-d2a1-4179-a770-39bad5a8332c", 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "from sklearn.model_selection import cross_val_score\n", 157 | "from sklearn.tree import DecisionTreeClassifier\n", 158 | "import numpy as np\n", 159 | "\n", 160 | "\n", 161 | "\n", 162 | "params = {\n", 163 | " 'min_samples_split': hp.choice('min_samples_split', np.arange(2, 10)),\n", 164 | " 'min_impurity_decrease': hp.quniform('min_impurity_decrease', 0.0, 0.5, 0.05),\n", 165 | " 'max_depth': hp.choice('max_depth', [6, 16, None])\n", 166 | "}\n", 167 | "\n", 168 | "\n", 169 | "\n", 170 | "def optimization_objective(params):\n", 171 | "\n", 172 | "\n", 173 | " tree = DecisionTreeClassifier(random_state=123, **params)\n", 174 | " tree.fit(X_train, y_train)\n", 175 | " \n", 176 | " accuracies = cross_val_score(\n", 177 | " estimator=tree, X=X_train, y=y_train, cv=10, n_jobs=-1)\n", 178 | "\n", 179 | " score = accuracies.mean()\n", 180 | "\n", 181 | " return {'loss':1-score, 'status': STATUS_OK}" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 7, 187 | "id": "a51829c6-234f-401f-84ed-a005f71d0150", 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "name": "stdout", 192 | "output_type": "stream", 193 | "text": [ 194 | "100%|████████| 50/50 [00:01<00:00, 32.09trial/s, best loss: 0.06756410256410261]\n" 195 | ] 196 | } 197 | ], 198 | "source": [ 199 | "trials = Trials()\n", 200 | "best = fmin(fn=optimization_objective,\n", 201 | " space=params,\n", 202 | " algo=tpe.suggest,\n", 203 | " max_evals=50,\n", 
204 | " trials=trials)" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": 8, 210 | "id": "2c26399d-ebfc-4b06-86d9-36e49711e908", 211 | "metadata": {}, 212 | "outputs": [ 213 | { 214 | "data": { 215 | "text/plain": [ 216 | "{'max_depth': 2, 'min_impurity_decrease': 0.0, 'min_samples_split': 5}" 217 | ] 218 | }, 219 | "execution_count": 8, 220 | "metadata": {}, 221 | "output_type": "execute_result" 222 | } 223 | ], 224 | "source": [ 225 | "best" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "id": "42380f27-d982-4ae8-8981-17b7224ebb04", 231 | "metadata": {}, 232 | "source": [ 233 | "- Attention, `fmin` returns results from `hp.choice` as an index!" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 9, 239 | "id": "83e99f85-9ce2-494e-99ea-20ab49dc0b15", 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "name": "stdout", 244 | "output_type": "stream", 245 | "text": [ 246 | "{'max_depth': None, 'min_impurity_decrease': 0.0, 'min_samples_split': 7}\n" 247 | ] 248 | } 249 | ], 250 | "source": [ 251 | "from hyperopt import space_eval\n", 252 | "\n", 253 | "best_params = space_eval(params, best)\n", 254 | "print(best_params)" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 10, 260 | "id": "fbb610d8-4846-4e9f-a589-adacd0042603", 261 | "metadata": {}, 262 | "outputs": [ 263 | { 264 | "data": { 265 | "text/plain": [ 266 | "DecisionTreeClassifier(min_samples_split=7, random_state=123)" 267 | ] 268 | }, 269 | "execution_count": 10, 270 | "metadata": {}, 271 | "output_type": "execute_result" 272 | } 273 | ], 274 | "source": [ 275 | "tree = DecisionTreeClassifier(random_state=123, **best_params)\n", 276 | "tree.fit(X_train, y_train)" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 11, 282 | "id": "763e816b-6437-45a9-812f-8b429472d75e", 283 | "metadata": {}, 284 | "outputs": [ 285 | { 286 | "name": "stdout", 287 | "output_type": "stream", 288 | "text": [ 289 | "Training Accuracy: 0.99\n", 290 | "Test Accuracy: 0.94\n" 291 | ] 292 | } 293 | ], 294 | "source": [ 295 | "print(f\"Training Accuracy: {tree.score(X_train, y_train):0.2f}\")\n", 296 | "print(f\"Test Accuracy: {tree.score(X_test, y_test):0.2f}\")" 297 | ] 298 | } 299 | ], 300 | "metadata": { 301 | "kernelspec": { 302 | "display_name": "Python 3 (ipykernel)", 303 | "language": "python", 304 | "name": "python3" 305 | }, 306 | "language_info": { 307 | "codemirror_mode": { 308 | "name": "ipython", 309 | "version": 3 310 | }, 311 | "file_extension": ".py", 312 | "mimetype": "text/x-python", 313 | "name": "python", 314 | "nbconvert_exporter": "python", 315 | "pygments_lexer": "ipython3", 316 | "version": "3.9.6" 317 | } 318 | }, 319 | "nbformat": 4, 320 | "nbformat_minor": 5 321 | } 322 | -------------------------------------------------------------------------------- /hyperparameter-tuning-methods/05.1-successive-halving-decisiontree.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "39329df3-1f99-4b11-9405-5969d52368a7", 6 | "metadata": {}, 7 | "source": [ 8 | "# Decision Tree & Successive Halving Random + Search Example" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "7f61a90e-a119-4bd0-af21-38604c5b4eec", 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "scikit-learn: 1.0\n", 22 | "mlxtend : 0.19.0\n", 23 | "\n" 24 
| ] 25 | } 26 | ], 27 | "source": [ 28 | "%load_ext watermark\n", 29 | "%watermark -p scikit-learn,mlxtend" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "id": "1f0489c2-dd9c-4e71-a78c-e01201762b37", 35 | "metadata": {}, 36 | "source": [ 37 | "## Dataset" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "id": "271b17ff-5ea4-4161-8b7f-20ba8131d666", 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "name": "stdout", 48 | "output_type": "stream", 49 | "text": [ 50 | "Train/Valid/Test sizes: 398 80 171\n" 51 | ] 52 | } 53 | ], 54 | "source": [ 55 | "from sklearn import model_selection\n", 56 | "from sklearn.model_selection import train_test_split\n", 57 | "from sklearn import datasets\n", 58 | "\n", 59 | "\n", 60 | "data = datasets.load_breast_cancer()\n", 61 | "X, y = data.data, data.target\n", 62 | "\n", 63 | "X_train, X_test, y_train, y_test = \\\n", 64 | " train_test_split(X, y, test_size=0.3, random_state=1, stratify=y)\n", 65 | "\n", 66 | "X_train_sub, X_valid, y_train_sub, y_valid = \\\n", 67 | " train_test_split(X_train, y_train, test_size=0.2, random_state=1, stratify=y_train)\n", 68 | "\n", 69 | "print('Train/Valid/Test sizes:', y_train.shape[0], y_valid.shape[0], y_test.shape[0])" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "id": "0c922b01-86f0-4e83-9e36-446f99f6fe1b", 75 | "metadata": {}, 76 | "source": [ 77 | "## Successive Halving + Random Search" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "id": "72e56f33-ec33-46dd-afa2-a1b3c8b3da0b", 83 | "metadata": {}, 84 | "source": [ 85 | "\n", 86 | "- More info: \n", 87 | " - https://scikit-learn.org/stable/modules/grid_search.html#successive-halving-user-guide\n", 88 | " - https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.HalvingRandomSearchCV.html#sklearn.model_selection.HalvingRandomSearchCV" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 3, 94 | "id": "96f0b4c1-803a-436f-93d5-31baab55faa5", 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "data": { 99 | "text/plain": [ 100 | "0.8882539682539681" 101 | ] 102 | }, 103 | "execution_count": 3, 104 | "metadata": {}, 105 | "output_type": "execute_result" 106 | } 107 | ], 108 | "source": [ 109 | "import numpy as np\n", 110 | "import scipy.stats\n", 111 | "\n", 112 | "from sklearn.experimental import enable_halving_search_cv\n", 113 | "from sklearn.model_selection import HalvingRandomSearchCV\n", 114 | "\n", 115 | "from sklearn.tree import DecisionTreeClassifier\n", 116 | "\n", 117 | "\n", 118 | "clf = DecisionTreeClassifier(random_state=123)\n", 119 | "\n", 120 | "params = {\n", 121 | " 'min_samples_split': scipy.stats.randint(2, 12),\n", 122 | " 'min_impurity_decrease': scipy.stats.uniform(0.0, 0.5),\n", 123 | " 'max_depth': [6, 16, None]\n", 124 | "}\n", 125 | "\n", 126 | "\n", 127 | "search = HalvingRandomSearchCV(\n", 128 | " estimator=clf, \n", 129 | " param_distributions=params,\n", 130 | " n_candidates='exhaust',\n", 131 | " resource='n_samples',\n", 132 | " factor=3,\n", 133 | " random_state=123,\n", 134 | " n_jobs=1)\n", 135 | "\n", 136 | "\n", 137 | "search.fit(X_train, y_train)\n", 138 | "\n", 139 | "search.best_score_" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 4, 145 | "id": "2c26399d-ebfc-4b06-86d9-36e49711e908", 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "data": { 150 | "text/plain": [ 151 | "{'max_depth': None,\n", 152 | " 'min_impurity_decrease': 0.029838948304784174,\n", 153 | " 'min_samples_split': 2}" 154 | ] 
155 | }, 156 | "execution_count": 4, 157 | "metadata": {}, 158 | "output_type": "execute_result" 159 | } 160 | ], 161 | "source": [ 162 | "search.best_params_" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 5, 168 | "id": "763e816b-6437-45a9-812f-8b429472d75e", 169 | "metadata": {}, 170 | "outputs": [ 171 | { 172 | "name": "stdout", 173 | "output_type": "stream", 174 | "text": [ 175 | "Training Accuracy: 0.95\n", 176 | "Test Accuracy: 0.94\n" 177 | ] 178 | } 179 | ], 180 | "source": [ 181 | "print(f\"Training Accuracy: {search.best_estimator_.score(X_train, y_train):0.2f}\")\n", 182 | "print(f\"Test Accuracy: {search.best_estimator_.score(X_test, y_test):0.2f}\")" 183 | ] 184 | } 185 | ], 186 | "metadata": { 187 | "kernelspec": { 188 | "display_name": "Python 3 (ipykernel)", 189 | "language": "python", 190 | "name": "python3" 191 | }, 192 | "language_info": { 193 | "codemirror_mode": { 194 | "name": "ipython", 195 | "version": 3 196 | }, 197 | "file_extension": ".py", 198 | "mimetype": "text/x-python", 199 | "name": "python", 200 | "nbconvert_exporter": "python", 201 | "pygments_lexer": "ipython3", 202 | "version": "3.9.6" 203 | } 204 | }, 205 | "nbformat": 4, 206 | "nbformat_minor": 5 207 | } 208 | -------------------------------------------------------------------------------- /hyperparameter-tuning-methods/06.1-genetic-opt.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "39329df3-1f99-4b11-9405-5969d52368a7", 6 | "metadata": {}, 7 | "source": [ 8 | "# Genetic Programming-Based Hyperparameter Optimization of a Decision Tree" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "66496ba0-a0c7-4819-9c6d-13daf80c8c9c", 14 | "metadata": {}, 15 | "source": [ 16 | "This notebook shows how to use [`sklearn-genetic-opt`](https://sklearn-genetic-opt.readthedocs.io/en/stable/) for hyperparameter optimization based on genetic algorithms (evolutionary programming). If you are interested in understanding how it works under the hood, `sklearn-genetic-opt` uses [DEAP](https://deap.readthedocs.io/).
\n" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "id": "7f61a90e-a119-4bd0-af21-38604c5b4eec", 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "name": "stdout", 27 | "output_type": "stream", 28 | "text": [ 29 | "scikit-learn : 1.0\n", 30 | "sklearn : 1.0\n", 31 | "deap : 1.3.1\n", 32 | "sklearn_genetic: 0.7.0\n", 33 | "\n" 34 | ] 35 | } 36 | ], 37 | "source": [ 38 | "%load_ext watermark\n", 39 | "%watermark -p scikit-learn,sklearn,deap,sklearn_genetic" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "id": "1f0489c2-dd9c-4e71-a78c-e01201762b37", 45 | "metadata": {}, 46 | "source": [ 47 | "## Dataset" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 2, 53 | "id": "271b17ff-5ea4-4161-8b7f-20ba8131d666", 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "name": "stdout", 58 | "output_type": "stream", 59 | "text": [ 60 | "Train/Valid/Test sizes: 398 80 171\n" 61 | ] 62 | } 63 | ], 64 | "source": [ 65 | "from sklearn import model_selection\n", 66 | "from sklearn.model_selection import train_test_split\n", 67 | "from sklearn import datasets\n", 68 | "\n", 69 | "\n", 70 | "data = datasets.load_breast_cancer()\n", 71 | "X, y = data.data, data.target\n", 72 | "\n", 73 | "X_train, X_test, y_train, y_test = \\\n", 74 | " train_test_split(X, y, test_size=0.3, random_state=1, stratify=y)\n", 75 | "\n", 76 | "X_train_sub, X_valid, y_train_sub, y_valid = \\\n", 77 | " train_test_split(X_train, y_train, test_size=0.2, random_state=1, stratify=y_train)\n", 78 | "\n", 79 | "print('Train/Valid/Test sizes:', y_train.shape[0], y_valid.shape[0], y_test.shape[0])" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "id": "0c922b01-86f0-4e83-9e36-446f99f6fe1b", 85 | "metadata": {}, 86 | "source": [ 87 | "## sklearn-genetic-opt" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "id": "72e56f33-ec33-46dd-afa2-a1b3c8b3da0b", 93 | "metadata": {}, 94 | "source": [ 95 | "- Install: `pip install sklearn-genetic-opt[all]`\n", 96 | "\n", 97 | "- More info: https://sklearn-genetic-opt.readthedocs.io/en/stable/#" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 3, 103 | "id": "96f0b4c1-803a-436f-93d5-31baab55faa5", 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "name": "stdout", 108 | "output_type": "stream", 109 | "text": [ 110 | "gen\tnevals\tfitness \tfitness_std\tfitness_max\tfitness_min\n", 111 | "0 \t15 \t0.773962\t0.131052 \t0.914778 \t0.628165 \n", 112 | "1 \t28 \t0.888608\t0.0588224 \t0.914778 \t0.673165 \n", 113 | "2 \t29 \t0.911424\t0.00855215 \t0.914778 \t0.88962 \n", 114 | "3 \t28 \t0.914778\t4.44089e-16\t0.914778 \t0.914778 \n", 115 | "4 \t28 \t0.914778\t4.44089e-16\t0.914778 \t0.914778 \n", 116 | "5 \t28 \t0.914778\t4.44089e-16\t0.914778 \t0.914778 \n", 117 | "6 \t29 \t0.914778\t4.44089e-16\t0.914778 \t0.914778 \n", 118 | "7 \t27 \t0.918297\t0.00703797 \t0.932373 \t0.914778 \n", 119 | "8 \t27 \t0.922989\t0.0087779 \t0.932373 \t0.914778 \n", 120 | "9 \t29 \t0.928854\t0.00703797 \t0.932373 \t0.914778 \n", 121 | "10 \t29 \t0.932373\t3.33067e-16\t0.932373 \t0.932373 \n", 122 | "11 \t29 \t0.932373\t3.33067e-16\t0.932373 \t0.932373 \n", 123 | "12 \t29 \t0.932373\t3.33067e-16\t0.932373 \t0.932373 \n", 124 | "13 \t29 \t0.932861\t0.000974684\t0.93481 \t0.932373 \n", 125 | "14 \t29 \t0.933023\t0.00107755 \t0.93481 \t0.932373 \n", 126 | "15 \t28 \t0.93416 \t0.00107755 \t0.93481 \t0.932373 \n", 127 | "16 \t29 \t0.93481 \t3.33067e-16\t0.93481 \t0.93481 \n", 128 | "17 \t29 \t0.93481 \t3.33067e-16\t0.93481 
\t0.93481 \n", 129 | "18 \t29 \t0.93481 \t3.33067e-16\t0.93481 \t0.93481 \n", 130 | "19 \t28 \t0.93481 \t3.33067e-16\t0.93481 \t0.93481 \n", 131 | "20 \t29 \t0.93481 \t3.33067e-16\t0.93481 \t0.93481 \n" 132 | ] 133 | }, 134 | { 135 | "data": { 136 | "text/plain": [ 137 | "0.9348101265822784" 138 | ] 139 | }, 140 | "execution_count": 3, 141 | "metadata": {}, 142 | "output_type": "execute_result" 143 | } 144 | ], 145 | "source": [ 146 | "import numpy as np\n", 147 | "import scipy.stats\n", 148 | "\n", 149 | "from sklearn_genetic import GASearchCV\n", 150 | "from sklearn_genetic.space import Integer, Categorical, Continuous\n", 151 | "from sklearn.tree import DecisionTreeClassifier\n", 152 | "\n", 153 | "\n", 154 | "clf = DecisionTreeClassifier(random_state=123)\n", 155 | "\n", 156 | "params = {\n", 157 | " 'min_samples_split': Integer(2, 12),\n", 158 | " 'min_impurity_decrease': Continuous(0.0, 0.5),\n", 159 | " 'max_depth': Categorical([6, 16, None])\n", 160 | "}\n", 161 | "\n", 162 | "search = GASearchCV(\n", 163 | " estimator=clf,\n", 164 | " cv=5,\n", 165 | " population_size=15,\n", 166 | " generations=20,\n", 167 | " tournament_size=3,\n", 168 | " elitism=True,\n", 169 | " keep_top_k=4,\n", 170 | " crossover_probability=0.9,\n", 171 | " mutation_probability=0.05,\n", 172 | " param_grid=params,\n", 173 | " criteria='max',\n", 174 | " algorithm='eaMuCommaLambda',\n", 175 | " n_jobs=-1)\n", 176 | "\n", 177 | "search.fit(X_train, y_train)\n", 178 | "\n", 179 | "search.best_score_" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 4, 185 | "id": "2c26399d-ebfc-4b06-86d9-36e49711e908", 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "data": { 190 | "text/plain": [ 191 | "{'min_samples_split': 8,\n", 192 | " 'min_impurity_decrease': 0.006258039752250311,\n", 193 | " 'max_depth': 16}" 194 | ] 195 | }, 196 | "execution_count": 4, 197 | "metadata": {}, 198 | "output_type": "execute_result" 199 | } 200 | ], 201 | "source": [ 202 | "search.best_params_" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 5, 208 | "id": "763e816b-6437-45a9-812f-8b429472d75e", 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "name": "stdout", 213 | "output_type": "stream", 214 | "text": [ 215 | "Training Accuracy: 0.99\n", 216 | "Test Accuracy: 0.94\n" 217 | ] 218 | } 219 | ], 220 | "source": [ 221 | "print(f\"Training Accuracy: {search.best_estimator_.score(X_train, y_train):0.2f}\")\n", 222 | "print(f\"Test Accuracy: {search.best_estimator_.score(X_test, y_test):0.2f}\")" 223 | ] 224 | } 225 | ], 226 | "metadata": { 227 | "kernelspec": { 228 | "display_name": "Python 3 (ipykernel)", 229 | "language": "python", 230 | "name": "python3" 231 | }, 232 | "language_info": { 233 | "codemirror_mode": { 234 | "name": "ipython", 235 | "version": 3 236 | }, 237 | "file_extension": ".py", 238 | "mimetype": "text/x-python", 239 | "name": "python", 240 | "nbconvert_exporter": "python", 241 | "pygments_lexer": "ipython3", 242 | "version": "3.9.6" 243 | } 244 | }, 245 | "nbformat": 4, 246 | "nbformat_minor": 5 247 | } 248 | -------------------------------------------------------------------------------- /hyperparameter-tuning-methods/figures/orion-recommendations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/machine-learning-notes/8c8e867930d000131e463a00c864ce42e8661cad/hyperparameter-tuning-methods/figures/orion-recommendations.png 
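The `GASearchCV` run above hides the evolutionary mechanics behind a scikit-learn-style `fit`. As a rough mental model, here is a toy sketch of that loop (my own illustration, not `sklearn-genetic-opt`'s or DEAP's actual implementation; the helper names, population size, and generation count are made up): fitness is cross-validated accuracy, parents are picked by tournament selection, and offspring are produced by uniform crossover plus occasional resampling mutation.

```
# Toy sketch of the evolutionary loop behind GASearchCV-style tuning.
# Not the library's implementation; all helper names are invented.
import random

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier

rng = random.Random(123)

# Same search space as in the notebook above; each value is a sampler.
SPACE = {
    "min_samples_split": lambda: rng.randint(2, 12),
    "min_impurity_decrease": lambda: rng.uniform(0.0, 0.5),
    "max_depth": lambda: rng.choice([6, 16, None]),
}

X, y = load_breast_cancer(return_X_y=True)


def fitness(params):
    # The quantity being maximized: 5-fold cross-validated accuracy.
    clf = DecisionTreeClassifier(random_state=123, **params)
    return cross_val_score(clf, X, y, cv=5).mean()


def tournament(scored, k=3):
    # Pick the fittest of k randomly drawn individuals.
    return max(rng.sample(scored, k), key=lambda pair: pair[1])[0]


def crossover(mom, dad):
    # Uniform crossover: each hyperparameter comes from either parent.
    return {key: (mom if rng.random() < 0.5 else dad)[key] for key in SPACE}


def mutate(ind, rate=0.2):
    # Occasionally resample a hyperparameter from its distribution.
    return {k: (SPACE[k]() if rng.random() < rate else v) for k, v in ind.items()}


population = [{key: sample() for key, sample in SPACE.items()} for _ in range(10)]
for gen in range(5):
    scored = [(ind, fitness(ind)) for ind in population]
    best_ind, best_fit = max(scored, key=lambda pair: pair[1])
    print(f"gen {gen}: best CV accuracy {best_fit:.3f} with {best_ind}")
    population = [
        mutate(crossover(tournament(scored), tournament(scored)))
        for _ in range(len(population))
    ]
```

Unlike `GASearchCV`, this sketch has no elitism, so the best individual can be lost between generations; the library's `elitism=True` option guards against exactly that.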
-------------------------------------------------------------------------------- /hyperparameter-tuning-methods/lightning-hpo-optuna/README.md: -------------------------------------------------------------------------------- 1 | See https://github.com/Lightning-AI/lightning-hpo 2 | 3 | 4 | 5 | Run as 6 | 7 | ``` 8 | python -m lightning run app sweeper.py 9 | ``` -------------------------------------------------------------------------------- /hyperparameter-tuning-methods/lightning-hpo-optuna/mlp_cli2.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from lightning.pytorch.callbacks import ModelCheckpoint 4 | from lightning.pytorch.utilities.cli import LightningCLI 5 | from lightning.pytorch.loggers import CSVLogger 6 | from shared_utilities import CustomDataModule, LightningModel2 7 | from watermark import watermark 8 | 9 | if __name__ == "__main__": 10 | 11 | print(watermark(packages="torch,lightning")) 12 | 13 | print(f"The provided arguments are {sys.argv[1:]}") 14 | 15 | cli = LightningCLI( 16 | model_class=LightningModel2, 17 | datamodule_class=CustomDataModule, 18 | run=False, 19 | save_config_overwrite=True, 20 | seed_everything_default=123, 21 | trainer_defaults={ 22 | "logger": CSVLogger(save_dir="sweep-logs/", name="my-sweep"), 23 | "callbacks": [ModelCheckpoint(monitor="val_acc")], 24 | }, 25 | ) 26 | 27 | print(cli.model.hidden_units) 28 | 29 | lightning_model = LightningModel2( 30 | model=None, 31 | hidden_units=cli.model.hidden_units, 32 | learning_rate=cli.model.learning_rate, 33 | ) 34 | 35 | cli.trainer.fit(lightning_model, datamodule=cli.datamodule) 36 | #cli.trainer.test(lightning_model, datamodule=cli.datamodule) 37 | -------------------------------------------------------------------------------- /hyperparameter-tuning-methods/lightning-hpo-optuna/shared_utilities.py: -------------------------------------------------------------------------------- 1 | import lightning as L 2 | import numpy as np 3 | import torch 4 | import torch.nn.functional as F 5 | import torchmetrics 6 | from sklearn.datasets import make_classification 7 | from sklearn.model_selection import train_test_split 8 | from torch.utils.data import DataLoader, Dataset 9 | 10 | 11 | class PyTorchMLP2(torch.nn.Module): 12 | def __init__(self, num_features, hidden_units, num_classes): 13 | super().__init__() 14 | 15 | # Initialize MLP layers 16 | all_layers = [] 17 | for hidden_unit in hidden_units: 18 | layer = torch.nn.Linear(num_features, hidden_unit) 19 | all_layers.append(layer) 20 | all_layers.append(torch.nn.ReLU()) 21 | num_features = hidden_unit 22 | 23 | output_layer = torch.nn.Linear( 24 | in_features=hidden_units[-1], out_features=num_classes 25 | ) 26 | 27 | all_layers.append(output_layer) 28 | self.layers = torch.nn.Sequential(*all_layers) 29 | 30 | def forward(self, x): 31 | x = torch.flatten(x, start_dim=1) 32 | logits = self.layers(x) 33 | return logits 34 | 35 | 36 | class LightningModel2(L.LightningModule): 37 | def __init__(self, model=None, hidden_units=None, learning_rate=None): 38 | super().__init__() 39 | 40 | self.learning_rate = learning_rate 41 | self.hidden_units = hidden_units 42 | 43 | if model is None: 44 | self.model = PyTorchMLP2( 45 | num_features=100, hidden_units=hidden_units, num_classes=2 46 | ) 47 | 48 | self.save_hyperparameters(ignore=["model"]) 49 | 50 | self.train_acc = torchmetrics.Accuracy() 51 | self.val_acc = torchmetrics.Accuracy() 52 | self.test_acc = torchmetrics.Accuracy() 53 | 54 | def forward(self, 
x): 55 | return self.model(x) 56 | 57 | def _shared_step(self, batch): 58 | features, true_labels = batch 59 | logits = self(features) 60 | 61 | loss = F.cross_entropy(logits, true_labels) 62 | predicted_labels = torch.argmax(logits, dim=1) 63 | return loss, true_labels, predicted_labels 64 | 65 | def training_step(self, batch, batch_idx): 66 | loss, true_labels, predicted_labels = self._shared_step(batch) 67 | 68 | self.log("train_loss", loss) 69 | self.train_acc(predicted_labels, true_labels) 70 | self.log( 71 | "train_acc", self.train_acc, prog_bar=True, on_epoch=True, on_step=False 72 | ) 73 | return loss 74 | 75 | def validation_step(self, batch, batch_idx): 76 | loss, true_labels, predicted_labels = self._shared_step(batch) 77 | 78 | self.log("val_loss", loss, prog_bar=True) 79 | self.val_acc(predicted_labels, true_labels) 80 | self.log("val_acc", self.val_acc, prog_bar=True) 81 | 82 | def test_step(self, batch, batch_idx): 83 | loss, true_labels, predicted_labels = self._shared_step(batch) 84 | self.test_acc(predicted_labels, true_labels) 85 | self.log("test_acc", self.test_acc) 86 | 87 | def configure_optimizers(self): 88 | optimizer = torch.optim.SGD(self.parameters(), lr=self.learning_rate) 89 | return optimizer 90 | 91 | 92 | class CustomDataset(Dataset): 93 | def __init__(self, feature_array, label_array, transform=None): 94 | 95 | self.x = feature_array 96 | self.y = label_array 97 | self.transform = transform 98 | 99 | def __getitem__(self, index): 100 | x = self.x[index] 101 | y = self.y[index] 102 | 103 | if self.transform is not None: 104 | x = self.transform(x) 105 | 106 | return x, y 107 | 108 | def __len__(self): 109 | return self.y.shape[0] 110 | 111 | 112 | class CustomDataModule(L.LightningDataModule): 113 | def __init__(self, data_dir="./mnist", batch_size=64): 114 | super().__init__() 115 | self.data_dir = data_dir 116 | self.batch_size = batch_size 117 | 118 | def prepare_data(self): 119 | # download 120 | pass 121 | 122 | def setup(self, stage: str): 123 | 124 | X, y = make_classification( 125 | n_samples=20000, 126 | n_features=100, 127 | n_informative=10, 128 | n_redundant=40, 129 | n_repeated=25, 130 | n_clusters_per_class=5, 131 | flip_y=0.05, 132 | class_sep=0.5, 133 | random_state=123, 134 | ) 135 | 136 | X_train, X_test, y_train, y_test = train_test_split( 137 | X, y, test_size=0.2, random_state=123 138 | ) 139 | 140 | X_train, X_val, y_train, y_val = train_test_split( 141 | X_train, y_train, test_size=0.1, random_state=123 142 | ) 143 | 144 | self.train_dataset = CustomDataset( 145 | feature_array=X_train.astype(np.float32), 146 | label_array=y_train.astype(np.int64), 147 | ) 148 | 149 | self.val_dataset = CustomDataset( 150 | feature_array=X_val.astype(np.float32), label_array=y_val.astype(np.int64) 151 | ) 152 | 153 | self.test_dataset = CustomDataset( 154 | feature_array=X_test.astype(np.float32), label_array=y_test.astype(np.int64) 155 | ) 156 | 157 | def train_dataloader(self): 158 | train_loader = DataLoader( 159 | dataset=self.train_dataset, 160 | batch_size=32, 161 | shuffle=True, 162 | drop_last=True, 163 | num_workers=0, 164 | ) 165 | return train_loader 166 | 167 | def val_dataloader(self): 168 | val_loader = DataLoader( 169 | dataset=self.val_dataset, 170 | batch_size=32, 171 | shuffle=False, 172 | num_workers=0, 173 | ) 174 | return val_loader 175 | 176 | def test_dataloader(self): 177 | test_loader = DataLoader( 178 | dataset=self.test_dataset, batch_size=32, shuffle=False, num_workers=0 179 | ) 180 | return test_loader 181 | 
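`sweeper.py` below wires these utilities into a lightning-hpo `Sweep` driven by Optuna. Conceptually, the sweep automates something like the plain Optuna loop sketched here, sampling the same four distributions and launching one training run per trial. This is only a sketch of the idea: `train_and_score` is a hypothetical stand-in for invoking `mlp_cli2.py` with the sampled arguments and reading back the validation accuracy; it is not part of lightning-hpo's API.

```
# Conceptual sketch: what the lightning-hpo Sweep automates, written as a
# plain Optuna study. `train_and_score` is hypothetical; imagine it runs
# mlp_cli2.py with the sampled arguments and returns the validation accuracy.
import optuna


def train_and_score(learning_rate, hidden_units, batch_size, max_epochs):
    raise NotImplementedError("stand-in for launching one training run")


def objective(trial):
    return train_and_score(
        learning_rate=trial.suggest_float("model.learning_rate", 0.001, 0.1, log=True),
        hidden_units=trial.suggest_categorical("model.hidden_units", ["[50, 100]", "[100, 200]"]),
        batch_size=trial.suggest_categorical("data.batch_size", [32, 64]),
        max_epochs=trial.suggest_int("trainer.max_epochs", 1, 3),
    )


study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=3)
print(study.best_params)
```

Note that `hidden_units` is sampled as a string such as `"[50, 100]"`, presumably because the CLI parses it back into a list; that would explain why the categorical choices in `sweeper.py` are quoted.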
-------------------------------------------------------------------------------- /hyperparameter-tuning-methods/lightning-hpo-optuna/sweeper.py: -------------------------------------------------------------------------------- 1 | import os.path as ops 2 | 3 | import optuna 4 | from lightning import LightningApp 5 | 6 | from lightning_hpo import Sweep 7 | from lightning_hpo.algorithm.optuna import OptunaAlgorithm 8 | from lightning_hpo.distributions.distributions import Categorical, IntUniform, LogUniform 9 | 10 | app = LightningApp( 11 | Sweep( 12 | script_path=ops.join(ops.dirname(__file__), "./mlp_cli2.py"), 13 | n_trials=3, 14 | distributions={ 15 | "model.learning_rate": LogUniform(0.001, 0.1), 16 | "model.hidden_units": Categorical(["[50, 100]", "[100, 200]"]), 17 | "data.batch_size": Categorical([32, 64]), 18 | "trainer.max_epochs": IntUniform(1, 3), 19 | }, 20 | algorithm=OptunaAlgorithm(optuna.create_study(direction="maximize")), 21 | framework="pytorch_lightning", 22 | ) 23 | ) -------------------------------------------------------------------------------- /hyperparameter-tuning-methods/sklearn-parameter-sampler.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "928c6039-6f63-4b5a-8821-2df7a647058e", 6 | "metadata": {}, 7 | "source": [ 8 | "# ParameterSampler for Randomized Search" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "0d153468-cb4f-460e-bda1-291f77ecddea", 14 | "metadata": {}, 15 | "source": [ 16 | "A little template that can be used to create submit scripts based on Randomized Search for arbitrary projects." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "id": "854032e0-f6b9-49cd-af3e-a31b4db5705b", 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "name": "stdout", 27 | "output_type": "stream", 28 | "text": [ 29 | "my_script.py --param1 2.7858767423914466 --param2 categorical1\n", 30 | "my_script.py --param1 1.7138837047473028 --param2 categorical1\n", 31 | "my_script.py --param1 2.205259076331565 --param2 categorical2\n", 32 | "my_script.py --param1 1.964475733730389 --param2 categorical2\n", 33 | "my_script.py --param1 3.923056793538462 --param2 categorical1\n", 34 | "my_script.py --param1 2.3187771880904404 --param2 categorical2\n", 35 | "my_script.py --param1 1.568470072776602 --param2 categorical1\n", 36 | "my_script.py --param1 2.5092680373504668 --param2 categorical2\n", 37 | "my_script.py --param1 1.7542889787184976 --param2 categorical2\n", 38 | "my_script.py --param1 2.779020708741076 --param2 categorical1\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | "from sklearn.model_selection import ParameterSampler\n", 44 | "from scipy.stats import uniform\n", 45 | "\n", 46 | "distributions = dict(param1=uniform(loc=0, scale=4),\n", 47 | " param2=['categorical1', 'categorical2'])\n", 48 | "\n", 49 | "sampler = ParameterSampler(distributions, n_iter=10, random_state=123)\n", 50 | "for param in sampler:\n", 51 | " \n", 52 | " print(\"my_script.py\", end=\"\")\n", 53 | " for k in param:\n", 54 | " print(f' --{k} {param[k]}', end=\"\")\n", 55 | " print()" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "id": "a57a7747-814c-48bc-b143-9a624ef1715d", 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [] 65 | } 66 | ], 67 | "metadata": { 68 | "kernelspec": { 69 | "display_name": "Python 3 (ipykernel)", 70 | "language": "python", 71 | "name": "python3" 72 | }, 73 | "language_info": { 74 
| "codemirror_mode": { 75 | "name": "ipython", 76 | "version": 3 77 | }, 78 | "file_extension": ".py", 79 | "mimetype": "text/x-python", 80 | "name": "python", 81 | "nbconvert_exporter": "python", 82 | "pygments_lexer": "ipython3", 83 | "version": "3.9.7" 84 | } 85 | }, 86 | "nbformat": 4, 87 | "nbformat_minor": 5 88 | } 89 | -------------------------------------------------------------------------------- /learning-rates/scheduler-comparison/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/machine-learning-notes/8c8e867930d000131e463a00c864ce42e8661cad/learning-rates/scheduler-comparison/overview.png -------------------------------------------------------------------------------- /learning-rates/scheduler-comparison/shared_utilities.py: -------------------------------------------------------------------------------- 1 | import lightning as L 2 | import numpy as np 3 | import torch 4 | import torch.nn.functional as F 5 | import torchmetrics 6 | from sklearn.datasets import make_classification 7 | from sklearn.model_selection import train_test_split 8 | from torch.utils.data import DataLoader, Dataset 9 | 10 | 11 | class CustomDataset(Dataset): 12 | def __init__(self, feature_array, label_array, transform=None): 13 | 14 | self.x = feature_array 15 | self.y = label_array 16 | self.transform = transform 17 | 18 | def __getitem__(self, index): 19 | x = self.x[index] 20 | y = self.y[index] 21 | 22 | if self.transform is not None: 23 | x = self.transform(x) 24 | 25 | return x, y 26 | 27 | def __len__(self): 28 | return self.y.shape[0] 29 | 30 | 31 | class CustomDataModule(L.LightningDataModule): 32 | def __init__(self, data_dir="./mnist", batch_size=64): 33 | super().__init__() 34 | self.data_dir = data_dir 35 | self.batch_size = batch_size 36 | 37 | def prepare_data(self): 38 | # download 39 | pass 40 | 41 | def setup(self, stage: str): 42 | 43 | X, y = make_classification( 44 | n_samples=20000, 45 | n_features=100, 46 | n_informative=10, 47 | n_redundant=40, 48 | n_repeated=25, 49 | n_clusters_per_class=5, 50 | flip_y=0.05, 51 | class_sep=0.5, 52 | random_state=123, 53 | ) 54 | 55 | X_train, X_test, y_train, y_test = train_test_split( 56 | X, y, test_size=0.2, random_state=123 57 | ) 58 | 59 | X_train, X_val, y_train, y_val = train_test_split( 60 | X_train, y_train, test_size=0.1, random_state=123 61 | ) 62 | 63 | self.train_dataset = CustomDataset( 64 | feature_array=X_train.astype(np.float32), 65 | label_array=y_train.astype(np.int64), 66 | ) 67 | 68 | self.val_dataset = CustomDataset( 69 | feature_array=X_val.astype(np.float32), label_array=y_val.astype(np.int64) 70 | ) 71 | 72 | self.test_dataset = CustomDataset( 73 | feature_array=X_test.astype(np.float32), label_array=y_test.astype(np.int64) 74 | ) 75 | 76 | def train_dataloader(self): 77 | train_loader = DataLoader( 78 | dataset=self.train_dataset, 79 | batch_size=32, 80 | shuffle=True, 81 | drop_last=True, 82 | num_workers=0, 83 | ) 84 | return train_loader 85 | 86 | def val_dataloader(self): 87 | val_loader = DataLoader( 88 | dataset=self.val_dataset, 89 | batch_size=32, 90 | shuffle=False, 91 | num_workers=0, 92 | ) 93 | return val_loader 94 | 95 | def test_dataloader(self): 96 | test_loader = DataLoader( 97 | dataset=self.test_dataset, batch_size=32, shuffle=False, num_workers=0 98 | ) 99 | return test_loader 100 | -------------------------------------------------------------------------------- 
/losses/pytorch-loss-functions/vgg16-smile-classifier/dataset.py: -------------------------------------------------------------------------------- 1 | from torchvision import datasets 2 | from torchvision import transforms 3 | from torch.utils.data import DataLoader 4 | 5 | 6 | def get_dataloaders_celeba(batch_size, num_workers=0, 7 | train_transforms=None, 8 | test_transforms=None, 9 | download=True): 10 | 11 | if train_transforms is None: 12 | train_transforms = transforms.ToTensor() 13 | 14 | if test_transforms is None: 15 | test_transforms = transforms.ToTensor() 16 | 17 | def get_smile(attr): 18 | return attr[31] 19 | 20 | train_dataset = datasets.CelebA(root='.', 21 | split='train', 22 | transform=train_transforms, 23 | target_type='attr', 24 | target_transform=get_smile, 25 | download=download) 26 | 27 | valid_dataset = datasets.CelebA(root='.', 28 | split='valid', 29 | target_type='attr', 30 | target_transform=get_smile, 31 | transform=test_transforms) 32 | 33 | test_dataset = datasets.CelebA(root='.', 34 | split='test', 35 | target_type='attr', 36 | target_transform=get_smile, 37 | transform=test_transforms) 38 | 39 | train_loader = DataLoader(dataset=train_dataset, 40 | batch_size=batch_size, 41 | num_workers=num_workers, 42 | shuffle=True) 43 | 44 | valid_loader = DataLoader(dataset=valid_dataset, 45 | batch_size=batch_size, 46 | num_workers=num_workers, 47 | shuffle=False) 48 | 49 | test_loader = DataLoader(dataset=test_dataset, 50 | batch_size=batch_size, 51 | num_workers=num_workers, 52 | shuffle=False) 53 | 54 | return train_loader, valid_loader, test_loader 55 | -------------------------------------------------------------------------------- /losses/pytorch-loss-functions/vgg16-smile-classifier/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import pytorch_lightning as pl 4 | import torchmetrics 5 | 6 | 7 | class PyTorchVGG16Logits(nn.Module): 8 | 9 | def __init__(self, num_outputs): 10 | super().__init__() 11 | 12 | 13 | self.block_1 = nn.Sequential( 14 | nn.Conv2d(in_channels=3, 15 | out_channels=64, 16 | kernel_size=(3, 3), 17 | stride=(1, 1), 18 | # (1(32-1)- 32 + 3)/2 = 1 19 | padding=1), 20 | nn.ReLU(), 21 | nn.Conv2d(in_channels=64, 22 | out_channels=64, 23 | kernel_size=(3, 3), 24 | stride=(1, 1), 25 | padding=1), 26 | nn.ReLU(), 27 | nn.MaxPool2d(kernel_size=(2, 2), 28 | stride=(2, 2)) 29 | ) 30 | 31 | self.block_2 = nn.Sequential( 32 | nn.Conv2d(in_channels=64, 33 | out_channels=128, 34 | kernel_size=(3, 3), 35 | stride=(1, 1), 36 | padding=1), 37 | nn.ReLU(), 38 | nn.Conv2d(in_channels=128, 39 | out_channels=128, 40 | kernel_size=(3, 3), 41 | stride=(1, 1), 42 | padding=1), 43 | nn.ReLU(), 44 | nn.MaxPool2d(kernel_size=(2, 2), 45 | stride=(2, 2)) 46 | ) 47 | 48 | self.block_3 = nn.Sequential( 49 | nn.Conv2d(in_channels=128, 50 | out_channels=256, 51 | kernel_size=(3, 3), 52 | stride=(1, 1), 53 | padding=1), 54 | nn.ReLU(), 55 | nn.Conv2d(in_channels=256, 56 | out_channels=256, 57 | kernel_size=(3, 3), 58 | stride=(1, 1), 59 | padding=1), 60 | nn.ReLU(), 61 | nn.Conv2d(in_channels=256, 62 | out_channels=256, 63 | kernel_size=(3, 3), 64 | stride=(1, 1), 65 | padding=1), 66 | nn.ReLU(), 67 | nn.MaxPool2d(kernel_size=(2, 2), 68 | stride=(2, 2)) 69 | ) 70 | 71 | self.block_4 = nn.Sequential( 72 | nn.Conv2d(in_channels=256, 73 | out_channels=512, 74 | kernel_size=(3, 3), 75 | stride=(1, 1), 76 | padding=1), 77 | nn.ReLU(), 78 | nn.Conv2d(in_channels=512, 79 | out_channels=512, 80 
| kernel_size=(3, 3), 81 | stride=(1, 1), 82 | padding=1), 83 | nn.ReLU(), 84 | nn.Conv2d(in_channels=512, 85 | out_channels=512, 86 | kernel_size=(3, 3), 87 | stride=(1, 1), 88 | padding=1), 89 | nn.ReLU(), 90 | nn.MaxPool2d(kernel_size=(2, 2), 91 | stride=(2, 2)) 92 | ) 93 | 94 | self.block_5 = nn.Sequential( 95 | nn.Conv2d(in_channels=512, 96 | out_channels=512, 97 | kernel_size=(3, 3), 98 | stride=(1, 1), 99 | padding=1), 100 | nn.ReLU(), 101 | nn.Conv2d(in_channels=512, 102 | out_channels=512, 103 | kernel_size=(3, 3), 104 | stride=(1, 1), 105 | padding=1), 106 | nn.ReLU(), 107 | nn.Conv2d(in_channels=512, 108 | out_channels=512, 109 | kernel_size=(3, 3), 110 | stride=(1, 1), 111 | padding=1), 112 | nn.ReLU(), 113 | nn.MaxPool2d(kernel_size=(2, 2), 114 | stride=(2, 2)) 115 | ) 116 | 117 | self.features = nn.Sequential( 118 | self.block_1, self.block_2, 119 | self.block_3, self.block_4, 120 | self.block_5 121 | ) 122 | 123 | self.classifier = nn.Sequential( 124 | nn.Flatten(), 125 | nn.Linear(512*4*4, 4096), 126 | nn.ReLU(True), 127 | nn.Dropout(p=0.5), 128 | nn.Linear(4096, 4096), 129 | nn.ReLU(True), 130 | nn.Dropout(p=0.5), 131 | nn.Linear(4096, num_outputs), 132 | ) 133 | 134 | #self.avgpool = nn.AdaptiveAvgPool2d((7, 7)) 135 | 136 | for m in self.modules(): 137 | if isinstance(m, torch.nn.Conv2d): 138 | # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 139 | # m.weight.data.normal_(0, np.sqrt(2. / n)) 140 | m.weight.detach().normal_(0, 0.05) 141 | if m.bias is not None: 142 | m.bias.detach().zero_() 143 | elif isinstance(m, torch.nn.Linear): 144 | m.weight.detach().normal_(0, 0.05) 145 | m.bias.detach().zero_() 146 | 147 | 148 | def forward(self, x): 149 | 150 | x = self.features(x) 151 | #x = self.avgpool(x) 152 | x = self.classifier(x) 153 | return x 154 | 155 | 156 | 157 | # LightningModule that receives a PyTorch model as input 158 | class LightningModelForBCE(pl.LightningModule): 159 | def __init__(self, model, learning_rate, use_logits, loss_fn): 160 | super().__init__() 161 | 162 | self.loss_fn = loss_fn 163 | self.learning_rate = learning_rate 164 | self.use_logits = use_logits 165 | # The inherited PyTorch module 166 | self.model = model 167 | 168 | # Save settings and hyperparameters to the log directory 169 | # but skip the model parameters 170 | self.save_hyperparameters(ignore=['model']) 171 | 172 | # Set up attributes for computing the accuracy 173 | self.train_acc = torchmetrics.Accuracy() 174 | self.valid_acc = torchmetrics.Accuracy() 175 | self.test_acc = torchmetrics.Accuracy() 176 | 177 | # Defining the forward method is only necessary 178 | # if you want to use a Trainer's .predict() method (optional) 179 | def forward(self, x): 180 | return self.model(x) 181 | 182 | def training_step(self, batch, batch_idx): 183 | features, true_labels = batch 184 | outputs = self(features).flatten() 185 | loss = self.loss_fn(outputs, true_labels.float()) 186 | self.log("train_loss", loss) 187 | return loss 188 | 189 | def test_step(self, batch, batch_idx): 190 | features, true_labels = batch 191 | outputs = self(features).flatten() 192 | 193 | if self.use_logits: 194 | predicted_labels = (outputs > 0.0).float() 195 | else: 196 | predicted_labels = (outputs > 0.5).float() 197 | 198 | self.test_acc(predicted_labels, true_labels) 199 | self.log("test_acc", self.test_acc, on_epoch=True, on_step=False) 200 | 201 | def configure_optimizers(self): 202 | optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate) 203 | return optimizer
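The `use_logits` flag in `LightningModelForBCE` above relies on two facts: thresholding logits at 0.0 is equivalent to thresholding sigmoid probabilities at 0.5, and `BCEWithLogitsLoss` computes the same quantity as `BCELoss` applied after a sigmoid, just more numerically stably. A minimal standalone check:

```
# Numerical check of the equivalence behind the use_logits flag.
import torch

torch.manual_seed(123)
logits = torch.randn(8)
targets = torch.randint(0, 2, (8,)).float()

# Same loss value, computed two ways:
loss_from_logits = torch.nn.BCEWithLogitsLoss()(logits, targets)
loss_from_probs = torch.nn.BCELoss()(torch.sigmoid(logits), targets)
print(torch.allclose(loss_from_logits, loss_from_probs))  # True

# Same predicted labels, thresholded two ways:
preds_from_logits = (logits > 0.0).float()
preds_from_probs = (torch.sigmoid(logits) > 0.5).float()
print(torch.equal(preds_from_logits, preds_from_probs))  # True
```

This is why the `vgg16-bcewithlogitsloss` variant can train on raw logits without a final sigmoid in the network while learning the same decision rule.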
-------------------------------------------------------------------------------- /templates/lightning-cli/cli-configurable/README.md: -------------------------------------------------------------------------------- 1 | Check usage via: 2 | 3 | ``` 4 | python mlp_cli2.py --help 5 | ``` 6 | 7 | ``` 8 | : 9 | --model CONFIG Path to a configuration file. 10 | --model.model MODEL (type: Optional[Any], default: null) 11 | --model.learning_rate LEARNING_RATE 12 | (type: Optional[Any], default: null) 13 | 14 | : 15 | --data CONFIG Path to a configuration file. 16 | --data.data_dir DATA_DIR 17 | (type: Any, default: ./mnist) 18 | --data.batch_size BATCH_SIZE 19 | (type: Any, default: 64) 20 | ``` 21 | 22 | 23 | 24 | Usage example: 25 | 26 | ``` 27 | python mlp_cli2.py --model.learning_rate 0.1 --model.hidden_units "[100, 200]" 28 | ``` 29 | -------------------------------------------------------------------------------- /templates/lightning-cli/cli-configurable/mlp_cli2.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from lightning.pytorch.callbacks import ModelCheckpoint 4 | from lightning.pytorch.utilities.cli import LightningCLI 5 | from shared_utilities import CustomDataModule, LightningModel2 6 | from watermark import watermark 7 | 8 | if __name__ == "__main__": 9 | 10 | print(watermark(packages="torch,lightning")) 11 | 12 | print(f"The provided arguments are {sys.argv[1:]}") 13 | 14 | cli = LightningCLI( 15 | model_class=LightningModel2, 16 | datamodule_class=CustomDataModule, 17 | run=False, 18 | save_config_overwrite=True, 19 | seed_everything_default=123, 20 | trainer_defaults={ 21 | "max_epochs": 10, 22 | "callbacks": [ModelCheckpoint(monitor="val_acc")], 23 | }, 24 | ) 25 | 26 | print(cli.model.hidden_units) 27 | 28 | lightning_model = LightningModel2( 29 | model=None, 30 | hidden_units=cli.model.hidden_units, 31 | learning_rate=cli.model.learning_rate, 32 | ) 33 | 34 | cli.trainer.fit(lightning_model, datamodule=cli.datamodule) 35 | cli.trainer.test(lightning_model, datamodule=cli.datamodule) 36 | -------------------------------------------------------------------------------- /templates/lightning-cli/cli-configurable/shared_utilities.py: -------------------------------------------------------------------------------- 1 | import lightning as L 2 | import numpy as np 3 | import torch 4 | import torch.nn.functional as F 5 | import torchmetrics 6 | from sklearn.datasets import make_classification 7 | from sklearn.model_selection import train_test_split 8 | from torch.utils.data import DataLoader, Dataset 9 | 10 | 11 | class PyTorchMLP2(torch.nn.Module): 12 | def __init__(self, num_features, hidden_units, num_classes): 13 | super().__init__() 14 | 15 | # Initialize MLP layers 16 | all_layers = [] 17 | for hidden_unit in hidden_units: 18 | layer = torch.nn.Linear(num_features, hidden_unit) 19 | all_layers.append(layer) 20 | all_layers.append(torch.nn.ReLU()) 21 | num_features = hidden_unit 22 | 23 | output_layer = torch.nn.Linear( 24 | in_features=hidden_units[-1], out_features=num_classes 25 | ) 26 | 27 | all_layers.append(output_layer) 28 | self.layers = torch.nn.Sequential(*all_layers) 29 | 30 | def forward(self, x): 31 | x = torch.flatten(x, start_dim=1) 32 | logits = self.layers(x) 33 | return logits 34 | 35 | 36 | class LightningModel2(L.LightningModule): 37 | def __init__(self, model=None, hidden_units=None, learning_rate=None): 38 | super().__init__() 39 | 40 | self.learning_rate = learning_rate 41 | self.hidden_units = 
--------------------------------------------------------------------------------
/templates/lightning-cli/cli-configurable/mlp_cli2.py:
--------------------------------------------------------------------------------
1 | import sys
2 | 
3 | from lightning.pytorch.callbacks import ModelCheckpoint
4 | from lightning.pytorch.cli import LightningCLI
5 | from shared_utilities import CustomDataModule, LightningModel2
6 | from watermark import watermark
7 | 
8 | if __name__ == "__main__":
9 | 
10 |     print(watermark(packages="torch,lightning"))
11 | 
12 |     print(f"The provided arguments are {sys.argv[1:]}")
13 | 
14 |     cli = LightningCLI(
15 |         model_class=LightningModel2,
16 |         datamodule_class=CustomDataModule,
17 |         run=False,
18 |         save_config_overwrite=True,
19 |         seed_everything_default=123,
20 |         trainer_defaults={
21 |             "max_epochs": 10,
22 |             "callbacks": [ModelCheckpoint(monitor="val_acc")],
23 |         },
24 |     )
25 | 
26 |     print(cli.model.hidden_units)
27 | 
28 |     lightning_model = LightningModel2(
29 |         model=None,
30 |         hidden_units=cli.model.hidden_units,
31 |         learning_rate=cli.model.learning_rate,
32 |     )
33 | 
34 |     cli.trainer.fit(lightning_model, datamodule=cli.datamodule)
35 |     cli.trainer.test(lightning_model, datamodule=cli.datamodule)
36 | 
--------------------------------------------------------------------------------
/templates/lightning-cli/cli-configurable/shared_utilities.py:
--------------------------------------------------------------------------------
1 | import lightning as L
2 | import numpy as np
3 | import torch
4 | import torch.nn.functional as F
5 | import torchmetrics
6 | from sklearn.datasets import make_classification
7 | from sklearn.model_selection import train_test_split
8 | from torch.utils.data import DataLoader, Dataset
9 | 
10 | 
11 | class PyTorchMLP2(torch.nn.Module):
12 |     def __init__(self, num_features, hidden_units, num_classes):
13 |         super().__init__()
14 | 
15 |         # Initialize MLP layers
16 |         all_layers = []
17 |         for hidden_unit in hidden_units:
18 |             layer = torch.nn.Linear(num_features, hidden_unit)
19 |             all_layers.append(layer)
20 |             all_layers.append(torch.nn.ReLU())
21 |             num_features = hidden_unit
22 | 
23 |         output_layer = torch.nn.Linear(
24 |             in_features=hidden_units[-1], out_features=num_classes
25 |         )
26 | 
27 |         all_layers.append(output_layer)
28 |         self.layers = torch.nn.Sequential(*all_layers)
29 | 
30 |     def forward(self, x):
31 |         x = torch.flatten(x, start_dim=1)
32 |         logits = self.layers(x)
33 |         return logits
34 | 
35 | 
36 | class LightningModel2(L.LightningModule):
37 |     def __init__(self, model=None, hidden_units=None, learning_rate=None):
38 |         super().__init__()
39 | 
40 |         self.learning_rate = learning_rate
41 |         self.hidden_units = hidden_units
42 | 
43 |         if model is None:
44 |             model = PyTorchMLP2(
45 |                 num_features=100, hidden_units=hidden_units, num_classes=2
46 |             )
47 |         self.model = model
48 |         self.save_hyperparameters(ignore=["model"])
49 | 
50 |         self.train_acc = torchmetrics.Accuracy(task="multiclass", num_classes=2)
51 |         self.val_acc = torchmetrics.Accuracy(task="multiclass", num_classes=2)
52 |         self.test_acc = torchmetrics.Accuracy(task="multiclass", num_classes=2)
53 | 
54 |     def forward(self, x):
55 |         return self.model(x)
56 | 
57 |     def _shared_step(self, batch):
58 |         features, true_labels = batch
59 |         logits = self(features)
60 | 
61 |         loss = F.cross_entropy(logits, true_labels)
62 |         predicted_labels = torch.argmax(logits, dim=1)
63 |         return loss, true_labels, predicted_labels
64 | 
65 |     def training_step(self, batch, batch_idx):
66 |         loss, true_labels, predicted_labels = self._shared_step(batch)
67 | 
68 |         self.log("train_loss", loss)
69 |         self.train_acc(predicted_labels, true_labels)
70 |         self.log(
71 |             "train_acc", self.train_acc, prog_bar=True, on_epoch=True, on_step=False
72 |         )
73 |         return loss
74 | 
75 |     def validation_step(self, batch, batch_idx):
76 |         loss, true_labels, predicted_labels = self._shared_step(batch)
77 | 
78 |         self.log("val_loss", loss, prog_bar=True)
79 |         self.val_acc(predicted_labels, true_labels)
80 |         self.log("val_acc", self.val_acc, prog_bar=True)
81 | 
82 |     def test_step(self, batch, batch_idx):
83 |         loss, true_labels, predicted_labels = self._shared_step(batch)
84 |         self.test_acc(predicted_labels, true_labels)
85 |         self.log("test_acc", self.test_acc)
86 | 
87 |     def configure_optimizers(self):
88 |         optimizer = torch.optim.SGD(self.parameters(), lr=self.learning_rate)
89 |         return optimizer
90 | 
91 | 
92 | class CustomDataset(Dataset):
93 |     def __init__(self, feature_array, label_array, transform=None):
94 | 
95 |         self.x = feature_array
96 |         self.y = label_array
97 |         self.transform = transform
98 | 
99 |     def __getitem__(self, index):
100 |         x = self.x[index]
101 |         y = self.y[index]
102 | 
103 |         if self.transform is not None:
104 |             x = self.transform(x)
105 | 
106 |         return x, y
107 | 
108 |     def __len__(self):
109 |         return self.y.shape[0]
110 | 
111 | 
112 | class CustomDataModule(L.LightningDataModule):
113 |     def __init__(self, data_dir="./mnist", batch_size=64):
114 |         super().__init__()
115 |         self.data_dir = data_dir
116 |         self.batch_size = batch_size
117 | 
118 |     def prepare_data(self):
119 |         # download
120 |         pass
121 | 
122 |     def setup(self, stage: str):
123 | 
124 |         X, y = make_classification(
125 |             n_samples=20000,
126 |             n_features=100,
127 |             n_informative=10,
128 |             n_redundant=40,
129 |             n_repeated=25,
130 |             n_clusters_per_class=5,
131 |             flip_y=0.05,
132 |             class_sep=0.5,
133 |             random_state=123,
134 |         )
135 | 
136 |         X_train, X_test, y_train, y_test = train_test_split(
137 |             X, y, test_size=0.2, random_state=123
138 |         )
139 | 
140 |         X_train, X_val, y_train, y_val = train_test_split(
141 |             X_train, y_train, test_size=0.1, random_state=123
142 |         )
143 | 
144 |         self.train_dataset = CustomDataset(
145 |             feature_array=X_train.astype(np.float32),
146 |             label_array=y_train.astype(np.int64),
147 |         )
148 | 
149 |         self.val_dataset = CustomDataset(
150 |             feature_array=X_val.astype(np.float32), label_array=y_val.astype(np.int64)
151 |         )
152 | 
153 |         self.test_dataset = CustomDataset(
154 |             feature_array=X_test.astype(np.float32), label_array=y_test.astype(np.int64)
155 |         )
156 | 
157 |     def train_dataloader(self):
158 |         train_loader = DataLoader(
159 |             dataset=self.train_dataset,
160 |             batch_size=self.batch_size,
161 |             shuffle=True,
162 |             drop_last=True,
163 |             num_workers=0,
164 |         )
165 |         return train_loader
166 | 
167 |     def val_dataloader(self):
168 |         val_loader = DataLoader(
169 |             dataset=self.val_dataset,
170 |             batch_size=self.batch_size,
171 |             shuffle=False,
172 |             num_workers=0,
173 |         )
174 |         return val_loader
175 | 
176 |     def test_dataloader(self):
177 |         test_loader = DataLoader(
178 |             dataset=self.test_dataset, batch_size=self.batch_size, shuffle=False, num_workers=0
179 |         )
180 |         return test_loader
181 | 
--------------------------------------------------------------------------------
/templates/lightning-cli/cli-simple/README.md:
--------------------------------------------------------------------------------
1 | Check usage via:
2 | 
3 | ```
4 | python mlp_cli.py --help
5 | ```
6 | 
7 | 
8 | 
9 | 
10 | 
11 | ```
12 | :
13 |   --model CONFIG        Path to a configuration file.
14 |   --model.model MODEL   (type: Optional[Any], default: null)
15 |   --model.learning_rate LEARNING_RATE
16 |                         (type: Optional[Any], default: null)
17 | 
18 | :
19 |   --data CONFIG         Path to a configuration file.
20 |   --data.data_dir DATA_DIR
21 |                         (type: Any, default: ./mnist)
22 |   --data.batch_size BATCH_SIZE
23 |                         (type: Any, default: 64)
24 | ```
25 | 
26 | 
27 | 
28 | Usage example:
29 | 
30 | ```
31 | python mlp_cli.py --model.learning_rate 0.1
32 | ```
33 | 
--------------------------------------------------------------------------------
/templates/lightning-cli/cli-simple/mlp_cli.py:
--------------------------------------------------------------------------------
1 | import sys
2 | 
3 | from lightning.pytorch.callbacks import ModelCheckpoint
4 | from lightning.pytorch.cli import LightningCLI
5 | from shared_utilities import CustomDataModule, LightningModel, PyTorchMLP
6 | from watermark import watermark
7 | 
8 | if __name__ == "__main__":
9 | 
10 |     print(watermark(packages="torch,lightning"))
11 | 
12 |     print(f"The provided arguments are {sys.argv[1:]}")
13 | 
14 |     cli = LightningCLI(
15 |         model_class=LightningModel,
16 |         datamodule_class=CustomDataModule,
17 |         run=False,
18 |         save_config_overwrite=True,
19 |         seed_everything_default=123,
20 |         trainer_defaults={
21 |             "max_epochs": 10,
22 |             "callbacks": [ModelCheckpoint(monitor="val_acc")],
23 |         },
24 |     )
25 | 
26 |     pytorch_model = PyTorchMLP(num_features=100, num_classes=2)
27 |     lightning_model = LightningModel(
28 |         model=pytorch_model, learning_rate=cli.model.learning_rate
29 |     )
30 | 
31 |     cli.trainer.fit(lightning_model, datamodule=cli.datamodule)
32 |     cli.trainer.test(lightning_model, datamodule=cli.datamodule)
33 | 
--------------------------------------------------------------------------------
/templates/lightning-cli/cli-simple/shared_utilities.py:
--------------------------------------------------------------------------------
1 | import lightning as L
2 | import numpy as np
3 | import torch
4 | import torch.nn.functional as F
5 | import torchmetrics
6 | from sklearn.datasets import make_classification
7 | from sklearn.model_selection import train_test_split
8 | from torch.utils.data import DataLoader, Dataset
9 | 
10 | 
11 | class PyTorchMLP(torch.nn.Module):
12 |     def __init__(self, num_features, num_classes):
13 |         super().__init__()
14 | 
15 |         self.all_layers = torch.nn.Sequential(
16 |             # 1st hidden layer
17 |             torch.nn.Linear(num_features, 100),
18 |             torch.nn.ReLU(),
19 |             # 2nd hidden layer
20 |             torch.nn.Linear(100, 50),
21 |             torch.nn.ReLU(),
22 |             # output layer
23 |             torch.nn.Linear(50, num_classes),
24 |         )
25 | 
26 |     def forward(self, x):
27 |         x = torch.flatten(x, start_dim=1)
28 |         logits = self.all_layers(x)
29 |         return logits
30 | 
31 | 
32 | class LightningModel(L.LightningModule):
33 |     def __init__(self, model=None, learning_rate=None):
34 |         super().__init__()
35 | 
36 |         self.learning_rate = learning_rate
37 |         self.model = model
38 | 
39 |         self.save_hyperparameters(ignore=["model"])
40 | 
41 |         self.train_acc = torchmetrics.Accuracy(task="multiclass", num_classes=2)
42 |         self.val_acc = torchmetrics.Accuracy(task="multiclass", num_classes=2)
43 |         self.test_acc = torchmetrics.Accuracy(task="multiclass", num_classes=2)
44 | 
45 |     def forward(self, x):
46 |         return self.model(x)
47 | 
48 |     def _shared_step(self, batch):
49 |         features, true_labels = batch
50 |         logits = self(features)
51 | 
52 |         loss = F.cross_entropy(logits, true_labels)
53 |         predicted_labels = torch.argmax(logits, dim=1)
54 |         return loss, true_labels, predicted_labels
55 | 
56 |     def training_step(self, batch, batch_idx):
57 |         loss, true_labels, predicted_labels = self._shared_step(batch)
58 | 
59 |         self.log("train_loss", loss)
60 |         self.train_acc(predicted_labels, true_labels)
61 |         self.log(
62 |             "train_acc", self.train_acc, prog_bar=True, on_epoch=True, on_step=False
63 |         )
64 |         return loss
65 | 
66 |     def validation_step(self, batch, batch_idx):
67 |         loss, true_labels, predicted_labels = self._shared_step(batch)
68 | 
69 |         self.log("val_loss", loss, prog_bar=True)
70 |         self.val_acc(predicted_labels, true_labels)
71 |         self.log("val_acc", self.val_acc, prog_bar=True)
72 | 
73 |     def test_step(self, batch, batch_idx):
74 |         loss, true_labels, predicted_labels = self._shared_step(batch)
75 |         self.test_acc(predicted_labels, true_labels)
76 |         self.log("test_acc", self.test_acc)
77 | 
78 |     def configure_optimizers(self):
79 |         optimizer = torch.optim.SGD(self.parameters(), lr=self.learning_rate)
80 |         return optimizer
81 | 
82 | 
83 | class CustomDataset(Dataset):
84 |     def __init__(self, feature_array, label_array, transform=None):
85 | 
86 |         self.x = feature_array
87 |         self.y = label_array
88 |         self.transform = transform
89 | 
90 |     def __getitem__(self, index):
91 |         x = self.x[index]
92 |         y = self.y[index]
93 | 
94 |         if self.transform is not None:
95 |             x = self.transform(x)
96 | 
97 |         return x, y
98 | 
99 |     def __len__(self):
100 |         return self.y.shape[0]
101 | 
102 | 
103 | class CustomDataModule(L.LightningDataModule):
104 |     def __init__(self, data_dir="./mnist", batch_size=64):
105 |         super().__init__()
106 |         self.data_dir = data_dir
107 |         self.batch_size = batch_size
108 | 
109 |     def prepare_data(self):
110 |         # download
111 |         pass
112 | 
113 |     def setup(self, stage: str):
114 | 
115 |         X, y = make_classification(
116 |             n_samples=20000,
117 |             n_features=100,
118 |             n_informative=10,
119 |             n_redundant=40,
120 |             n_repeated=25,
121 |             n_clusters_per_class=5,
122 |             flip_y=0.05,
123 |             class_sep=0.5,
124 |             random_state=123,
125 |         )
126 | 
127 |         X_train, X_test, y_train, y_test = train_test_split(
128 |             X, y, test_size=0.2, random_state=123
129 |         )
130 | 
131 |         X_train, X_val, y_train, y_val = train_test_split(
132 |             X_train, y_train, test_size=0.1, random_state=123
133 |         )
134 | 
135 |         self.train_dataset = CustomDataset(
136 |             feature_array=X_train.astype(np.float32),
137 |             label_array=y_train.astype(np.int64),
138 |         )
139 | 
140 |         self.val_dataset = CustomDataset(
141 |             feature_array=X_val.astype(np.float32), label_array=y_val.astype(np.int64)
142 |         )
143 | 
144 |         self.test_dataset = CustomDataset(
145 |             feature_array=X_test.astype(np.float32), label_array=y_test.astype(np.int64)
146 |         )
147 | 
148 |     def train_dataloader(self):
149 |         train_loader = DataLoader(
150 |             dataset=self.train_dataset,
151 |             batch_size=self.batch_size,
152 |             shuffle=True,
153 |             drop_last=True,
154 |             num_workers=0,
155 |         )
156 |         return train_loader
157 | 
158 |     def val_dataloader(self):
159 |         val_loader = DataLoader(
160 |             dataset=self.val_dataset,
161 |             batch_size=self.batch_size,
162 |             shuffle=False,
163 |             num_workers=0,
164 |         )
165 |         return val_loader
166 | 
167 |     def test_dataloader(self):
168 |         test_loader = DataLoader(
169 |             dataset=self.test_dataset, batch_size=self.batch_size, shuffle=False, num_workers=0
170 |         )
171 |         return test_loader
172 | 
--------------------------------------------------------------------------------
/templates/modern-early-stop-with-checkpointing/logs/my-model/version_0/checkpoints/epoch=8-step=4050.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/machine-learning-notes/8c8e867930d000131e463a00c864ce42e8661cad/templates/modern-early-stop-with-checkpointing/logs/my-model/version_0/checkpoints/epoch=8-step=4050.ckpt
--------------------------------------------------------------------------------
/templates/modern-early-stop-with-checkpointing/logs/my-model/version_0/checkpoints/last.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/machine-learning-notes/8c8e867930d000131e463a00c864ce42e8661cad/templates/modern-early-stop-with-checkpointing/logs/my-model/version_0/checkpoints/last.ckpt
--------------------------------------------------------------------------------
/templates/modern-early-stop-with-checkpointing/logs/my-model/version_0/hparams.yaml:
--------------------------------------------------------------------------------
1 | {}
2 | 
--------------------------------------------------------------------------------
/templates/modern-early-stop-with-checkpointing/logs/my-model/version_0/metrics.csv:
--------------------------------------------------------------------------------
1 | train_loss,epoch,step,val_loss,val_acc,train_acc,test_acc
2 | 0.7030411958694458,0,49,,,,
3 | 0.5976124405860901,0,99,,,,
4 | 0.561132550239563,0,149,,,,
5 | 0.5131489038467407,0,199,,,,
6 | 0.3952806293964386,0,249,,,,
7 | 0.5731342434883118,0,299,,,,
8 | 0.6261370182037354,0,349,,,,
9 | 0.4576362371444702,0,399,,,,
10 | 0.4602738320827484,0,449,,,,
11 | ,0,449,0.5030142068862915,0.7637500166893005,,
12 | ,0,449,,,0.7206944227218628,
13 | 0.5635536313056946,1,499,,,,
14 | 0.40492358803749084,1,549,,,,
15 | 0.6099315285682678,1,599,,,,
16 | 0.25897836685180664,1,649,,,,
17 | 0.4928973317146301,1,699,,,,
18 | 0.4685414731502533,1,749,,,,
19 | 0.2871425449848175,1,799,,,,
20 | 0.7240744233131409,1,849,,,,
21 | 0.6184459924697876,1,899,,,,
22 | ,1,899,0.4767351448535919,0.7762500047683716,,
23 | ,1,899,,,0.7831249833106995,
24 | 0.28222334384918213,2,949,,,,
25 | 0.46554091572761536,2,999,,,,
26 | 0.2788914144039154,2,1049,,,,
27 | 0.5021936297416687,2,1099,,,,
28 | 0.39235246181488037,2,1149,,,,
29 | 0.4701291620731354,2,1199,,,,
30 | 0.6642038226127625,2,1249,,,,
31 | 0.3132133483886719,2,1299,,,,
32 | 0.4558600187301636,2,1349,,,,
33 | ,2,1349,0.4215010106563568,0.8218749761581421,,
34 | ,2,1349,,,0.8137500286102295,
35 | 0.5070080757141113,3,1399,,,,
36 | 0.3764300048351288,3,1449,,,,
37 | 0.41579946875572205,3,1499,,,,
38 | 0.4973910450935364,3,1549,,,,
39 | 0.38687312602996826,3,1599,,,,
40 | 0.38586685061454773,3,1649,,,,
41 | 0.3792935609817505,3,1699,,,,
42 | 0.2482021301984787,3,1749,,,,
43 | 0.3135770857334137,3,1799,,,,
44 | ,3,1799,0.43951261043548584,0.8118749856948853,,
45 | ,3,1799,,,0.8353472352027893,
46 | 0.31604233384132385,4,1849,,,,
47 | 0.2924385368824005,4,1899,,,,
48 | 0.5909687280654907,4,1949,,,,
49 | 
0.43162015080451965,4,1999,,,, 50 | 0.1551673710346222,4,2049,,,, 51 | 0.294137567281723,4,2099,,,, 52 | 0.27724581956863403,4,2149,,,, 53 | 0.30173832178115845,4,2199,,,, 54 | 0.3373233675956726,4,2249,,,, 55 | ,4,2249,0.3982531428337097,0.8399999737739563,, 56 | ,4,2249,,,0.8496527671813965, 57 | 0.37676140666007996,5,2299,,,, 58 | 0.24763153493404388,5,2349,,,, 59 | 0.3966788947582245,5,2399,,,, 60 | 0.31472867727279663,5,2449,,,, 61 | 0.43135133385658264,5,2499,,,, 62 | 0.20865577459335327,5,2549,,,, 63 | 0.4858931601047516,5,2599,,,, 64 | 0.34753215312957764,5,2649,,,, 65 | 0.31440043449401855,5,2699,,,, 66 | ,5,2699,0.3687181770801544,0.8531249761581421,, 67 | ,5,2699,,,0.8579166531562805, 68 | 0.38662850856781006,6,2749,,,, 69 | 0.3259159028530121,6,2799,,,, 70 | 0.46409744024276733,6,2849,,,, 71 | 0.39982515573501587,6,2899,,,, 72 | 0.12523581087589264,6,2949,,,, 73 | 0.2844661772251129,6,2999,,,, 74 | 0.22201985120773315,6,3049,,,, 75 | 0.18588170409202576,6,3099,,,, 76 | 0.3063857853412628,6,3149,,,, 77 | ,6,3149,0.3941511809825897,0.8487499952316284,, 78 | ,6,3149,,,0.867638885974884, 79 | 0.20423810184001923,7,3199,,,, 80 | 0.33459576964378357,7,3249,,,, 81 | 0.23248085379600525,7,3299,,,, 82 | 0.20521828532218933,7,3349,,,, 83 | 0.35759085416793823,7,3399,,,, 84 | 0.15906117856502533,7,3449,,,, 85 | 0.32100117206573486,7,3499,,,, 86 | 0.2697495222091675,7,3549,,,, 87 | 0.22270238399505615,7,3599,,,, 88 | ,7,3599,0.3673551082611084,0.862500011920929,, 89 | ,7,3599,,,0.8736805319786072, 90 | 0.25052112340927124,8,3649,,,, 91 | 0.31102049350738525,8,3699,,,, 92 | 0.29404163360595703,8,3749,,,, 93 | 0.15693902969360352,8,3799,,,, 94 | 0.23918089270591736,8,3849,,,, 95 | 0.15603046119213104,8,3899,,,, 96 | 0.15565559267997742,8,3949,,,, 97 | 0.3508184850215912,8,3999,,,, 98 | 0.20335736870765686,8,4049,,,, 99 | ,8,4049,0.370413213968277,0.8756250143051147,, 100 | ,8,4049,,,0.8795138597488403, 101 | 0.22157014906406403,9,4099,,,, 102 | 0.18203894793987274,9,4149,,,, 103 | 0.21458816528320312,9,4199,,,, 104 | 0.18833862245082855,9,4249,,,, 105 | 0.290866881608963,9,4299,,,, 106 | 0.2798851728439331,9,4349,,,, 107 | 0.2465393990278244,9,4399,,,, 108 | 0.2927503287792206,9,4449,,,, 109 | 0.19159245491027832,9,4499,,,, 110 | ,9,4499,0.38742703199386597,0.8575000166893005,, 111 | ,9,4499,,,0.8877778053283691, 112 | ,10,4500,,,,0.8557500243186951 113 | ,10,4500,,,,0.8622499704360962 114 | ,10,4500,,,,0.8557500243186951 115 | -------------------------------------------------------------------------------- /templates/modern-early-stop-with-checkpointing/shared_utilities.py: -------------------------------------------------------------------------------- 1 | import lightning as L 2 | import numpy as np 3 | import torch 4 | import torch.nn.functional as F 5 | import torchmetrics 6 | from sklearn.datasets import make_classification 7 | from sklearn.model_selection import train_test_split 8 | from torch.utils.data import DataLoader, Dataset 9 | 10 | 11 | class LightningModel(L.LightningModule): 12 | def __init__(self, model, learning_rate): 13 | super().__init__() 14 | 15 | self.learning_rate = learning_rate 16 | self.model = model 17 | 18 | self.train_acc = torchmetrics.Accuracy(task="multiclass", num_classes=2) 19 | self.val_acc = torchmetrics.Accuracy(task="multiclass", num_classes=2) 20 | self.test_acc = torchmetrics.Accuracy(task="multiclass", num_classes=2) 21 | 22 | def forward(self, x): 23 | return self.model(x) 24 | 25 | def _shared_step(self, batch): 26 | features, true_labels = batch 
27 |         logits = self(features)
28 | 
29 |         loss = F.cross_entropy(logits, true_labels)
30 |         predicted_labels = torch.argmax(logits, dim=1)
31 |         return loss, true_labels, predicted_labels
32 | 
33 |     def training_step(self, batch, batch_idx):
34 |         loss, true_labels, predicted_labels = self._shared_step(batch)
35 | 
36 |         self.log("train_loss", loss)
37 |         self.train_acc(predicted_labels, true_labels)
38 |         self.log(
39 |             "train_acc", self.train_acc, prog_bar=True, on_epoch=True, on_step=False
40 |         )
41 |         return loss
42 | 
43 |     def validation_step(self, batch, batch_idx):
44 |         loss, true_labels, predicted_labels = self._shared_step(batch)
45 | 
46 |         self.log("val_loss", loss, prog_bar=True)
47 |         self.val_acc(predicted_labels, true_labels)
48 |         self.log("val_acc", self.val_acc, prog_bar=True)
49 | 
50 |     def test_step(self, batch, batch_idx):
51 |         loss, true_labels, predicted_labels = self._shared_step(batch)
52 |         self.test_acc(predicted_labels, true_labels)
53 |         self.log("test_acc", self.test_acc)
54 | 
55 |     def configure_optimizers(self):
56 |         optimizer = torch.optim.SGD(self.parameters(), lr=self.learning_rate)
57 |         return optimizer
58 | 
59 | 
60 | class CustomDataset(Dataset):
61 |     def __init__(self, feature_array, label_array, transform=None):
62 | 
63 |         self.x = feature_array
64 |         self.y = label_array
65 |         self.transform = transform
66 | 
67 |     def __getitem__(self, index):
68 |         x = self.x[index]
69 |         y = self.y[index]
70 | 
71 |         if self.transform is not None:
72 |             x = self.transform(x)
73 | 
74 |         return x, y
75 | 
76 |     def __len__(self):
77 |         return self.y.shape[0]
78 | 
79 | 
80 | class CustomDataModule(L.LightningDataModule):
81 |     def __init__(self, data_dir="./mnist", batch_size=64):
82 |         super().__init__()
83 |         self.data_dir = data_dir
84 |         self.batch_size = batch_size
85 | 
86 |     def prepare_data(self):
87 |         # download
88 |         pass
89 | 
90 |     def setup(self, stage: str):
91 | 
92 |         X, y = make_classification(
93 |             n_samples=20000,
94 |             n_features=100,
95 |             n_informative=10,
96 |             n_redundant=40,
97 |             n_repeated=25,
98 |             n_clusters_per_class=5,
99 |             flip_y=0.05,
100 |             class_sep=0.5,
101 |             random_state=123,
102 |         )
103 | 
104 |         X_train, X_test, y_train, y_test = train_test_split(
105 |             X, y, test_size=0.2, random_state=123
106 |         )
107 | 
108 |         X_train, X_val, y_train, y_val = train_test_split(
109 |             X_train, y_train, test_size=0.1, random_state=123
110 |         )
111 | 
112 |         self.train_dataset = CustomDataset(
113 |             feature_array=X_train.astype(np.float32),
114 |             label_array=y_train.astype(np.int64),
115 |         )
116 | 
117 |         self.val_dataset = CustomDataset(
118 |             feature_array=X_val.astype(np.float32), label_array=y_val.astype(np.int64)
119 |         )
120 | 
121 |         self.test_dataset = CustomDataset(
122 |             feature_array=X_test.astype(np.float32), label_array=y_test.astype(np.int64)
123 |         )
124 | 
125 |     def train_dataloader(self):
126 |         train_loader = DataLoader(
127 |             dataset=self.train_dataset,
128 |             batch_size=self.batch_size,
129 |             shuffle=True,
130 |             drop_last=True,
131 |             num_workers=0,
132 |         )
133 |         return train_loader
134 | 
135 |     def val_dataloader(self):
136 |         val_loader = DataLoader(
137 |             dataset=self.val_dataset,
138 |             batch_size=self.batch_size,
139 |             shuffle=False,
140 |             num_workers=0,
141 |         )
142 |         return val_loader
143 | 
144 |     def test_dataloader(self):
145 |         test_loader = DataLoader(
146 |             dataset=self.test_dataset, batch_size=self.batch_size, shuffle=False, num_workers=0
147 |         )
148 |         return test_loader
149 | 
--------------------------------------------------------------------------------
/templates/pl_classifier/README.md:
--------------------------------------------------------------------------------
1 | # Classifier Project Template
2 | 
3 | 
4 | 
5 | This is a classifier template code for re-use. In this specific instance, it's MobileNet v3 (large) on CIFAR-10 (rescaled to ImageNet size, 224x224).
6 | 
7 | 
8 | 
9 | I recommend setting up this project as follows:
10 | 
11 | 
12 | 
13 | ## 1 - Set up a fresh environment
14 | 
15 | ```bash
16 | conda create -n clf-template python=3.8
17 | conda activate clf-template
18 | ```
19 | 
20 | 
21 | 
22 | ## 2 - Install project requirements
23 | 
24 | 
25 | ```bash
26 | pip install -r requirements.txt
27 | ```
28 | 
29 | 
30 | 
31 | ## 3 - Install utility code as a Python package
32 | 
33 | This is optional and only required if you want to run the code outside this repository.
34 | 
35 | Assuming you are inside this folder, run
36 | 
37 | ```bash
38 | pip install -e .
39 | ```
40 | 
41 | 
42 | 
43 | ## 4 - Inspect the Dataset
44 | 
45 | 
46 | 
47 | Run the notebook [./notebooks/4_inspecting-the-dataset.ipynb](./notebooks/4_inspecting-the-dataset.ipynb).
48 | 
49 | 
50 | 
51 | ## 5 - Run the Main Training Script
52 | 
53 | 
54 | Run the [main.py](main.py) code as follows, e.g., on a server:
55 | 
56 | ```bash
57 | python main.py --output_path my-results \
58 |     --mixed_precision true \
59 |     --num_epochs 10 \
60 |     --batch_size 128 \
61 |     --learning_rate 0.0005 \
62 |     --log_accuracy true \
63 |     --accelerator gpu \
64 |     --num_devices 4 \
65 |     --strategy ddp_spawn
66 | ```
67 | 
68 | 
69 | - Run this script with different hyperparameter settings.
70 | - You can change `--num_devices` to `"auto"` to utilize all GPUs on the given machine.
71 | 
72 | 
73 | 
74 | 
75 | 
76 | 
77 | ## 6 - Inspect the results
78 | 
79 | Run the notebook [./notebooks/6_evaluating-the-results.ipynb](./notebooks/6_evaluating-the-results.ipynb).
80 | 
81 | 
82 | 
83 | ## 7 - Iterate
84 | 
85 | - Repeat steps 4-6 with modified datasets, models, and so forth.
86 | 
87 | 
88 | 
89 | ## 8 - Use the Final Model
90 | 
91 | - See the [Inference in Production](https://pytorch-lightning.readthedocs.io/en/stable/common/production_inference.html) docs for your use case.
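92 | 
93 | For example, a minimal sketch of loading the best checkpoint for inference
94 | (the checkpoint path below is a placeholder; use the "Best model path"
95 | printed at the end of `main.py`):
96 | 
97 | ```python
98 | import torch
99 | 
100 | from my_classifier_template.model import LightningClassifier
101 | 
102 | # Rebuild the backbone exactly as in main.py
103 | pytorch_model = torch.hub.load(
104 |     "pytorch/vision:v0.11.0", "mobilenet_v3_large", pretrained=False
105 | )
106 | pytorch_model.classifier[-1] = torch.nn.Linear(in_features=1280, out_features=10)
107 | 
108 | ckpt_path = "my-results/my-model/version_0/checkpoints/example.ckpt"  # placeholder
109 | model = LightningClassifier.load_from_checkpoint(ckpt_path, model=pytorch_model)
110 | model.eval()
111 | 
112 | with torch.no_grad():
113 |     logits = model(torch.rand(1, 3, 224, 224))  # dummy input batch
114 |     predicted_label = torch.argmax(logits, dim=1)
115 | ```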
116 | 
117 | 
118 | 
119 | 
--------------------------------------------------------------------------------
/templates/pl_classifier/main.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import time
3 | 
4 | import pytorch_lightning as pl  # pip install pytorch-lightning (see requirements.txt)
5 | import torch
6 | from pytorch_lightning.callbacks import ModelCheckpoint
7 | from pytorch_lightning.loggers import CSVLogger
8 | from torchvision import transforms
9 | from watermark import watermark  # pip install watermark
10 | 
11 | from my_classifier_template.dataset import Cifar10DataModule
12 | from my_classifier_template.model import LightningClassifier
13 | 
14 | 
15 | def parse_cmdline_args(parser=None):
16 | 
17 |     if parser is None:
18 |         parser = argparse.ArgumentParser()
19 | 
20 |     parser.add_argument("--accelerator", type=str, default="auto")
21 | 
22 |     parser.add_argument("--batch_size", type=int, default=32)
23 | 
24 |     parser.add_argument("--data_path", type=str, default="./data")
25 | 
26 |     parser.add_argument("--learning_rate", type=float, default=0.0005)
27 | 
28 |     parser.add_argument(
29 |         "--log_accuracy", type=str, choices=("true", "false"), default="true"
30 |     )
31 | 
32 |     parser.add_argument(
33 |         "--mixed_precision", type=str, choices=("true", "false"), default="true"
34 |     )
35 | 
36 |     parser.add_argument("--num_epochs", type=int, default=10)
37 | 
38 |     parser.add_argument("--num_workers", type=int, default=3)
39 | 
40 |     parser.add_argument("--output_path", type=str, required=True)
41 | 
42 |     parser.add_argument(
43 |         "--pretrained", type=str, choices=("true", "false"), default="false"
44 |     )
45 | 
46 |     parser.add_argument("--num_devices", nargs="+", default="auto")
47 | 
48 |     parser.add_argument("--device_numbers", type=str, default="")
49 | 
50 |     parser.add_argument("--random_seed", type=int, default=-1)
51 | 
52 |     parser.add_argument("--strategy", type=str, default="")
53 | 
54 |     parser.set_defaults(feature=True)
55 |     args = parser.parse_args()
56 | 
57 |     if not args.strategy:
58 |         args.strategy = None
59 | 
60 |     args.devices = "auto" if args.num_devices == "auto" else int(args.num_devices[0])
61 | 
62 |     if args.device_numbers:
63 |         args.devices = [int(i) for i in args.device_numbers.split(",")]
64 | 
65 |     d = {"true": True, "false": False}
66 | 
67 |     args.log_accuracy = d[args.log_accuracy]
68 |     args.pretrained = d[args.pretrained]
69 |     args.mixed_precision = d[args.mixed_precision]
70 |     if args.mixed_precision:
71 |         args.mixed_precision = 16
72 |     else:
73 |         args.mixed_precision = 32
74 | 
75 |     return args
76 | 
77 | 
78 | if __name__ == "__main__":
79 | 
80 |     print(watermark())
81 |     print(watermark(packages="torch,pytorch_lightning"))
82 | 
83 |     parser = argparse.ArgumentParser()
84 |     args = parse_cmdline_args(parser)
85 | 
86 |     torch.manual_seed(args.random_seed)
87 | 
88 |     custom_train_transform = transforms.Compose(
89 |         [
90 |             transforms.Resize((256, 256)),
91 |             transforms.RandomCrop((224, 224)),
92 |             transforms.ToTensor(),
93 |         ]
94 |     )
95 | 
96 |     custom_test_transform = transforms.Compose(
97 |         [
98 |             transforms.Resize((256, 256)),
99 |             transforms.CenterCrop((224, 224)),
100 |             transforms.ToTensor(),
101 |         ]
102 |     )
103 | 
104 |     data_module = Cifar10DataModule(
105 |         batch_size=args.batch_size,
106 |         data_path=args.data_path,
107 |         num_workers=args.num_workers,
108 |         train_transform=custom_train_transform,
109 |         test_transform=custom_test_transform,
110 |     )
111 | 
112 |     pytorch_model = torch.hub.load(
113 |         "pytorch/vision:v0.11.0", "mobilenet_v3_large", pretrained=args.pretrained
114 |     )
115 | 
116 |     pytorch_model.classifier[-1] = torch.nn.Linear(
117 |         in_features=1280, out_features=10  # 1280 matches the original classifier head
118 |     )  # 10 = number of class labels in CIFAR-10
119 | 
120 |     lightning_model = LightningClassifier(
121 |         pytorch_model, learning_rate=args.learning_rate, log_accuracy=args.log_accuracy
122 |     )
123 | 
124 |     if args.log_accuracy:
125 |         callbacks = [
126 |             ModelCheckpoint(
127 |                 save_top_k=1, mode="max", monitor="valid_acc"
128 |             )  # save top 1 model
129 |         ]
130 |     else:
131 |         callbacks = [
132 |             ModelCheckpoint(
133 |                 save_top_k=1, mode="min", monitor="valid_loss"
134 |             )  # save top 1 model
135 |         ]
136 | 
137 |     logger = CSVLogger(save_dir=args.output_path, name="my-model")
138 | 
139 |     trainer = pl.Trainer(
140 |         max_epochs=args.num_epochs,
141 |         callbacks=callbacks,
142 |         accelerator=args.accelerator,
143 |         devices=args.devices,
144 |         logger=logger,
145 |         strategy=args.strategy,
146 |         precision=args.mixed_precision,
147 |         deterministic=False,
148 |         log_every_n_steps=10,
149 |     )
150 | 
151 |     start_time = time.time()
152 |     trainer.fit(model=lightning_model, datamodule=data_module)
153 | 
154 |     train_time = time.time()
155 |     runtime = (train_time - start_time) / 60
156 |     print(f"Training took {runtime:.2f} min.")
157 | 
158 |     # setup data on host machine
159 |     data_module.prepare_data()
160 |     data_module.setup()
161 | 
162 |     before = time.time()
163 |     val_acc = trainer.test(dataloaders=data_module.val_dataloader())
164 |     runtime = (time.time() - before) / 60
165 |     print(f"Inference on the validation set took {runtime:.2f} min.")
166 | 
167 |     runtime = (time.time() - start_time) / 60
168 |     print(f"The total runtime was {runtime:.2f} min.")
169 | 
170 |     print("Validation accuracy:", val_acc)
171 | 
172 |     print("Trainer log dir:", trainer.logger.log_dir)
173 | 
174 |     path = trainer.checkpoint_callback.best_model_path
175 |     print("Best model path:", path)
176 | 
--------------------------------------------------------------------------------
/templates/pl_classifier/my_classifier_template/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/machine-learning-notes/8c8e867930d000131e463a00c864ce42e8661cad/templates/pl_classifier/my_classifier_template/__init__.py
--------------------------------------------------------------------------------
/templates/pl_classifier/my_classifier_template/dataset.py:
--------------------------------------------------------------------------------
1 | import pytorch_lightning as pl
2 | from torch.utils.data import DataLoader
3 | from torch.utils.data.dataset import random_split
4 | from torchvision import datasets, transforms
5 | 
6 | 
7 | class Cifar10DataModule(pl.LightningDataModule):
8 |     def __init__(
9 |         self,
10 |         batch_size,
11 |         train_transform=None,
12 |         test_transform=None,
13 |         num_workers=4,
14 |         data_path="./",
15 |     ):
16 |         super().__init__()
17 |         self.data_path = data_path
18 |         self.batch_size = batch_size
19 |         self.num_workers = num_workers
20 |         self.custom_train_transform = train_transform
21 |         self.custom_test_transform = test_transform
22 | 
23 |     def prepare_data(self):
24 |         datasets.CIFAR10(root=self.data_path, download=True)
25 |         return
26 | 
27 |     def setup(self, stage=None):
28 | 
29 |         if self.custom_train_transform is None:
30 |             self.train_transform = transforms.Compose(
31 |                 [
32 |                     transforms.Resize((70, 70)),
33 |                     transforms.RandomCrop((64, 64)),
34 |                     transforms.ToTensor(),
35 |                 ]
36 |             )
37 |         else:
38 |             self.train_transform = self.custom_train_transform
39 | 
40 |         if self.custom_test_transform is None:
41 |             self.test_transform = transforms.Compose(
42 |                 [
43 |                     transforms.Resize((70, 70)),
44 |                     transforms.CenterCrop((64, 64)),
45 |                     transforms.ToTensor(),
46 |                 ]
47 |             )
48 |         else:
49 |             self.test_transform = self.custom_test_transform
50 | 
51 |         train = datasets.CIFAR10(
52 |             root=self.data_path,
53 |             train=True,
54 |             transform=self.train_transform,
55 |             download=False,
56 |         )
57 | 
58 |         self.test = datasets.CIFAR10(
59 |             root=self.data_path,
60 |             train=False,
61 |             transform=self.test_transform,
62 |             download=False,
63 |         )
64 | 
65 |         self.train, self.valid = random_split(train, lengths=[45000, 5000])
66 | 
67 |     def train_dataloader(self):
68 |         train_loader = DataLoader(
69 |             dataset=self.train,
70 |             batch_size=self.batch_size,
71 |             drop_last=True,
72 |             shuffle=True,
73 |             persistent_workers=True,
74 |             num_workers=self.num_workers,
75 |         )
76 |         return train_loader
77 | 
78 |     def val_dataloader(self):
79 |         valid_loader = DataLoader(
80 |             dataset=self.valid,
81 |             batch_size=self.batch_size,
82 |             drop_last=False,
83 |             persistent_workers=True,
84 |             shuffle=False,
85 |             num_workers=self.num_workers,
86 |         )
87 |         return valid_loader
88 | 
89 |     def test_dataloader(self):
90 |         test_loader = DataLoader(
91 |             dataset=self.test,
92 |             batch_size=self.batch_size,
93 |             drop_last=False,
94 |             persistent_workers=True,
95 |             shuffle=False,
96 |             num_workers=self.num_workers,
97 |         )
98 |         return test_loader
99 | 
--------------------------------------------------------------------------------
/templates/pl_classifier/my_classifier_template/model.py:
--------------------------------------------------------------------------------
1 | import pytorch_lightning as pl
2 | import torch
3 | import torchmetrics
4 | 
5 | 
6 | # LightningModule that receives a PyTorch model as input
7 | class LightningClassifier(pl.LightningModule):
8 |     def __init__(self, model, learning_rate, log_accuracy):
9 |         super().__init__()
10 | 
11 |         self.log_accuracy = log_accuracy
12 | 
13 |         # Note that the other __init__ parameters will be available as
14 |         # self.hparams.argname after calling self.save_hyperparameters below
15 | 
16 |         # The inherited PyTorch module
17 |         self.model = model
18 |         if hasattr(model, "dropout_proba"):
19 |             self.dropout_proba = model.dropout_proba
20 | 
21 |         # Save settings and hyperparameters to the log directory
22 |         # but skip the model parameters
23 |         self.save_hyperparameters(ignore=["model"])
24 | 
25 |         # Set up attributes for computing the accuracy
26 |         self.train_acc = torchmetrics.Accuracy()
27 |         self.valid_acc = torchmetrics.Accuracy()
28 |         self.test_acc = torchmetrics.Accuracy()
29 | 
30 |     # Defining the forward method is only necessary
31 |     # if you want to use a Trainer's .predict() method (optional)
32 |     def forward(self, x):
33 |         return self.model(x)
34 | 
35 |     # A common forward step to compute the loss and labels
36 |     # this is used for training, validation, and testing below
37 |     def _shared_step(self, batch):
38 |         features, true_labels = batch
39 |         logits = self(features)
40 |         loss = torch.nn.functional.cross_entropy(logits, true_labels)
41 |         predicted_labels = torch.argmax(logits, dim=1)
42 | 
43 |         return loss, true_labels, predicted_labels
44 | 
45 |     def training_step(self, batch, batch_idx):
46 |         loss, true_labels, predicted_labels = self._shared_step(batch)
47 |         self.log("train_loss", loss)
48 | 
49 |         # Do another forward pass in .eval() mode to compute accuracy
50 |         # while accounting for Dropout, BatchNorm, etc. behavior
51 |         # during evaluation (inference)
52 |         self.model.eval()
53 |         with torch.no_grad():
54 |             _, true_labels, predicted_labels = self._shared_step(batch)
55 | 
56 |         if self.log_accuracy:
57 |             self.train_acc(predicted_labels, true_labels)
58 |             self.log("train_acc", self.train_acc, on_epoch=True, on_step=False)
59 |         self.model.train()
60 | 
61 |         return loss  # this is passed to the optimizer for training
62 | 
63 |     def validation_step(self, batch, batch_idx):
64 |         loss, true_labels, predicted_labels = self._shared_step(batch)
65 |         self.log("valid_loss", loss)
66 |         self.valid_acc(predicted_labels, true_labels)
67 | 
68 |         if self.log_accuracy:
69 |             self.log(
70 |                 "valid_acc",
71 |                 self.valid_acc,
72 |                 on_epoch=True,
73 |                 on_step=False,
74 |                 prog_bar=True,
75 |             )
76 | 
77 |     def test_step(self, batch, batch_idx):
78 |         loss, true_labels, predicted_labels = self._shared_step(batch)
79 |         self.test_acc(predicted_labels, true_labels)
80 |         self.log("test_acc", self.test_acc, on_epoch=True, on_step=False)
81 | 
82 |     def configure_optimizers(self):
83 |         optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)
84 |         return optimizer
85 | 
--------------------------------------------------------------------------------
/templates/pl_classifier/my_classifier_template/plotting.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | import torch
4 | 
5 | 
6 | def show_failures(
7 |     model,
8 |     data_loader,
9 |     unnormalizer=None,
10 |     class_dict=None,
11 |     nrows=3,
12 |     ncols=5,
13 |     figsize=None,
14 | ):
15 | 
16 |     failure_features = []
17 |     failure_pred_labels = []
18 |     failure_true_labels = []
19 | 
20 |     for batch_idx, (features, targets) in enumerate(data_loader):
21 | 
22 |         with torch.no_grad():
23 |             # features/targets stay on the CPU here; move them to the
24 |             # model's device first if the model was moved to a GPU
25 |             logits = model(features)
26 |             predictions = torch.argmax(logits, dim=1)
27 | 
28 |         for i in range(features.shape[0]):
29 |             if targets[i] != predictions[i]:
30 |                 failure_features.append(features[i])
31 |                 failure_pred_labels.append(predictions[i])
32 |                 failure_true_labels.append(targets[i])
33 | 
34 |         if len(failure_true_labels) >= nrows * ncols:
35 |             break
36 | 
37 |     features = torch.stack(failure_features, dim=0)
38 |     targets = torch.tensor(failure_true_labels)
39 |     predictions = torch.tensor(failure_pred_labels)
40 | 
41 |     fig, axes = plt.subplots(
42 |         nrows=nrows, ncols=ncols, sharex=True, sharey=True, figsize=figsize
43 |     )
44 | 
45 |     if unnormalizer is not None:
46 |         for idx in range(features.shape[0]):
47 |             features[idx] = unnormalizer(features[idx])
48 |     nhwc_img = np.transpose(features.numpy(), axes=(0, 2, 3, 1))
49 | 
50 |     if nhwc_img.shape[-1] == 1:
51 |         nhw_img = np.squeeze(nhwc_img, axis=3)
52 | 
53 |         for idx, ax in enumerate(axes.ravel()):
54 |             ax.imshow(nhw_img[idx], cmap="binary")
55 |             if class_dict is not None:
56 |                 ax.title.set_text(
57 |                     f"P: {class_dict[predictions[idx].item()]}"
58 |                     f"\nT: {class_dict[targets[idx].item()]}"
59 |                 )
60 |             else:
61 |                 ax.title.set_text(f"P: {predictions[idx]} | T: {targets[idx]}")
62 |             ax.axison = False
63 | 
64 |     else:
65 | 
66 |         for idx, ax in enumerate(axes.ravel()):
67 |             ax.imshow(nhwc_img[idx])
68 |             if class_dict is not None:
69 |                 ax.title.set_text(
70 |                     f"P: {class_dict[predictions[idx].item()]}"
71 |                     f"\nT: {class_dict[targets[idx].item()]}"
72 |                 )
73 |             else:
74 |                 ax.title.set_text(f"P: {predictions[idx]} | T: {targets[idx]}")
75 |             ax.axison = False
76 |     return fig, axes
77 | 
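78 | 
79 | 
80 | # Example usage (a sketch; assumes a trained `model`, a `test_loader`, and
81 | # CIFAR-10-style RGB images):
82 | #
83 | #   class_dict = {0: "airplane", 1: "automobile", 2: "bird", 3: "cat",
84 | #                 4: "deer", 5: "dog", 6: "frog", 7: "horse",
85 | #                 8: "ship", 9: "truck"}
86 | #   fig, axes = show_failures(model, test_loader, class_dict=class_dict,
87 | #                             nrows=3, ncols=5, figsize=(10, 6))
88 | #   plt.show()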
-------------------------------------------------------------------------------- /templates/pl_classifier/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==1.0.0 2 | aiohttp==3.8.1 3 | aiosignal==1.2.0 4 | appnope==0.1.3 5 | asttokens==2.0.5 6 | async-timeout==4.0.2 7 | attrs==21.4.0 8 | backcall==0.2.0 9 | cachetools==5.1.0 10 | certifi==2022.5.18.1 11 | charset-normalizer==2.0.12 12 | decorator==5.1.1 13 | executing==0.8.3 14 | frozenlist==1.3.0 15 | fsspec==2022.5.0 16 | google-auth==2.6.6 17 | google-auth-oauthlib==0.4.6 18 | grpcio==1.46.3 19 | idna==3.3 20 | importlib-metadata==4.11.4 21 | ipython==8.3.0 22 | jedi==0.18.1 23 | Markdown==3.3.7 24 | matplotlib-inline==0.1.3 25 | mlxtend==0.19.0 26 | multidict==6.0.2 27 | numpy==1.22.4 28 | oauthlib==3.2.0 29 | packaging==21.3 30 | parso==0.8.3 31 | pexpect==4.8.0 32 | pickleshare==0.7.5 33 | Pillow==9.1.1 34 | prompt-toolkit==3.0.29 35 | protobuf==3.20.1 36 | ptyprocess==0.7.0 37 | pure-eval==0.2.2 38 | pyasn1==0.4.8 39 | pyasn1-modules==0.2.8 40 | pyDeprecate==0.3.2 41 | Pygments==2.12.0 42 | pyparsing==3.0.9 43 | pytorch-lightning==1.6.3 44 | PyYAML==6.0 45 | requests==2.27.1 46 | requests-oauthlib==1.3.1 47 | rsa==4.8 48 | six==1.16.0 49 | stack-data==0.2.0 50 | tensorboard==2.9.0 51 | tensorboard-data-server==0.6.1 52 | tensorboard-plugin-wit==1.8.1 53 | torch==1.11.0 54 | torchaudio==0.11.0 55 | torchmetrics==0.8.2 56 | torchvision==0.12.0 57 | tqdm==4.64.0 58 | traitlets==5.2.1.post0 59 | typing_extensions==4.2.0 60 | urllib3==1.26.9 61 | watermark==2.3.0 62 | wcwidth==0.2.5 63 | Werkzeug==2.1.2 64 | yarl==1.7.2 65 | zipp==3.8.0 66 | -------------------------------------------------------------------------------- /templates/pl_classifier/setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | 4 | setuptools.setup( 5 | name='my_classifier_template', 6 | version='0.1', 7 | author='sebastian', 8 | packages=setuptools.find_packages(), 9 | ) --------------------------------------------------------------------------------