├── .gitignore
├── .pep8speaks.yml
├── LICENSE
├── README.md
├── pytorch-lightning_ipynb
├── cnn
│   ├── cnn-alexnet-cifar10.ipynb
│   ├── cnn-alexnet-grouped-cifar10.ipynb
│   ├── cnn-allconv.ipynb
│   ├── cnn-basic.ipynb
│   ├── cnn-densenet121-cifar10.ipynb
│   ├── cnn-densenet121-mnist.ipynb
│   ├── cnn-he-init.ipynb
│   ├── cnn-lenet5-cifar10.ipynb
│   ├── cnn-lenet5-mnist.ipynb
│   ├── cnn-lenet5-quickdraw.ipynb
│   ├── cnn-mobilenet-v2-cifar10-2.ipynb
│   ├── cnn-mobilenet-v2-cifar10.ipynb
│   ├── cnn-mobilenet-v3-large-cifar10.ipynb
│   ├── cnn-mobilenet-v3-small-cifar10.ipynb
│   ├── cnn-nin-cifar10.ipynb
│   ├── cnn-vgg16-celeba.ipynb
│   ├── cnn-vgg16.ipynb
│   └── cnn-vgg19.ipynb
├── data-augmentation
│   └── autoaugment
│   │   ├── helper_utilities.py
│   │   ├── with-autoaugment.ipynb
│   │   ├── with-trivialaugment.ipynb
│   │   └── without-augmentation.ipynb
├── kfold
│   ├── baseline-light-cnn-mnist.ipynb
│   └── kfold-light-cnn-mnist.ipynb
├── mlp
│   ├── mlp-basic.ipynb
│   ├── mlp-batchnorm.ipynb
│   ├── mlp-dropout.ipynb
│   └── template_classification_basic.ipynb
├── ordinal
│   ├── CORAL-light_cement.ipynb
│   ├── CORN-light_cement.ipynb
│   ├── baseline-light_cement.ipynb
│   ├── beckham2016-light_cement.ipynb
│   ├── niu2016-light_cement.ipynb
│   └── polat2022-light_cement.ipynb
└── transformer
│   ├── distilbert-finetune-last-layers.ipynb
│   ├── distilbert-finetune-scheduler.ipynb
│   ├── distilbert-finetuning-ii-amp
│   ├── distilbert-finetuning-ii-2.ipynb
│   ├── distilbert-finetuning-ii-amp16.ipynb
│   ├── distilbert-finetuning-ii-ampb16.ipynb
│   └── figures
│   │   └── finetuning-ii.png
│   ├── distilbert-finetuning-ii.ipynb
│   └── figures
│       ├── finetuning-i.png
│       └── finetuning-ii.png
├── pytorch_ipynb
├── autoencoder
│   ├── ae-basic-with-rf.ipynb
│   ├── ae-basic.ipynb
│   ├── ae-cnn-cvae.ipynb
│   ├── ae-cnn-cvae_no-out-concat.ipynb
│   ├── ae-conv-nneighbor-celeba.ipynb
│   ├── ae-conv-nneighbor-quickdraw-1.ipynb
│   ├── ae-conv-nneighbor.ipynb
│   ├── ae-conv-var.ipynb
│   ├── ae-cvae.ipynb
│   ├── ae-cvae_no-out-concat.ipynb
│   ├── ae-deconv-jaccard.ipynb
│   ├── ae-deconv-nopool.ipynb
│   ├── ae-deconv.ipynb
│   └── ae-var.ipynb
├── basic-ml
│   ├── logistic-regression.ipynb
│   ├── perceptron.ipynb
│   ├── softmax-regression-mlxtend-1.ipynb
│   └── softmax-regression.ipynb
├── cnn
│   ├── cnn-alexnet-cifar10-grouped.ipynb
│   ├── cnn-alexnet-cifar10.ipynb
│   ├── cnn-allconv.ipynb
│   ├── cnn-basic.ipynb
│   ├── cnn-densenet121-cifar10.ipynb
│   ├── cnn-densenet121-mnist.ipynb
│   ├── cnn-embetter-mobilenet.ipynb
│   ├── cnn-he-init.ipynb
│   ├── cnn-lenet5-cifar10.ipynb
│   ├── cnn-lenet5-mnist.ipynb
│   ├── cnn-lenet5-quickdraw.ipynb
│   ├── cnn-mobilenet-v2-cifar10.ipynb
│   ├── cnn-mobilenet-v3-large-cifar10.ipynb
│   ├── cnn-mobilenet-v3-small-cifar10.ipynb
│   ├── cnn-resnet101-celeba.ipynb
│   ├── cnn-resnet101-cifar10.ipynb
│   ├── cnn-resnet152-celeba.ipynb
│   ├── cnn-resnet18-celeba-dataparallel.ipynb
│   ├── cnn-resnet18-mnist.ipynb
│   ├── cnn-resnet34-celeba-dataparallel.ipynb
│   ├── cnn-resnet34-cifar10-pinmem.ipynb
│   ├── cnn-resnet34-mnist.ipynb
│   ├── cnn-resnet34-quickdraw.ipynb
│   ├── cnn-resnet50-celeba-dataparallel.ipynb
│   ├── cnn-resnet50-mnist.ipynb
│   ├── cnn-standardized.ipynb
│   ├── cnn-vgg16-cats-dogs.ipynb
│   ├── cnn-vgg16-celeba-data-parallel.ipynb
│   ├── cnn-vgg16-celeba.ipynb
│   ├── cnn-vgg16.ipynb
│   ├── cnn-vgg19.ipynb
│   ├── fc-to-conv.ipynb
│   ├── images
│   │   ├── cats-and-dogs-download-all.png
│   │   ├── celeba-files.png
│   │   └── feature-extractor.png
│   ├── nin-cifar10.ipynb
│   ├── nin-cifar10_batchnorm.ipynb
│   ├── nin-cifar10_filter-response-norm.ipynb
│   └── resnet-ex-1.ipynb
├── data
│   ├── iris.data
│   └── perceptron_toydata.txt
├── gan
│   ├── dc-wgan-1.ipynb
│   ├── dcgan-cats-and-dogs.ipynb
│   ├── dcgan-celeba.ipynb
│   ├── gan-conv-smoothing.ipynb
│   ├── gan-conv.ipynb
│   ├── gan.ipynb
│   ├── images
│   │   ├── screenshot-downl-celeba-aligned.png
│   │   └── screenshot-radford-dcgan-generator.png
│   └── wgan-1.ipynb
├── gnn
│   ├── gnn-basic-1.ipynb
│   ├── gnn-basic-edge-1.ipynb
│   └── gnn-basic-graph-spectral-1.ipynb
├── helper.py
├── helper_data.py
├── helper_evaluate.py
├── helper_plotting.py
├── helper_train.py
├── helper_utils.py
├── images
│   ├── alexnet
│   │   ├── alexnet-groups.png
│   │   ├── alexnet-paper.png
│   │   ├── grouped-convolutions.png
│   │   └── grouped-convolutions.pptx
│   ├── cyclical-learning-rate
│   │   └── cyclical-lr.png
│   ├── dataparallel
│   │   ├── dataparallel.png
│   │   ├── minibatch-update-dataparallel.png
│   │   └── minibatch-update.png
│   ├── densenet
│   │   ├── densenet-fig-2.jpg
│   │   └── densenet-tab-1-dnet121.jpg
│   ├── fc-to-conv
│   │   ├── fc-to-conv-1.png
│   │   └── fc-to-conv-2.png
│   ├── lenet
│   │   └── lenet-5_1.jpg
│   ├── manual-gradients
│   │   ├── graph_1.png
│   │   ├── graph_2.png
│   │   └── graph_3.png
│   ├── resnets
│   │   ├── resnet-ex-1-1.png
│   │   ├── resnet-ex-1-2.png
│   │   ├── resnet-ex-1-3.png
│   │   ├── resnet101
│   │   │   └── resnet101-arch-1.png
│   │   ├── resnet152
│   │   │   └── resnet152-arch-1.png
│   │   ├── resnet34
│   │   │   └── resnet34-arch.png
│   │   └── resnet50
│   │   │   ├── resnet-50-bottleneck.png
│   │   │   └── resnet50-arch-1.png
│   ├── vgg16
│   │   └── vgg16-arch-table.png
│   ├── vgg19
│   │   └── vgg19-arch-table.png
│   └── weight-sharing
│   │   ├── weight-sharing-1.png
│   │   └── weight-sharing-2.png
├── kfold
│   └── baseline-cnn-mnist.ipynb
├── lightning
│   ├── lightning-mlp-best-model.ipynb
│   ├── lightning-mlp-best-model_images
│   │   └── 1.png
│   ├── lightning-mlp.ipynb
│   └── lightning-mlp_images
│   │   ├── 1.png
│   │   └── 2.png
├── mechanics
│   ├── baseline_memory_plot.png
│   ├── cnn-weight-sharing.ipynb
│   ├── custom-data-loader-afad.ipynb
│   ├── custom-data-loader-celeba.ipynb
│   ├── custom-data-loader-csv.ipynb
│   ├── custom-data-loader-fashionmnist.ipynb
│   ├── custom-data-loader-mnist.ipynb
│   ├── custom-data-loader-quickdraw.ipynb
│   ├── custom-data-loader-svhn.ipynb
│   ├── custom-data-loader_dating-historical-color-images.ipynb
│   ├── custom-dataloader-png
│   │   └── custom-dataloader-example.ipynb
│   ├── dataloader-nesting.ipynb
│   ├── deterministic_benchmark.ipynb
│   ├── deterministic_benchmark_utils.py
│   ├── figures
│   │   ├── fashion-mnist-sprite.png
│   │   ├── gradient-checkpointing-1.png
│   │   ├── gradient-checkpointing-2.png
│   │   └── gradient-checkpointing-3.png
│   ├── gradient-checkpointing-nin.ipynb
│   ├── manual-gradients.ipynb
│   ├── mlp-sequential.ipynb
│   ├── model-pipeline-vgg16.ipynb
│   ├── plot-jupyter-matplotlib.ipynb
│   ├── torchvision-transform-examples.ipynb
│   ├── transferlearning-vgg16.ipynb
│   └── validation-splits.ipynb
├── mlp
│   ├── mlp-basic.ipynb
│   ├── mlp-batchnorm.ipynb
│   ├── mlp-dropout.ipynb
│   └── mlp-fromscratch__sigmoid-mse.ipynb
├── ordinal
│   ├── CORAL_cement.ipynb
│   ├── CORN_cement.ipynb
│   ├── baseline_cement.ipynb
│   ├── beckham2016_cement.ipynb
│   ├── niu2016_cement.ipynb
│   └── polat2022_cement.ipynb
├── related-libraries
│   └── torchmetrics-update-forward.ipynb
├── rnn
│   ├── char_rnn-charlesdickens.ipynb
│   ├── rnn_bi_multilayer_lstm_own_csv_agnews.ipynb
│   ├── rnn_gru_packed_imdb.ipynb
│   ├── rnn_lstm_bi_imdb.ipynb
│   ├── rnn_lstm_packed_imdb-glove.ipynb
│   ├── rnn_lstm_packed_imdb.ipynb
│   ├── rnn_lstm_packed_own_csv_imdb.ipynb
│   ├── rnn_simple_imdb.ipynb
│   └── rnn_simple_packed_imdb.ipynb
├── transfer
│   └── transferlearning-vgg16-cifar10-1.ipynb
├── transformer
│   ├── 1_distilbert-as-feature-extractor.ipynb
│   ├── 1_distilbert-as-feature-extractor_without-dcollate.ipynb
│   ├── distilbert-benchmark
│   │   ├── 1_just-hf.py
│   │   ├── 2_hf_with_lightning.py
│   │   └── shared.py
│   ├── distilbert-embetter-feature-extractor.ipynb
│   ├── distilbert-hf-finetuning.ipynb
│   ├── distilbert-hf-finetuning_without-dcollate.ipynb
│   ├── distilbert-v0.ipynb
│   └── figures
│   │   ├── feature-extractor.jpeg
│   │   └── finetuning-ii.png
├── tricks
│   ├── cnn-alexnet-cifar10-batchincrease.ipynb
│   ├── cyclical-learning-rate.ipynb
│   └── gradclipping_mlp.ipynb
└── viz
│   └── cnns
│   └── cats-and-dogs
│   ├── _cnn-basemodel__vgg16-cats-dogs.ipynb
│   ├── cnn-viz-grad__vgg16-cats-dogs.ipynb
│   ├── cnn-viz-guided-backprop__vgg16-cats-dogs.ipynb
│   ├── datautils.py
│   ├── gradient.png
│   ├── images
│   └── cats-and-dogs-download-all.png
│   └── vgg16.py
├── templates
└── pytorch_lightning
│   ├── submit_command.sh
│   ├── template_classification_basic.ipynb
│   └── tune_classification_basic.py
└── tensorflow1_ipynb
├── autoencoder
├── ae-basic.ipynb
├── ae-conv-nneighbor.ipynb
└── ae-deconv.ipynb
├── basic-ml
├── logistic-regression.ipynb
├── perceptron.ipynb
└── softmax-regression.ipynb
├── cnn
├── cnn-basic.ipynb
└── cnn-vgg16.ipynb
├── data
└── perceptron_toydata.txt
├── gan
├── gan-conv-smoothing.ipynb
├── gan-conv.ipynb
└── gan.ipynb
├── helper.py
├── mechanics
├── dataset-api.ipynb
├── file-queues.ipynb
├── image-data-chunking-hdf5.ipynb
├── image-data-chunking-npz.ipynb
├── saving-and-reloading-models.ipynb
└── tfrecords.ipynb
├── metric
└── siamese-1.ipynb
└── mlp
├── mlp-basic.ipynb
├── mlp-batchnorm.ipynb
├── mlp-dropout.ipynb
└── mlp-lowlevel.ipynb

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Other
.pytorch-lightning_ipynb_new/

# Log files
pytorch-lightning_ipynb/mlp/logs/
pytorch-lightning_ipynb/cnn/logs/

# Datasets
png-files
*-ubyte*
pytorch-lightning_ipynb/*/data
pytorch_ipynb/viz/cnns/cats-and-dogs/dogs-vs-cats
pytorch_ipynb/gan/dogs-vs-cats
pytorch_ipynb/viz/cnns/cats-and-dogs/dogs-vs-cats
pytorch_ipynb/rnn/yelp_review_polarity_csv/
pytorch_ipynb/rnn/ag_news_csv/
pytorch_ipynb/rnn/amazon_review_polarity_csv/
HistoricalColor-ECCV2012*
AFAD-Lite
tarball*
pytorch_ipynb/rnn/.data/
pytorch_ipynb/rnn/.vector_cache/
cifar-10-batches-py
celeba_gender_attr_test.txt
celeba_gender_attr_train.txt
iris.h5
test_32x32.mat
train_32x32.mat
code/model_zoo/pytorch_ipynb/svhn_cropped/
list_attr_celeba.txt
list_eval_partition.txt
img_align_celeba
quickdraw-*
*.csv
*.zip
*.npz
*.npy
*.tar.gz
*ubyte.gz
*archive.ics.uci.edu*
code/model_zoo/cifar-10
code/model_zoo/pytorch_ipynb/data

# Binary PyTorch models
*.pt
*.state_dict

# Temporary OS files
.DS_Store

# TensorFlow Checkpoint files
checkpoint
code/*/*.data-?????-of-?????
code/*/*.index
code/*/*.meta
code/model_zoo/tensorflow_ipynb/*.data-?????-of-?????
code/model_zoo/tensorflow_ipynb/*.index
code/model_zoo/tensorflow_ipynb/*.meta
code/model_zoo/tensorflow_ipynb/cifar-10/*

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
venv/
ENV/

# Spyder project settings
.spyderproject

# Rope project settings
.ropeproject

# Datasets
MNIST*

--------------------------------------------------------------------------------
/.pep8speaks.yml:
--------------------------------------------------------------------------------
# File : .pep8speaks.yml

scanner:
    diff_only: True  # If False, the entire file touched by the Pull Request is scanned for errors. If True, only the diff is scanned.
    linter: flake8

flake8:
    max-line-length: 89  # Default is 79 in PEP 8
    ignore:  # Errors and warnings to ignore
        - W504  # line break after binary operator
        - E402  # module level import not at top of file
        - E731  # do not assign a lambda expression, use a def
        - C406  # Unnecessary list literal - rewrite as a dict literal.
        - E741  # ambiguous variable name

no_blank_comment: False  # If True, no comment is made on PR without any errors.
descending_issues_order: False  # If True, PEP 8 issues in message will be displayed in descending order of line numbers in the file

message:  # Customize the comment made by the bot
    opened:  # Messages when a new PR is submitted
        header: "Hello @{name}! Thanks for opening this PR. "
        # The keyword {name} is converted into the author's username
        footer: ""
        # The messages can be written as they would over GitHub
    updated:  # Messages when new commits are added to the PR
        header: "Hello @{name}! Thanks for updating this PR. "
        footer: ""  # Why comment the link to the style guide every time? :)
    no_errors: "There are currently no PEP 8 issues detected in this Pull Request. Cheers! :beers: "
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019-2022 Sebastian Raschka

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/pytorch-lightning_ipynb/data-augmentation/autoaugment/helper_utilities.py:
--------------------------------------------------------------------------------
import lightning as L
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn.functional as F
import torchmetrics
from torch.utils.data import DataLoader
from torch.utils.data.dataset import random_split
from torchvision import datasets, transforms


class LightningModel(L.LightningModule):
    def __init__(self, model, learning_rate):
        super().__init__()

        self.learning_rate = learning_rate
        self.model = model

        self.save_hyperparameters(ignore=["model"])

        self.train_acc = torchmetrics.Accuracy(task="multiclass", num_classes=10)
        self.val_acc = torchmetrics.Accuracy(task="multiclass", num_classes=10)
        self.test_acc = torchmetrics.Accuracy(task="multiclass", num_classes=10)

    def forward(self, x):
        return self.model(x)

    def _shared_step(self, batch):
        features, true_labels = batch
        logits = self(features)

        loss = F.cross_entropy(logits, true_labels)
        predicted_labels = torch.argmax(logits, dim=1)
        return loss, true_labels, predicted_labels

    def training_step(self, batch, batch_idx):
        loss, true_labels, predicted_labels = self._shared_step(batch)

        self.log("train_loss", loss)
        self.train_acc(predicted_labels, true_labels)
        self.log(
            "train_acc", self.train_acc, prog_bar=True, on_epoch=True, on_step=False
        )
        return loss

    def validation_step(self, batch, batch_idx):
        loss, true_labels, predicted_labels = self._shared_step(batch)

        self.log("val_loss", loss, prog_bar=True)
        self.val_acc(predicted_labels, true_labels)
        self.log("val_acc", self.val_acc, prog_bar=True)

    def test_step(self, batch, batch_idx):
        loss, true_labels, predicted_labels = self._shared_step(batch)
        self.test_acc(predicted_labels, true_labels)
        self.log("test_acc", self.test_acc)
    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.parameters(), lr=self.learning_rate)
        return optimizer


class Cifar10DataModule(L.LightningDataModule):
    def __init__(
        self, data_path="./", batch_size=64, num_workers=0, height_width=(32, 32),
        train_transform=None, test_transform=None
    ):
        super().__init__()
        self.batch_size = batch_size
        self.data_path = data_path
        self.num_workers = num_workers
        self.height_width = height_width
        self.train_transform = train_transform
        self.test_transform = test_transform

    def prepare_data(self):
        datasets.CIFAR10(root=self.data_path, download=True)

        if self.train_transform is None:
            self.train_transform = transforms.Compose(
                [
                    transforms.Resize(self.height_width),
                    transforms.ToTensor(),
                ]
            )

        if self.test_transform is None:
            self.test_transform = transforms.Compose(
                [
                    transforms.Resize(self.height_width),
                    transforms.ToTensor(),
                ]
            )
        return

    def setup(self, stage=None):
        train = datasets.CIFAR10(
            root=self.data_path,
            train=True,
            transform=self.train_transform,
            download=False,
        )

        self.test = datasets.CIFAR10(
            root=self.data_path,
            train=False,
            transform=self.test_transform,
            download=False,
        )

        self.train, self.valid = random_split(train, lengths=[45000, 5000])

    def train_dataloader(self):
        train_loader = DataLoader(
            dataset=self.train,
            batch_size=self.batch_size,
            drop_last=True,
            shuffle=True,
            num_workers=self.num_workers,
        )
        return train_loader

    def val_dataloader(self):
        valid_loader = DataLoader(
            dataset=self.valid,
            batch_size=self.batch_size,
            drop_last=False,
            shuffle=False,
            num_workers=self.num_workers,
        )
        return valid_loader

    def test_dataloader(self):
        test_loader = DataLoader(
            dataset=self.test,
            batch_size=self.batch_size,
            drop_last=False,
            shuffle=False,
            num_workers=self.num_workers,
        )
        return test_loader


def plot_val_acc(
        log_dir, acc_ylim=(0.5, 1.0), save_loss=None, save_acc=None):

    metrics = pd.read_csv(f"{log_dir}/metrics.csv")

    aggreg_metrics = []
    agg_col = "epoch"

    for i, dfg in metrics.groupby(agg_col):
        agg = dict(dfg.mean())
        agg[agg_col] = i
        aggreg_metrics.append(agg)

    df_metrics = pd.DataFrame(aggreg_metrics)
    df_metrics[["val_acc"]].plot(
        grid=True, legend=True, xlabel="Epoch", ylabel="ACC"
    )

    plt.ylim(acc_ylim)
    if save_acc is not None:
        plt.savefig(save_acc)


def plot_loss_and_acc(
    log_dir, loss_ylim=(0.0, 0.9), acc_ylim=(0.3, 1.0), save_loss=None, save_acc=None
):

    metrics = pd.read_csv(f"{log_dir}/metrics.csv")

    aggreg_metrics = []
    agg_col = "epoch"
    for i, dfg in metrics.groupby(agg_col):
        agg = dict(dfg.mean())
        agg[agg_col] = i
        aggreg_metrics.append(agg)

    df_metrics = pd.DataFrame(aggreg_metrics)
    df_metrics[["train_loss"]].plot(
        grid=True, legend=True, xlabel="Epoch", ylabel="Loss"
    )

    plt.ylim(loss_ylim)
    if save_loss is not None:
        plt.savefig(save_loss)

    df_metrics[["train_acc", "val_acc"]].plot(
        grid=True, legend=True, xlabel="Epoch", ylabel="ACC"
    )

    plt.ylim(acc_ylim)
    if save_acc is not None:
        plt.savefig(save_acc)
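A usage sketch for the utilities above (not part of the repository; the ResNet-18 backbone, the hyperparameter values, and the run name are illustrative assumptions). plot_loss_and_acc() only needs the directory containing the metrics.csv that a CSVLogger writes:

import lightning as L
from lightning.pytorch.loggers import CSVLogger
from torchvision.models import resnet18

# any torch.nn.Module with 10 output units works as the wrapped model
pytorch_model = resnet18(num_classes=10)
lightning_model = LightningModel(model=pytorch_model, learning_rate=0.05)
dm = Cifar10DataModule(batch_size=64, num_workers=2)

trainer = L.Trainer(
    max_epochs=10,
    accelerator="auto",
    logger=CSVLogger(save_dir="logs/", name="cifar10-sketch"),  # hypothetical run name
)
trainer.fit(model=lightning_model, datamodule=dm)
trainer.test(model=lightning_model, datamodule=dm)

# plot the curves recorded for this run
plot_loss_and_acc(trainer.logger.log_dir)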
--------------------------------------------------------------------------------
/pytorch-lightning_ipynb/transformer/distilbert-finetuning-ii-amp/figures/finetuning-ii.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch-lightning_ipynb/transformer/distilbert-finetuning-ii-amp/figures/finetuning-ii.png

--------------------------------------------------------------------------------
/pytorch-lightning_ipynb/transformer/figures/finetuning-i.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch-lightning_ipynb/transformer/figures/finetuning-i.png

--------------------------------------------------------------------------------
/pytorch-lightning_ipynb/transformer/figures/finetuning-ii.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch-lightning_ipynb/transformer/figures/finetuning-ii.png

--------------------------------------------------------------------------------
/pytorch_ipynb/basic-ml/softmax-regression.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.\n",
    "- Author: Sebastian Raschka\n",
    "- GitHub Repository: https://github.com/rasbt/deeplearning-models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sebastian Raschka \n",
      "\n",
      "CPython 3.6.8\n",
      "IPython 7.2.0\n",
      "\n",
      "torch 1.0.0\n"
     ]
    }
   ],
   "source": [
    "%load_ext watermark\n",
    "%watermark -a 'Sebastian Raschka' -v -p torch"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- Runs on CPU or GPU (if available)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Model Zoo -- Softmax Regression"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Implementation of softmax regression (multinomial logistic regression)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from torchvision import datasets\n",
    "from torchvision import transforms\n",
    "from torch.utils.data import DataLoader\n",
    "import torch.nn.functional as F\n",
    "import torch"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Settings and Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Image batch dimensions: torch.Size([256, 1, 28, 28])\n",
      "Image label dimensions: torch.Size([256])\n"
     ]
    }
   ],
   "source": [
    "##########################\n",
    "### SETTINGS\n",
    "##########################\n",
    "\n",
    "# Device\n",
    "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
    "\n",
    "# Hyperparameters\n",
    "random_seed = 123\n",
    "learning_rate = 0.1\n",
    "num_epochs = 10\n",
    "batch_size = 256\n",
    "\n",
    "# Architecture\n",
    "num_features = 784\n",
    "num_classes = 10\n",
    "\n",
    "\n",
    "##########################\n",
    "### MNIST DATASET\n",
    "##########################\n",
    "\n",
    "train_dataset = datasets.MNIST(root='data', \n",
    "                               train=True, \n",
    "                               transform=transforms.ToTensor(), \n",
    "                               download=True)\n",
    "\n",
    "test_dataset = datasets.MNIST(root='data', \n",
    "                              train=False, \n",
    "                              transform=transforms.ToTensor())\n",
    "\n",
    "\n",
    "train_loader = DataLoader(dataset=train_dataset, \n",
    "                          batch_size=batch_size, \n",
    "                          shuffle=True)\n",
    "\n",
    "test_loader = DataLoader(dataset=test_dataset, \n",
    "                         batch_size=batch_size, \n",
    "                         shuffle=False)\n",
    "\n",
    "\n",
    "# Checking the dataset\n",
    "for images, labels in train_loader: \n",
    "    print('Image batch dimensions:', images.shape)\n",
    "    print('Image label dimensions:', labels.shape)\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "##########################\n",
    "### MODEL\n",
    "##########################\n",
    "\n",
    "class SoftmaxRegression(torch.nn.Module):\n",
    "\n",
    "    def __init__(self, num_features, num_classes):\n",
    "        super(SoftmaxRegression, self).__init__()\n",
    "        self.linear = torch.nn.Linear(num_features, num_classes)\n",
    "        \n",
    "        self.linear.weight.detach().zero_()\n",
    "        self.linear.bias.detach().zero_()\n",
    "        \n",
    "    def forward(self, x):\n",
    "        logits = self.linear(x)\n",
    "        probas = F.softmax(logits, dim=1)\n",
    "        return logits, probas\n",
    "\n",
    "model = SoftmaxRegression(num_features=num_features,\n",
    "                          num_classes=num_classes)\n",
    "\n",
    "model.to(device)\n",
    "\n",
    "##########################\n",
    "### COST AND OPTIMIZER\n",
    "##########################\n",
    "\n",
    "optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: 001/010 | Batch 000/234 | Cost: 2.3026\n",
      "Epoch: 001/010 | Batch 050/234 | Cost: 0.7941\n",
      "Epoch: 001/010 | Batch 100/234 | Cost: 0.5651\n",
      "Epoch: 001/010 | Batch 150/234 | Cost: 0.4603\n",
      "Epoch: 001/010 | Batch 200/234 | Cost: 0.4822\n",
      "Epoch: 001/010 training accuracy: 88.04%\n",
      "Epoch: 002/010 | Batch 000/234 | Cost: 0.4105\n",
      "Epoch: 002/010 | Batch 050/234 | Cost: 0.4415\n",
      "Epoch: 002/010 | Batch 100/234 | Cost: 0.4367\n",
      "Epoch: 002/010 | Batch 150/234 | Cost: 0.4289\n",
      "Epoch: 002/010 | Batch 200/234 | Cost: 0.3926\n",
      "Epoch: 002/010 training accuracy: 89.37%\n",
      "Epoch: 003/010 | Batch 000/234 | Cost: 0.4112\n",
      "Epoch: 003/010 | Batch 050/234 | Cost: 0.3579\n",
      "Epoch: 003/010 | Batch 100/234 | Cost: 0.3013\n",
      "Epoch: 003/010 | Batch 150/234 | Cost: 0.3258\n",
      "Epoch: 003/010 | Batch 200/234 | Cost: 0.4254\n",
      "Epoch: 003/010 training accuracy: 89.98%\n",
      "Epoch: 004/010 | Batch 000/234 | Cost: 0.3988\n",
      "Epoch: 004/010 | Batch 050/234 | Cost: 0.3690\n",
      "Epoch: 004/010 | Batch 100/234 | Cost: 0.3459\n",
      "Epoch: 004/010 | Batch 150/234 | Cost: 0.4030\n",
      "Epoch: 004/010 | Batch 200/234 | Cost: 0.3240\n",
      "Epoch: 004/010 training accuracy: 90.35%\n",
      "Epoch: 005/010 | Batch 000/234 | Cost: 0.3265\n",
      "Epoch: 005/010 | Batch 050/234 | Cost: 0.3673\n",
      "Epoch: 005/010 | Batch 100/234 | Cost: 0.3085\n",
      "Epoch: 005/010 | Batch 150/234 | Cost: 0.3183\n",
      "Epoch: 005/010 | Batch 200/234 | Cost: 0.3316\n",
      "Epoch: 005/010 training accuracy: 90.64%\n",
      "Epoch: 006/010 | Batch 000/234 | Cost: 0.4518\n",
      "Epoch: 006/010 | Batch 050/234 | Cost: 0.3863\n",
      "Epoch: 006/010 | Batch 100/234 | Cost: 0.3620\n",
      "Epoch: 006/010 | Batch 150/234 | Cost: 0.3733\n",
      "Epoch: 006/010 | Batch 200/234 | Cost: 0.3289\n",
      "Epoch: 006/010 training accuracy: 90.86%\n",
      "Epoch: 007/010 | Batch 000/234 | Cost: 0.3450\n",
      "Epoch: 007/010 | Batch 050/234 | Cost: 0.2289\n",
      "Epoch: 007/010 | Batch 100/234 | Cost: 0.3073\n",
      "Epoch: 007/010 | Batch 150/234 | Cost: 0.2750\n",
      "Epoch: 007/010 | Batch 200/234 | Cost: 0.3456\n",
      "Epoch: 007/010 training accuracy: 91.00%\n",
      "Epoch: 008/010 | Batch 000/234 | Cost: 0.4900\n",
      "Epoch: 008/010 | Batch 050/234 | Cost: 0.3479\n",
      "Epoch: 008/010 | Batch 100/234 | Cost: 0.2343\n",
      "Epoch: 008/010 | Batch 150/234 | Cost: 0.3059\n",
      "Epoch: 008/010 | Batch 200/234 | Cost: 0.3684\n",
      "Epoch: 008/010 training accuracy: 91.22%\n",
      "Epoch: 009/010 | Batch 000/234 | Cost: 0.3762\n",
      "Epoch: 009/010 | Batch 050/234 | Cost: 0.2976\n",
      "Epoch: 009/010 | Batch 100/234 | Cost: 0.2690\n",
      "Epoch: 009/010 | Batch 150/234 | Cost: 0.2610\n",
      "Epoch: 009/010 | Batch 200/234 | Cost: 0.3140\n",
      "Epoch: 009/010 training accuracy: 91.34%\n",
      "Epoch: 010/010 | Batch 000/234 | Cost: 0.2790\n",
      "Epoch: 010/010 | Batch 050/234 | Cost: 0.3070\n",
      "Epoch: 010/010 | Batch 100/234 | Cost: 0.3300\n",
      "Epoch: 010/010 | Batch 150/234 | Cost: 0.2520\n",
      "Epoch: 010/010 | Batch 200/234 | Cost: 0.3301\n",
      "Epoch: 010/010 training accuracy: 91.40%\n"
     ]
    }
   ],
   "source": [
    "# Manual seed for deterministic data loader\n",
    "torch.manual_seed(random_seed)\n",
    "\n",
    "\n",
    "def compute_accuracy(model, data_loader):\n",
    "    correct_pred, num_examples = 0, 0\n",
    "    \n",
    "    for features, targets in data_loader:\n",
    "        features = features.view(-1, 28*28).to(device)\n",
    "        targets = targets.to(device)\n",
    "        logits, probas = model(features)\n",
    "        _, predicted_labels = torch.max(probas, 1)\n",
    "        num_examples += targets.size(0)\n",
    "        correct_pred += (predicted_labels == targets).sum()\n",
    "        \n",
    "    return correct_pred.float() / num_examples * 100\n",
    "    \n",
    "\n",
    "for epoch in range(num_epochs):\n",
    "    for batch_idx, (features, targets) in enumerate(train_loader):\n",
    "        \n",
    "        features = features.view(-1, 28*28).to(device)\n",
    "        targets = targets.to(device)\n",
    "        \n",
    "        ### FORWARD AND BACK PROP\n",
    "        logits, probas = model(features)\n",
    "        \n",
    "        # note that the PyTorch implementation of\n",
    "        # CrossEntropyLoss works with logits, not\n",
    "        # probabilities\n",
    "        cost = F.cross_entropy(logits, targets)\n",
    "        optimizer.zero_grad()\n",
    "        cost.backward()\n",
    "        \n",
    "        ### UPDATE MODEL PARAMETERS\n",
    "        optimizer.step()\n",
    "        \n",
    "        ### LOGGING\n",
    "        if not batch_idx % 50:\n",
    "            print ('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f' \n",
    "                   %(epoch+1, num_epochs, batch_idx, \n",
    "                     len(train_dataset)//batch_size, cost))\n",
    "        \n",
    "    with torch.set_grad_enabled(False):\n",
    "        print('Epoch: %03d/%03d training accuracy: %.2f%%' % (\n",
    "              epoch+1, num_epochs, \n",
    "              compute_accuracy(model, train_loader)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test accuracy: 91.77%\n"
     ]
    }
   ],
   "source": [
    "print('Test accuracy: %.2f%%' % (compute_accuracy(model, test_loader)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch 1.0.0\n",
      "\n"
     ]
    }
   ],
   "source": [
    "%watermark -iv"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  },
  "toc": {
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
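A note on the inline comment in the training cell above ("CrossEntropyLoss works with logits, not probabilities"): F.cross_entropy applies log-softmax internally, so passing the already-softmaxed probas would silently compute a different loss. A small self-contained check (illustrative; not a cell from the notebook):

import torch
import torch.nn.functional as F

torch.manual_seed(123)
logits = torch.randn(4, 10)              # 4 examples, 10 classes
targets = torch.tensor([0, 3, 9, 1])

# cross_entropy(logits, targets) equals the mean negative log-softmax
# picked at the target indices
manual = -F.log_softmax(logits, dim=1)[torch.arange(4), targets].mean()
assert torch.allclose(F.cross_entropy(logits, targets), manual)

# passing probabilities instead of logits yields a different value
probas = F.softmax(logits, dim=1)
print(F.cross_entropy(logits, targets).item(),
      F.cross_entropy(probas, targets).item())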
"markdown", 5 | "id": "4936d1e6-5e7d-4e22-ae35-8e888927ce2d", 6 | "metadata": {}, 7 | "source": [ 8 | "# Use Pre-trained CNN as feature extractor" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "bf9e9fb5-7383-475a-93e1-decdbd59c247", 14 | "metadata": {}, 15 | "source": [ 16 | "Use MobileNetv3 as a feature extractor via the [embetter](https://github.com/koaning/embetter) scikit-learn library and [timm](https://github.com/rwightman/pytorch-image-models). Train a logistic regression classifier in scikit-learn on the embeddings." 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "id": "96b717c7-54c9-40dc-ba80-0fb47da2c0bd", 22 | "metadata": {}, 23 | "source": [ 24 | "![](images/feature-extractor.png)" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 1, 30 | "id": "64d1dd64-c45b-4092-84d1-1bfcd0998f15", 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "import os\n", 35 | "\n", 36 | "# pip install gitpython\n", 37 | "from git import Repo\n", 38 | "\n", 39 | "if not os.path.exists(\"mnist-pngs\"):\n", 40 | " Repo.clone_from(\"https://github.com/rasbt/mnist-pngs\", \"mnist-pngs\")" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "id": "3a892538-8d9b-4420-9525-26d1a4b37ae3", 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "import os\n", 51 | "import pandas as pd\n", 52 | "\n", 53 | "for name in (\"train\", \"test\"):\n", 54 | "\n", 55 | " df = pd.read_csv(f\"mnist-pngs/{name}.csv\")\n", 56 | " df[\"filepath\"] = df[\"filepath\"].apply(lambda x: \"mnist-pngs/\" + x)\n", 57 | " df = df.sample(frac=1, random_state=123).reset_index(drop=True)\n", 58 | " df.to_csv(f\"mnist-pngs/{name}_shuffled.csv\", index=None)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 3, 64 | "id": "5885e9bb-d43f-46ca-83ae-e2d63edcbb37", 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "data": { 69 | "application/vnd.jupyter.widget-view+json": { 70 | "model_id": "1fba0fcb2b1f408f85013da0d1694dd3", 71 | "version_major": 2, 72 | "version_minor": 0 73 | }, 74 | "text/plain": [ 75 | " 0%| | 0/60 [00:00)" 106 | ] 107 | }, 108 | "execution_count": 18, 109 | "metadata": {}, 110 | "output_type": "execute_result" 111 | } 112 | ], 113 | "source": [ 114 | "torch.relu(fc(inputs.view(-1, 4)))" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "## Convolution with Kernels equal to the input size" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "![](../images/fc-to-conv/fc-to-conv-1.png)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "We can obtain the same outputs if we use convolutional layers where the kernel size is the same size as the input feature array:" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 19, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "name": "stdout", 145 | "output_type": "stream", 146 | "text": [ 147 | "torch.Size([2, 1, 2, 2])\n", 148 | "torch.Size([2])\n" 149 | ] 150 | } 151 | ], 152 | "source": [ 153 | "conv = torch.nn.Conv2d(in_channels=1,\n", 154 | " out_channels=2,\n", 155 | " kernel_size=inputs.squeeze(dim=(0)).squeeze(dim=(0)).size())\n", 156 | "print(conv.weight.size())\n", 157 | "print(conv.bias.size())" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 20, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "conv.weight.data = 
    "conv.bias.data = bias"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[[[14.9000]],\n",
       "\n",
       "         [[19.0000]]]], grad_fn=)"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "torch.relu(conv(inputs))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Convolution with 1x1 Kernels"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![](../images/fc-to-conv/fc-to-conv-2.png)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Similarly, we can replace the fully connected layer using a convolutional layer when we reshape the input image into a num_inputs x 1 x 1 image:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[[[14.9000]],\n",
       "\n",
       "         [[19.0000]]]], grad_fn=)"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "conv = torch.nn.Conv2d(in_channels=4,\n",
    "                       out_channels=2,\n",
    "                       kernel_size=(1, 1))\n",
    "\n",
    "conv.weight.data = weights.view(2, 4, 1, 1)\n",
    "conv.bias.data = bias\n",
    "torch.relu(conv(inputs.view(1, 4, 1, 1)))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

--------------------------------------------------------------------------------
/pytorch_ipynb/cnn/images/cats-and-dogs-download-all.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/cnn/images/cats-and-dogs-download-all.png

--------------------------------------------------------------------------------
/pytorch_ipynb/cnn/images/celeba-files.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/cnn/images/celeba-files.png

--------------------------------------------------------------------------------
/pytorch_ipynb/cnn/images/feature-extractor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/cnn/images/feature-extractor.png

--------------------------------------------------------------------------------
/pytorch_ipynb/data/iris.data:
--------------------------------------------------------------------------------
5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa 4 | 4.6,3.1,1.5,0.2,Iris-setosa 5 | 5.0,3.6,1.4,0.2,Iris-setosa 6 | 5.4,3.9,1.7,0.4,Iris-setosa 7 | 4.6,3.4,1.4,0.3,Iris-setosa 8 | 5.0,3.4,1.5,0.2,Iris-setosa 9 | 4.4,2.9,1.4,0.2,Iris-setosa 10 | 4.9,3.1,1.5,0.1,Iris-setosa 11 | 5.4,3.7,1.5,0.2,Iris-setosa 12 | 4.8,3.4,1.6,0.2,Iris-setosa 13 | 4.8,3.0,1.4,0.1,Iris-setosa 14 | 4.3,3.0,1.1,0.1,Iris-setosa 15 | 5.8,4.0,1.2,0.2,Iris-setosa 16 | 5.7,4.4,1.5,0.4,Iris-setosa 17 | 5.4,3.9,1.3,0.4,Iris-setosa 18 | 5.1,3.5,1.4,0.3,Iris-setosa 19 | 5.7,3.8,1.7,0.3,Iris-setosa 20 | 5.1,3.8,1.5,0.3,Iris-setosa 21 | 5.4,3.4,1.7,0.2,Iris-setosa 22 | 5.1,3.7,1.5,0.4,Iris-setosa 23 | 4.6,3.6,1.0,0.2,Iris-setosa 24 | 5.1,3.3,1.7,0.5,Iris-setosa 25 | 4.8,3.4,1.9,0.2,Iris-setosa 26 | 5.0,3.0,1.6,0.2,Iris-setosa 27 | 5.0,3.4,1.6,0.4,Iris-setosa 28 | 5.2,3.5,1.5,0.2,Iris-setosa 29 | 5.2,3.4,1.4,0.2,Iris-setosa 30 | 4.7,3.2,1.6,0.2,Iris-setosa 31 | 4.8,3.1,1.6,0.2,Iris-setosa 32 | 5.4,3.4,1.5,0.4,Iris-setosa 33 | 5.2,4.1,1.5,0.1,Iris-setosa 34 | 5.5,4.2,1.4,0.2,Iris-setosa 35 | 4.9,3.1,1.5,0.1,Iris-setosa 36 | 5.0,3.2,1.2,0.2,Iris-setosa 37 | 5.5,3.5,1.3,0.2,Iris-setosa 38 | 4.9,3.1,1.5,0.1,Iris-setosa 39 | 4.4,3.0,1.3,0.2,Iris-setosa 40 | 5.1,3.4,1.5,0.2,Iris-setosa 41 | 5.0,3.5,1.3,0.3,Iris-setosa 42 | 4.5,2.3,1.3,0.3,Iris-setosa 43 | 4.4,3.2,1.3,0.2,Iris-setosa 44 | 5.0,3.5,1.6,0.6,Iris-setosa 45 | 5.1,3.8,1.9,0.4,Iris-setosa 46 | 4.8,3.0,1.4,0.3,Iris-setosa 47 | 5.1,3.8,1.6,0.2,Iris-setosa 48 | 4.6,3.2,1.4,0.2,Iris-setosa 49 | 5.3,3.7,1.5,0.2,Iris-setosa 50 | 5.0,3.3,1.4,0.2,Iris-setosa 51 | 7.0,3.2,4.7,1.4,Iris-versicolor 52 | 6.4,3.2,4.5,1.5,Iris-versicolor 53 | 6.9,3.1,4.9,1.5,Iris-versicolor 54 | 5.5,2.3,4.0,1.3,Iris-versicolor 55 | 6.5,2.8,4.6,1.5,Iris-versicolor 56 | 5.7,2.8,4.5,1.3,Iris-versicolor 57 | 6.3,3.3,4.7,1.6,Iris-versicolor 58 | 4.9,2.4,3.3,1.0,Iris-versicolor 59 | 6.6,2.9,4.6,1.3,Iris-versicolor 60 | 5.2,2.7,3.9,1.4,Iris-versicolor 61 | 5.0,2.0,3.5,1.0,Iris-versicolor 62 | 5.9,3.0,4.2,1.5,Iris-versicolor 63 | 6.0,2.2,4.0,1.0,Iris-versicolor 64 | 6.1,2.9,4.7,1.4,Iris-versicolor 65 | 5.6,2.9,3.6,1.3,Iris-versicolor 66 | 6.7,3.1,4.4,1.4,Iris-versicolor 67 | 5.6,3.0,4.5,1.5,Iris-versicolor 68 | 5.8,2.7,4.1,1.0,Iris-versicolor 69 | 6.2,2.2,4.5,1.5,Iris-versicolor 70 | 5.6,2.5,3.9,1.1,Iris-versicolor 71 | 5.9,3.2,4.8,1.8,Iris-versicolor 72 | 6.1,2.8,4.0,1.3,Iris-versicolor 73 | 6.3,2.5,4.9,1.5,Iris-versicolor 74 | 6.1,2.8,4.7,1.2,Iris-versicolor 75 | 6.4,2.9,4.3,1.3,Iris-versicolor 76 | 6.6,3.0,4.4,1.4,Iris-versicolor 77 | 6.8,2.8,4.8,1.4,Iris-versicolor 78 | 6.7,3.0,5.0,1.7,Iris-versicolor 79 | 6.0,2.9,4.5,1.5,Iris-versicolor 80 | 5.7,2.6,3.5,1.0,Iris-versicolor 81 | 5.5,2.4,3.8,1.1,Iris-versicolor 82 | 5.5,2.4,3.7,1.0,Iris-versicolor 83 | 5.8,2.7,3.9,1.2,Iris-versicolor 84 | 6.0,2.7,5.1,1.6,Iris-versicolor 85 | 5.4,3.0,4.5,1.5,Iris-versicolor 86 | 6.0,3.4,4.5,1.6,Iris-versicolor 87 | 6.7,3.1,4.7,1.5,Iris-versicolor 88 | 6.3,2.3,4.4,1.3,Iris-versicolor 89 | 5.6,3.0,4.1,1.3,Iris-versicolor 90 | 5.5,2.5,4.0,1.3,Iris-versicolor 91 | 5.5,2.6,4.4,1.2,Iris-versicolor 92 | 6.1,3.0,4.6,1.4,Iris-versicolor 93 | 5.8,2.6,4.0,1.2,Iris-versicolor 94 | 5.0,2.3,3.3,1.0,Iris-versicolor 95 | 5.6,2.7,4.2,1.3,Iris-versicolor 96 | 5.7,3.0,4.2,1.2,Iris-versicolor 97 | 5.7,2.9,4.2,1.3,Iris-versicolor 98 | 6.2,2.9,4.3,1.3,Iris-versicolor 99 | 5.1,2.5,3.0,1.1,Iris-versicolor 100 | 5.7,2.8,4.1,1.3,Iris-versicolor 101 | 6.3,3.3,6.0,2.5,Iris-virginica 102 | 5.8,2.7,5.1,1.9,Iris-virginica 103 | 7.1,3.0,5.9,2.1,Iris-virginica 104 | 
6.3,2.9,5.6,1.8,Iris-virginica 105 | 6.5,3.0,5.8,2.2,Iris-virginica 106 | 7.6,3.0,6.6,2.1,Iris-virginica 107 | 4.9,2.5,4.5,1.7,Iris-virginica 108 | 7.3,2.9,6.3,1.8,Iris-virginica 109 | 6.7,2.5,5.8,1.8,Iris-virginica 110 | 7.2,3.6,6.1,2.5,Iris-virginica 111 | 6.5,3.2,5.1,2.0,Iris-virginica 112 | 6.4,2.7,5.3,1.9,Iris-virginica 113 | 6.8,3.0,5.5,2.1,Iris-virginica 114 | 5.7,2.5,5.0,2.0,Iris-virginica 115 | 5.8,2.8,5.1,2.4,Iris-virginica 116 | 6.4,3.2,5.3,2.3,Iris-virginica 117 | 6.5,3.0,5.5,1.8,Iris-virginica 118 | 7.7,3.8,6.7,2.2,Iris-virginica 119 | 7.7,2.6,6.9,2.3,Iris-virginica 120 | 6.0,2.2,5.0,1.5,Iris-virginica 121 | 6.9,3.2,5.7,2.3,Iris-virginica 122 | 5.6,2.8,4.9,2.0,Iris-virginica 123 | 7.7,2.8,6.7,2.0,Iris-virginica 124 | 6.3,2.7,4.9,1.8,Iris-virginica 125 | 6.7,3.3,5.7,2.1,Iris-virginica 126 | 7.2,3.2,6.0,1.8,Iris-virginica 127 | 6.2,2.8,4.8,1.8,Iris-virginica 128 | 6.1,3.0,4.9,1.8,Iris-virginica 129 | 6.4,2.8,5.6,2.1,Iris-virginica 130 | 7.2,3.0,5.8,1.6,Iris-virginica 131 | 7.4,2.8,6.1,1.9,Iris-virginica 132 | 7.9,3.8,6.4,2.0,Iris-virginica 133 | 6.4,2.8,5.6,2.2,Iris-virginica 134 | 6.3,2.8,5.1,1.5,Iris-virginica 135 | 6.1,2.6,5.6,1.4,Iris-virginica 136 | 7.7,3.0,6.1,2.3,Iris-virginica 137 | 6.3,3.4,5.6,2.4,Iris-virginica 138 | 6.4,3.1,5.5,1.8,Iris-virginica 139 | 6.0,3.0,4.8,1.8,Iris-virginica 140 | 6.9,3.1,5.4,2.1,Iris-virginica 141 | 6.7,3.1,5.6,2.4,Iris-virginica 142 | 6.9,3.1,5.1,2.3,Iris-virginica 143 | 5.8,2.7,5.1,1.9,Iris-virginica 144 | 6.8,3.2,5.9,2.3,Iris-virginica 145 | 6.7,3.3,5.7,2.5,Iris-virginica 146 | 6.7,3.0,5.2,2.3,Iris-virginica 147 | 6.3,2.5,5.0,1.9,Iris-virginica 148 | 6.5,3.0,5.2,2.0,Iris-virginica 149 | 6.2,3.4,5.4,2.3,Iris-virginica 150 | 5.9,3.0,5.1,1.8,Iris-virginica 151 | 152 | -------------------------------------------------------------------------------- /pytorch_ipynb/data/perceptron_toydata.txt: -------------------------------------------------------------------------------- 1 | 0.77 -1.14 0 2 | -0.33 1.44 0 3 | 0.91 -3.07 0 4 | -0.37 -1.91 0 5 | -1.84 -1.13 0 6 | -1.50 0.34 0 7 | -0.63 -1.53 0 8 | -1.08 -1.23 0 9 | 0.39 -1.99 0 10 | -1.26 -2.90 0 11 | -5.27 -0.78 0 12 | -0.49 -2.74 0 13 | 1.48 -3.74 0 14 | -1.64 -1.96 0 15 | 0.45 0.36 0 16 | -1.48 -1.17 0 17 | -2.94 -4.47 0 18 | -2.19 -1.48 0 19 | 0.02 -0.02 0 20 | -2.24 -2.12 0 21 | -3.17 -3.69 0 22 | -4.09 1.03 0 23 | -2.41 -2.31 0 24 | -3.45 -0.61 0 25 | -3.96 -2.00 0 26 | -2.95 -1.16 0 27 | -2.42 -3.35 0 28 | -1.74 -1.10 0 29 | -1.61 -1.28 0 30 | -2.59 -2.21 0 31 | -2.64 -2.20 0 32 | -2.84 -4.12 0 33 | -1.45 -2.26 0 34 | -3.98 -1.05 0 35 | -2.97 -1.63 0 36 | -0.68 -1.52 0 37 | -0.10 -3.43 0 38 | -1.14 -2.66 0 39 | -2.92 -2.51 0 40 | -2.14 -1.62 0 41 | -3.33 -0.44 0 42 | -1.05 -3.85 0 43 | 0.38 0.95 0 44 | -0.05 -1.95 0 45 | -3.20 -0.22 0 46 | -2.26 0.01 0 47 | -1.41 -0.33 0 48 | -1.20 -0.71 0 49 | -1.69 0.80 0 50 | -1.52 -1.14 0 51 | 3.88 0.65 1 52 | 0.73 2.97 1 53 | 0.83 3.94 1 54 | 1.59 1.25 1 55 | 3.92 3.48 1 56 | 3.87 2.91 1 57 | 1.14 3.91 1 58 | 1.73 2.80 1 59 | 2.95 1.84 1 60 | 2.61 2.92 1 61 | 2.38 0.90 1 62 | 2.30 3.33 1 63 | 1.31 1.85 1 64 | 1.56 3.85 1 65 | 2.67 2.41 1 66 | 1.23 2.54 1 67 | 1.33 2.03 1 68 | 1.36 2.68 1 69 | 2.58 1.79 1 70 | 2.40 0.91 1 71 | 0.51 2.44 1 72 | 2.17 2.64 1 73 | 4.38 2.94 1 74 | 1.09 3.12 1 75 | 0.68 1.54 1 76 | 1.93 3.71 1 77 | 1.26 1.17 1 78 | 1.90 1.34 1 79 | 3.13 0.92 1 80 | 0.85 1.56 1 81 | 1.50 3.93 1 82 | 2.95 2.09 1 83 | 0.77 2.84 1 84 | 1.00 0.46 1 85 | 3.19 2.32 1 86 | 2.92 2.32 1 87 | 2.86 1.35 1 88 | 0.97 2.68 1 89 | 1.20 1.31 1 90 | 1.54 
2.02 1
1.65 0.63 1
1.36 -0.22 1
2.63 0.40 1
0.90 2.05 1
1.26 3.54 1
0.71 2.27 1
1.96 0.83 1
2.52 1.83 1
2.77 2.82 1
4.16 3.34 1

--------------------------------------------------------------------------------
/pytorch_ipynb/gan/images/screenshot-downl-celeba-aligned.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/gan/images/screenshot-downl-celeba-aligned.png

--------------------------------------------------------------------------------
/pytorch_ipynb/gan/images/screenshot-radford-dcgan-generator.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/gan/images/screenshot-radford-dcgan-generator.png

--------------------------------------------------------------------------------
/pytorch_ipynb/helper.py:
--------------------------------------------------------------------------------
import os
import imageio
import numpy as np


def quickdraw_npy_to_imagefile(inpath, outpath, filetype='png', subset=None):
    """
    Creates a folder with subfolders for each image class
    from the Quickdraw dataset (https://quickdraw.withgoogle.com)
    downloaded in .npy format.

    To download the .npy formatted dataset:
    gsutil -m cp gs://quickdraw_dataset/full/numpy_bitmap/*.npy quickdraw-png

    Usage example:
    quickdraw_npy_to_imagefile('quickdraw-npy', 'quickdraw-png')

    Parameters
    ----------

    inpath : str
        string specifying the path to the input directory containing
        the .npy files

    outpath : str
        string specifying the path for the output images

    subset : tuple or list (default=None)
        A subset of categories to consider. E.g.
        `("lollipop", "binoculars", "mouse", "basket")`

    """
    if not os.path.exists(outpath):
        os.mkdir(outpath)
    npy_list = [i for i in os.listdir(inpath) if i.endswith('.npy')]

    if subset:
        npy_list = [i for i in npy_list if i.split('.npy')[0] in subset]

    if not len(npy_list):
        raise ValueError('No .npy files found in %s' % inpath)

    npy_paths = [os.path.join(inpath, i) for i in npy_list]

    for i, j in zip(npy_list, npy_paths):

        label = (i.split('-')[-1]).split('.npy')[0]
        folder = os.path.join(outpath, label)
        if not os.path.exists(folder):
            os.mkdir(folder)
        X = np.load(j)

        cnt = 0
        for row in X:
            img_array = row.reshape(28, 28)
            assert cnt < 1000000
            outfile = os.path.join(folder, '%s_%06d.%s' % (
                label, cnt, filetype))
            imageio.imwrite(outfile,
                            img_array[:, :])
            cnt += 1
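Because quickdraw_npy_to_imagefile() writes one subfolder per class label, the output directory follows the layout that torchvision's ImageFolder expects. A brief sketch (not from the repository; it reuses the docstring's example paths and subset):

from torchvision import datasets, transforms

quickdraw_npy_to_imagefile('quickdraw-npy', 'quickdraw-png',
                           subset=('lollipop', 'binoculars', 'mouse', 'basket'))

# each class subfolder becomes one label in the resulting dataset
quickdraw_dataset = datasets.ImageFolder('quickdraw-png',
                                         transform=transforms.ToTensor())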
--------------------------------------------------------------------------------
/pytorch_ipynb/helper_data.py:
--------------------------------------------------------------------------------
import torch
from torch.utils.data import sampler
from torchvision import datasets
from torch.utils.data import DataLoader
from torch.utils.data import SubsetRandomSampler
from torchvision import transforms


class UnNormalize(object):
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        """
        Parameters:
        ------------
        tensor (Tensor): Tensor image of size (C, H, W) to be normalized.

        Returns:
        ------------
        Tensor: Normalized image.
        """
        for t, m, s in zip(tensor, self.mean, self.std):
            t.mul_(s).add_(m)
        return tensor


def get_dataloaders_mnist(batch_size, num_workers=0,
                          validation_fraction=None,
                          train_transforms=None,
                          test_transforms=None):

    if train_transforms is None:
        train_transforms = transforms.ToTensor()

    if test_transforms is None:
        test_transforms = transforms.ToTensor()

    train_dataset = datasets.MNIST(root='data',
                                   train=True,
                                   transform=train_transforms,
                                   download=True)

    valid_dataset = datasets.MNIST(root='data',
                                   train=True,
                                   transform=test_transforms)

    test_dataset = datasets.MNIST(root='data',
                                  train=False,
                                  transform=test_transforms)

    if validation_fraction is not None:
        num = int(validation_fraction * 60000)
        train_indices = torch.arange(0, 60000 - num)
        valid_indices = torch.arange(60000 - num, 60000)

        train_sampler = SubsetRandomSampler(train_indices)
        valid_sampler = SubsetRandomSampler(valid_indices)

        valid_loader = DataLoader(dataset=valid_dataset,
                                  batch_size=batch_size,
                                  num_workers=num_workers,
                                  sampler=valid_sampler)

        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=batch_size,
                                  num_workers=num_workers,
                                  drop_last=True,
                                  sampler=train_sampler)

    else:
        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=batch_size,
                                  num_workers=num_workers,
                                  drop_last=True,
                                  shuffle=True)

    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=batch_size,
                             num_workers=num_workers,
                             shuffle=False)

    if validation_fraction is None:
        return train_loader, test_loader
    else:
        return train_loader, valid_loader, test_loader
def get_dataloaders_cifar10(batch_size, num_workers=0,
                            validation_fraction=None,
                            train_transforms=None,
                            test_transforms=None):

    if train_transforms is None:
        train_transforms = transforms.ToTensor()

    if test_transforms is None:
        test_transforms = transforms.ToTensor()

    train_dataset = datasets.CIFAR10(root='data',
                                     train=True,
                                     transform=train_transforms,
                                     download=True)

    valid_dataset = datasets.CIFAR10(root='data',
                                     train=True,
                                     transform=test_transforms)

    test_dataset = datasets.CIFAR10(root='data',
                                    train=False,
                                    transform=test_transforms)

    if validation_fraction is not None:
        num = int(validation_fraction * 50000)
        train_indices = torch.arange(0, 50000 - num)
        valid_indices = torch.arange(50000 - num, 50000)

        train_sampler = SubsetRandomSampler(train_indices)
        valid_sampler = SubsetRandomSampler(valid_indices)

        valid_loader = DataLoader(dataset=valid_dataset,
                                  batch_size=batch_size,
                                  num_workers=num_workers,
                                  sampler=valid_sampler)

        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=batch_size,
                                  num_workers=num_workers,
                                  drop_last=True,
                                  sampler=train_sampler)

    else:
        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=batch_size,
                                  num_workers=num_workers,
                                  drop_last=True,
                                  shuffle=True)

    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=batch_size,
                             num_workers=num_workers,
                             shuffle=False)

    if validation_fraction is None:
        return train_loader, test_loader
    else:
        return train_loader, valid_loader, test_loader
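Both loader factories share the same calling convention and return signature; note that the validation split is carved from the tail indices of the training set (randomized in order by SubsetRandomSampler, but not a random subset of examples). A short usage sketch (batch size, worker count, and validation fraction are arbitrary example values):

from helper_data import get_dataloaders_mnist, get_dataloaders_cifar10

# with a validation split: three loaders are returned
train_loader, valid_loader, test_loader = get_dataloaders_mnist(
    batch_size=128, num_workers=2, validation_fraction=0.1)

# without one: only train and test loaders
train_loader, test_loader = get_dataloaders_cifar10(batch_size=128)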
model(features) 53 |             _, predicted_labels = torch.max(logits, 1) 54 |             all_targets.extend(targets.to('cpu')) 55 |             all_predictions.extend(predicted_labels.to('cpu')) 56 | 57 | 58 |     all_predictions = np.array(all_predictions) 59 |     all_targets = np.array(all_targets) 60 | 61 |     class_labels = np.unique(np.concatenate((all_targets, all_predictions))) 62 |     if class_labels.shape[0] == 1: 63 |         if class_labels[0] != 0: 64 |             class_labels = np.array([0, class_labels[0]]) 65 |         else: 66 |             class_labels = np.array([class_labels[0], 1]) 67 |     n_labels = class_labels.shape[0] 68 |     lst = [] 69 |     z = list(zip(all_targets, all_predictions)) 70 |     for combi in product(class_labels, repeat=2): 71 |         lst.append(z.count(combi)) 72 |     mat = np.asarray(lst).reshape(n_labels, n_labels) 73 |     return mat -------------------------------------------------------------------------------- /pytorch_ipynb/helper_plotting.py: -------------------------------------------------------------------------------- 1 | # imports from installed libraries 2 | import os 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import torch 6 | 7 | 8 | def plot_training_loss(minibatch_loss_list, num_epochs, iter_per_epoch, 9 |                        results_dir=None, averaging_iterations=100): 10 | 11 |     plt.figure() 12 |     ax1 = plt.subplot(1, 1, 1) 13 |     ax1.plot(range(len(minibatch_loss_list)), 14 |              (minibatch_loss_list), label='Minibatch Loss') 15 | 16 |     if len(minibatch_loss_list) > 1000: 17 |         ax1.set_ylim([ 18 |             0, np.max(minibatch_loss_list[1000:])*1.5 19 |         ]) 20 |     ax1.set_xlabel('Iterations') 21 |     ax1.set_ylabel('Loss') 22 | 23 |     ax1.plot(np.convolve(minibatch_loss_list, 24 |                          np.ones(averaging_iterations,)/averaging_iterations, 25 |                          mode='valid'), 26 |              label='Running Average') 27 |     ax1.legend() 28 | 29 |     ################### 30 |     # Set second x-axis 31 |     ax2 = ax1.twiny() 32 |     newlabel = list(range(num_epochs+1)) 33 | 34 |     newpos = [e*iter_per_epoch for e in newlabel] 35 | 36 |     ax2.set_xticks(newpos[::10]) 37 |     ax2.set_xticklabels(newlabel[::10]) 38 | 39 |     ax2.xaxis.set_ticks_position('bottom') 40 |     ax2.xaxis.set_label_position('bottom') 41 |     ax2.spines['bottom'].set_position(('outward', 45)) 42 |     ax2.set_xlabel('Epochs') 43 |     ax2.set_xlim(ax1.get_xlim()) 44 |     ################### 45 | 46 |     plt.tight_layout() 47 | 48 |     if results_dir is not None: 49 |         image_path = os.path.join(results_dir, 'plot_training_loss.pdf') 50 |         plt.savefig(image_path) 51 | 52 | 53 | def plot_accuracy(train_acc_list, valid_acc_list, results_dir): 54 | 55 |     num_epochs = len(train_acc_list) 56 | 57 |     plt.plot(np.arange(1, num_epochs+1), 58 |              train_acc_list, label='Training') 59 |     plt.plot(np.arange(1, num_epochs+1), 60 |              valid_acc_list, label='Validation') 61 | 62 |     plt.xlabel('Epoch') 63 |     plt.ylabel('Accuracy') 64 |     plt.legend() 65 | 66 |     plt.tight_layout() 67 | 68 |     if results_dir is not None: 69 |         image_path = os.path.join( 70 |             results_dir, 'plot_acc_training_validation.pdf') 71 |         plt.savefig(image_path) 72 | 73 | 74 | def show_examples(model, data_loader, unnormalizer=None, class_dict=None): 75 | 76 | 77 |     for batch_idx, (features, targets) in enumerate(data_loader): 78 | 79 |         with torch.no_grad(): 80 |             features = features 81 |             targets = targets 82 |             logits = model(features) 83 |             predictions = torch.argmax(logits, dim=1) 84 |         break 85 | 86 |     fig, axes = plt.subplots(nrows=3, ncols=5, 87 |                              sharex=True, sharey=True) 88 | 89 |     if unnormalizer is not None: 90 |         for idx in range(features.shape[0]): 91 |             features[idx] = unnormalizer(features[idx])
92 | nhwc_img = np.transpose(features, axes=(0, 2, 3, 1)) 93 | 94 | if nhwc_img.shape[-1] == 1: 95 | nhw_img = np.squeeze(nhwc_img.numpy(), axis=3) 96 | 97 | for idx, ax in enumerate(axes.ravel()): 98 | ax.imshow(nhw_img[idx], cmap='binary') 99 | if class_dict is not None: 100 | ax.title.set_text(f'P: {class_dict[predictions[idx].item()]}' 101 | f'\nT: {class_dict[targets[idx].item()]}') 102 | else: 103 | ax.title.set_text(f'P: {predictions[idx]} | T: {targets[idx]}') 104 | ax.axison = False 105 | 106 | else: 107 | 108 | for idx, ax in enumerate(axes.ravel()): 109 | ax.imshow(nhwc_img[idx]) 110 | if class_dict is not None: 111 | ax.title.set_text(f'P: {class_dict[predictions[idx].item()]}' 112 | f'\nT: {class_dict[targets[idx].item()]}') 113 | else: 114 | ax.title.set_text(f'P: {predictions[idx]} | T: {targets[idx]}') 115 | ax.axison = False 116 | plt.tight_layout() 117 | plt.show() 118 | 119 | 120 | def plot_confusion_matrix(conf_mat, 121 | hide_spines=False, 122 | hide_ticks=False, 123 | figsize=None, 124 | cmap=None, 125 | colorbar=False, 126 | show_absolute=True, 127 | show_normed=False, 128 | class_names=None): 129 | 130 | if not (show_absolute or show_normed): 131 | raise AssertionError('Both show_absolute and show_normed are False') 132 | if class_names is not None and len(class_names) != len(conf_mat): 133 | raise AssertionError('len(class_names) should be equal to number of' 134 | 'classes in the dataset') 135 | 136 | total_samples = conf_mat.sum(axis=1)[:, np.newaxis] 137 | normed_conf_mat = conf_mat.astype('float') / total_samples 138 | 139 | fig, ax = plt.subplots(figsize=figsize) 140 | ax.grid(False) 141 | if cmap is None: 142 | cmap = plt.cm.Blues 143 | 144 | if figsize is None: 145 | figsize = (len(conf_mat)*1.25, len(conf_mat)*1.25) 146 | 147 | if show_normed: 148 | matshow = ax.matshow(normed_conf_mat, cmap=cmap) 149 | else: 150 | matshow = ax.matshow(conf_mat, cmap=cmap) 151 | 152 | if colorbar: 153 | fig.colorbar(matshow) 154 | 155 | for i in range(conf_mat.shape[0]): 156 | for j in range(conf_mat.shape[1]): 157 | cell_text = "" 158 | if show_absolute: 159 | num = conf_mat[i, j].astype(np.int64) 160 | cell_text += format(num, 'd') 161 | if show_normed: 162 | cell_text += "\n" + '(' 163 | cell_text += format(normed_conf_mat[i, j], '.2f') + ')' 164 | else: 165 | cell_text += format(normed_conf_mat[i, j], '.2f') 166 | ax.text(x=j, 167 | y=i, 168 | s=cell_text, 169 | va='center', 170 | ha='center', 171 | color="white" if normed_conf_mat[i, j] > 0.5 else "black") 172 | 173 | if class_names is not None: 174 | tick_marks = np.arange(len(class_names)) 175 | plt.xticks(tick_marks, class_names, rotation=90) 176 | plt.yticks(tick_marks, class_names) 177 | 178 | if hide_spines: 179 | ax.spines['right'].set_visible(False) 180 | ax.spines['top'].set_visible(False) 181 | ax.spines['left'].set_visible(False) 182 | ax.spines['bottom'].set_visible(False) 183 | ax.yaxis.set_ticks_position('left') 184 | ax.xaxis.set_ticks_position('bottom') 185 | if hide_ticks: 186 | ax.axes.get_yaxis().set_ticks([]) 187 | ax.axes.get_xaxis().set_ticks([]) 188 | 189 | plt.xlabel('predicted label') 190 | plt.ylabel('true label') 191 | return fig, ax -------------------------------------------------------------------------------- /pytorch_ipynb/helper_train.py: -------------------------------------------------------------------------------- 1 | from helper_evaluate import compute_accuracy 2 | from helper_evaluate import compute_epoch_loss 3 | 4 | import time 5 | import torch 6 | import torch.nn.functional as 
F 7 | 8 | from collections import OrderedDict 9 | import json 10 | import subprocess 11 | import sys 12 | import xml.etree.ElementTree 13 | 14 | 15 | def train_classifier_simple_v1(num_epochs, model, optimizer, device, 16 | train_loader, valid_loader=None, 17 | loss_fn=None, logging_interval=100, 18 | skip_epoch_stats=False): 19 | 20 | log_dict = {'train_loss_per_batch': [], 21 | 'train_acc_per_epoch': [], 22 | 'train_loss_per_epoch': [], 23 | 'valid_acc_per_epoch': [], 24 | 'valid_loss_per_epoch': []} 25 | 26 | if loss_fn is None: 27 | loss_fn = F.cross_entropy 28 | 29 | start_time = time.time() 30 | for epoch in range(num_epochs): 31 | 32 | model.train() 33 | for batch_idx, (features, targets) in enumerate(train_loader): 34 | 35 | features = features.to(device) 36 | targets = targets.to(device) 37 | 38 | # FORWARD AND BACK PROP 39 | logits = model(features) 40 | if isinstance(logits, torch.distributed.rpc.api.RRef): 41 | logits = logits.local_value() 42 | loss = loss_fn(logits, targets) 43 | optimizer.zero_grad() 44 | 45 | loss.backward() 46 | 47 | # UPDATE MODEL PARAMETERS 48 | optimizer.step() 49 | 50 | # LOGGING 51 | log_dict['train_loss_per_batch'].append(loss.item()) 52 | 53 | if not batch_idx % logging_interval: 54 | print('Epoch: %03d/%03d | Batch %04d/%04d | Loss: %.4f' 55 | % (epoch+1, num_epochs, batch_idx, 56 | len(train_loader), loss)) 57 | 58 | if not skip_epoch_stats: 59 | model.eval() 60 | 61 | with torch.set_grad_enabled(False): # save memory during inference 62 | 63 | train_acc = compute_accuracy(model, train_loader, device) 64 | train_loss = compute_epoch_loss(model, train_loader, device) 65 | print('***Epoch: %03d/%03d | Train. Acc.: %.3f%% | Loss: %.3f' % ( 66 | epoch+1, num_epochs, train_acc, train_loss)) 67 | log_dict['train_loss_per_epoch'].append(train_loss.item()) 68 | log_dict['train_acc_per_epoch'].append(train_acc.item()) 69 | 70 | if valid_loader is not None: 71 | valid_acc = compute_accuracy(model, valid_loader, device) 72 | valid_loss = compute_epoch_loss(model, valid_loader, device) 73 | print('***Epoch: %03d/%03d | Valid. 
Acc.: %.3f%% | Loss: %.3f' % ( 74 | epoch+1, num_epochs, valid_acc, valid_loss)) 75 | log_dict['valid_loss_per_epoch'].append(valid_loss.item()) 76 | log_dict['valid_acc_per_epoch'].append(valid_acc.item()) 77 | 78 | print('Time elapsed: %.2f min' % ((time.time() - start_time)/60)) 79 | 80 | print('Total Training Time: %.2f min' % ((time.time() - start_time)/60)) 81 | 82 | return log_dict 83 | 84 | 85 | def train_classifier_simple_v2( 86 | model, num_epochs, train_loader, 87 | valid_loader, test_loader, optimizer, 88 | device, logging_interval=50, 89 | best_model_save_path=None, 90 | scheduler=None, 91 | skip_train_acc=False, 92 | scheduler_on='valid_acc'): 93 | 94 | start_time = time.time() 95 | minibatch_loss_list, train_acc_list, valid_acc_list = [], [], [] 96 | best_valid_acc, best_epoch = -float('inf'), 0 97 | 98 | for epoch in range(num_epochs): 99 | 100 | model.train() 101 | for batch_idx, (features, targets) in enumerate(train_loader): 102 | 103 | features = features.to(device) 104 | targets = targets.to(device) 105 | 106 | # ## FORWARD AND BACK PROP 107 | logits = model(features) 108 | loss = torch.nn.functional.cross_entropy(logits, targets) 109 | optimizer.zero_grad() 110 | 111 | loss.backward() 112 | 113 | # ## UPDATE MODEL PARAMETERS 114 | optimizer.step() 115 | 116 | # ## LOGGING 117 | minibatch_loss_list.append(loss.item()) 118 | if not batch_idx % logging_interval: 119 | print(f'Epoch: {epoch+1:03d}/{num_epochs:03d} ' 120 | f'| Batch {batch_idx:04d}/{len(train_loader):04d} ' 121 | f'| Loss: {loss:.4f}') 122 | 123 | model.eval() 124 | with torch.no_grad(): # save memory during inference 125 | if not skip_train_acc: 126 | train_acc = compute_accuracy(model, train_loader, device=device).item() 127 | else: 128 | train_acc = float('nan') 129 | valid_acc = compute_accuracy(model, valid_loader, device=device).item() 130 | train_acc_list.append(train_acc) 131 | valid_acc_list.append(valid_acc) 132 | 133 | if valid_acc > best_valid_acc: 134 | best_valid_acc, best_epoch = valid_acc, epoch+1 135 | if best_model_save_path: 136 | torch.save(model.state_dict(), best_model_save_path) 137 | 138 | print(f'Epoch: {epoch+1:03d}/{num_epochs:03d} ' 139 | f'| Train: {train_acc :.2f}% ' 140 | f'| Validation: {valid_acc :.2f}% ' 141 | f'| Best Validation ' 142 | f'(Ep. 
{best_epoch:03d}): {best_valid_acc :.2f}%') 143 | 144 | elapsed = (time.time() - start_time)/60 145 | print(f'Time elapsed: {elapsed:.2f} min') 146 | 147 | if scheduler is not None: 148 | 149 | if scheduler_on == 'valid_acc': 150 | scheduler.step(valid_acc_list[-1]) 151 | elif scheduler_on == 'minibatch_loss': 152 | scheduler.step(minibatch_loss_list[-1]) 153 | else: 154 | raise ValueError('Invalid `scheduler_on` choice.') 155 | 156 | elapsed = (time.time() - start_time)/60 157 | print(f'Total Training Time: {elapsed:.2f} min') 158 | 159 | test_acc = compute_accuracy(model, test_loader, device=device) 160 | print(f'Test accuracy {test_acc :.2f}%') 161 | 162 | return minibatch_loss_list, train_acc_list, valid_acc_list -------------------------------------------------------------------------------- /pytorch_ipynb/helper_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import numpy as np 4 | import torch 5 | 6 | 7 | def set_all_seeds(seed): 8 | os.environ["PL_GLOBAL_SEED"] = str(seed) 9 | random.seed(seed) 10 | np.random.seed(seed) 11 | torch.manual_seed(seed) 12 | torch.cuda.manual_seed_all(seed) 13 | 14 | 15 | def set_deterministic(): 16 | if torch.cuda.is_available(): 17 | torch.backends.cudnn.benchmark = False 18 | torch.backends.cudnn.deterministic = True 19 | torch.set_deterministic(True) 20 | -------------------------------------------------------------------------------- /pytorch_ipynb/images/alexnet/alexnet-groups.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/alexnet/alexnet-groups.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/alexnet/alexnet-paper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/alexnet/alexnet-paper.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/alexnet/grouped-convolutions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/alexnet/grouped-convolutions.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/alexnet/grouped-convolutions.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/alexnet/grouped-convolutions.pptx -------------------------------------------------------------------------------- /pytorch_ipynb/images/cyclical-learning-rate/cyclical-lr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/cyclical-learning-rate/cyclical-lr.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/dataparallel/dataparallel.png: -------------------------------------------------------------------------------- 
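A note on `set_deterministic` in `helper_utils.py` above: `torch.set_deterministic` was a short-lived API, and newer PyTorch releases replace it with `torch.use_deterministic_algorithms`. A minimal updated sketch (the wrapper name `set_deterministic_new` is illustrative, not part of the original helpers):

import torch

def set_deterministic_new():
    # same intent as set_deterministic() above, for newer PyTorch versions
    if torch.cuda.is_available():
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
    torch.use_deterministic_algorithms(True)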
https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/dataparallel/dataparallel.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/dataparallel/minibatch-update-dataparallel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/dataparallel/minibatch-update-dataparallel.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/dataparallel/minibatch-update.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/dataparallel/minibatch-update.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/densenet/densenet-fig-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/densenet/densenet-fig-2.jpg -------------------------------------------------------------------------------- /pytorch_ipynb/images/densenet/densenet-tab-1-dnet121.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/densenet/densenet-tab-1-dnet121.jpg -------------------------------------------------------------------------------- /pytorch_ipynb/images/fc-to-conv/fc-to-conv-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/fc-to-conv/fc-to-conv-1.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/fc-to-conv/fc-to-conv-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/fc-to-conv/fc-to-conv-2.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/lenet/lenet-5_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/lenet/lenet-5_1.jpg -------------------------------------------------------------------------------- /pytorch_ipynb/images/manual-gradients/graph_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/manual-gradients/graph_1.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/manual-gradients/graph_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/manual-gradients/graph_2.png -------------------------------------------------------------------------------- 
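The helper modules above (`helper_data.py`, `helper_evaluate.py`, `helper_plotting.py`, `helper_train.py`, `helper_utils.py`) are meant to be imported by the notebooks in this folder. Below is a minimal end-to-end sketch of how they compose for CIFAR-10; the model choice and all hyperparameter values are illustrative assumptions, not settings prescribed by the repository:

import torch
from torchvision import models

from helper_data import get_dataloaders_cifar10
from helper_evaluate import compute_confusion_matrix
from helper_plotting import (plot_training_loss, plot_accuracy,
                             plot_confusion_matrix)
from helper_train import train_classifier_simple_v2
from helper_utils import set_all_seeds

set_all_seeds(123)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# hold out 10% of the 50,000 training images for validation
train_loader, valid_loader, test_loader = get_dataloaders_cifar10(
    batch_size=256, num_workers=2, validation_fraction=0.1)

model = models.mobilenet_v3_small(num_classes=10).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max')

minibatch_loss_list, train_acc_list, valid_acc_list = \
    train_classifier_simple_v2(
        model=model, num_epochs=10,
        train_loader=train_loader, valid_loader=valid_loader,
        test_loader=test_loader, optimizer=optimizer, device=device,
        scheduler=scheduler, scheduler_on='valid_acc')

plot_training_loss(minibatch_loss_list, num_epochs=10,
                   iter_per_epoch=len(train_loader))
plot_accuracy(train_acc_list, valid_acc_list, results_dir=None)

conf_mat = compute_confusion_matrix(model, test_loader, device=device)
plot_confusion_matrix(conf_mat, show_normed=True)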
/pytorch_ipynb/images/manual-gradients/graph_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/manual-gradients/graph_3.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/resnets/resnet-ex-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/resnets/resnet-ex-1-1.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/resnets/resnet-ex-1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/resnets/resnet-ex-1-2.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/resnets/resnet-ex-1-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/resnets/resnet-ex-1-3.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/resnets/resnet101/resnet101-arch-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/resnets/resnet101/resnet101-arch-1.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/resnets/resnet152/resnet152-arch-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/resnets/resnet152/resnet152-arch-1.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/resnets/resnet34/resnet34-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/resnets/resnet34/resnet34-arch.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/resnets/resnet50/resnet-50-bottleneck.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/resnets/resnet50/resnet-50-bottleneck.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/resnets/resnet50/resnet50-arch-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/resnets/resnet50/resnet50-arch-1.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/vgg16/vgg16-arch-table.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/vgg16/vgg16-arch-table.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/vgg19/vgg19-arch-table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/vgg19/vgg19-arch-table.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/weight-sharing/weight-sharing-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/weight-sharing/weight-sharing-1.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/weight-sharing/weight-sharing-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/images/weight-sharing/weight-sharing-2.png -------------------------------------------------------------------------------- /pytorch_ipynb/lightning/lightning-mlp-best-model_images/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/lightning/lightning-mlp-best-model_images/1.png -------------------------------------------------------------------------------- /pytorch_ipynb/lightning/lightning-mlp_images/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/lightning/lightning-mlp_images/1.png -------------------------------------------------------------------------------- /pytorch_ipynb/lightning/lightning-mlp_images/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/lightning/lightning-mlp_images/2.png -------------------------------------------------------------------------------- /pytorch_ipynb/mechanics/baseline_memory_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/mechanics/baseline_memory_plot.png -------------------------------------------------------------------------------- /pytorch_ipynb/mechanics/custom-data-loader-csv.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.\n", 8 | "- Author: Sebastian Raschka\n", 9 | "- GitHub Repository: https://github.com/rasbt/deeplearning-models" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Sebastian Raschka \n", 22 | "\n", 
23 | "CPython 3.7.1\n", 24 | "IPython 7.2.0\n", 25 | "\n", 26 | "torch 1.0.0\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "%load_ext watermark\n", 32 | "%watermark -a 'Sebastian Raschka' -v -p torch" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "# Model Zoo -- Using PyTorch Dataset Loading Utilities for Custom Datasets (CSV files converted to HDF5)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "This notebook provides an example for how to load a dataset from an HDF5 file created from a CSV file, using PyTorch's data loading utilities. For a more in-depth discussion, please see the official\n", 47 | "\n", 48 | "- [Data Loading and Processing Tutorial](http://pytorch.org/tutorials/beginner/data_loading_tutorial.html)\n", 49 | "- [torch.utils.data](http://pytorch.org/docs/master/data.html) API documentation\n", 50 | "\n", 51 | "An Hierarchical Data Format (HDF) is a convenient way that allows quick access to data instances during minibatch learning if a dataset is too large to fit into memory. The approach outlined in this notebook uses uses the common [HDF5](https://support.hdfgroup.org/HDF5/) format and should be accessible to any programming language or tool with an HDF5 API.\n", 52 | "\n", 53 | "**In this example, we are going to use the Iris dataset for illustrative purposes. Let's pretend it's our large training dataset that doesn't fit into memory**.\n", 54 | "\n" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "## Imports" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 2, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "import pandas as pd\n", 71 | "import numpy as np\n", 72 | "import h5py\n", 73 | "import torch\n", 74 | "from torch.utils.data import Dataset\n", 75 | "from torch.utils.data import DataLoader" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "## Converting a CSV file to HDF5" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "In this first step, we are going to process a CSV file (here, Iris) into an HDF5 database:" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 3, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "# suppose this is a large CSV that does not \n", 99 | "# fit into memory:\n", 100 | "csv_path = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'\n", 101 | "\n", 102 | "# Get number of lines in the CSV file if it's on your hard drive:\n", 103 | "#num_lines = subprocess.check_output(['wc', '-l', in_csv])\n", 104 | "#num_lines = int(nlines.split()[0]) \n", 105 | "num_lines = 150\n", 106 | "num_features = 4\n", 107 | "\n", 108 | "class_dict = {'Iris-setosa': 0,\n", 109 | " 'Iris-versicolor': 1,\n", 110 | " 'Iris-virginica': 2}\n", 111 | "\n", 112 | "# use 10,000 or 100,000 or so for large files\n", 113 | "chunksize = 10\n", 114 | "\n", 115 | "# this is your HDF5 database:\n", 116 | "with h5py.File('iris.h5', 'w') as h5f:\n", 117 | " \n", 118 | " # use num_features-1 if the csv file has a column header\n", 119 | " dset1 = h5f.create_dataset('features',\n", 120 | " shape=(num_lines, num_features),\n", 121 | " compression=None,\n", 122 | " dtype='float32')\n", 123 | " dset2 = h5f.create_dataset('labels',\n", 124 | " shape=(num_lines,),\n", 125 | " compression=None,\n", 126 | " dtype='int32')\n", 127 | "\n", 128 | " # change 
range argument from 0 -> 1 if your csv file contains a column header\n", 129 | "    for i in range(0, num_lines, chunksize): \n", 130 | "\n", 131 | "        df = pd.read_csv(csv_path, \n", 132 | "                         header=None, # no header, define column header manually later\n", 133 | "                         nrows=chunksize, # number of rows to read at each iteration\n", 134 | "                         skiprows=i) # skip rows that were already read\n", 135 | "        \n", 136 | "        df[4] = df[4].map(class_dict)\n", 137 | "\n", 138 | "        features = df.values[:, :4]\n", 139 | "        labels = df.values[:, -1]\n", 140 | "        \n", 141 | "        # use i-1 and i-1+chunksize if the csv file has a column header\n", 142 | "        dset1[i:i+chunksize, :] = features\n", 143 | "        dset2[i:i+chunksize] = labels" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "After creating the database, let's double-check that everything works correctly:" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 4, 156 | "metadata": {}, 157 | "outputs": [ 158 | { 159 | "name": "stdout", 160 | "output_type": "stream", 161 | "text": [ 162 | "(150, 4)\n", 163 | "(150,)\n" 164 | ] 165 | } 166 | ], 167 | "source": [ 168 | "with h5py.File('iris.h5', 'r') as h5f:\n", 169 | "    print(h5f['features'].shape)\n", 170 | "    print(h5f['labels'].shape)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 5, 176 | "metadata": {}, 177 | "outputs": [ 178 | { 179 | "name": "stdout", 180 | "output_type": "stream", 181 | "text": [ 182 | "Features of entry no. 99: [5.7 2.8 4.1 1.3]\n", 183 | "Class label of entry no. 99: 1\n" 184 | ] 185 | } 186 | ], 187 | "source": [ 188 | "with h5py.File('iris.h5', 'r') as h5f:\n", 189 | "    print('Features of entry no. 99:', h5f['features'][99])\n", 190 | "    print('Class label of entry no. 99:', h5f['labels'][99])" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "## Implementing a Custom Dataset Class" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "Now, we implement a custom `Dataset` for reading the training examples. The `__getitem__` method will\n", 205 | "\n", 206 | "1. read a single training example from HDF5 based on an `index` (more on batching later)\n", 207 | "2. return a single training example and its corresponding label\n", 208 | "\n", 209 | "Note that we will keep an open connection to the database for efficiency via `self.h5f = h5py.File(h5_path, 'r')` -- you may want to close it when you are done (more on this later)." 
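One caveat the notebook does not cover: an HDF5 handle opened in `__init__` is not always safe to share across `DataLoader` worker processes (`num_workers > 0`). A common workaround, sketched below as a variant of the notebook's class (the name `LazyHdf5Dataset` is illustrative, not part of the original), is to open the file lazily so that each worker creates its own handle:

import h5py
from torch.utils.data import Dataset

class LazyHdf5Dataset(Dataset):
    """Like Hdf5Dataset, but opens the HDF5 file lazily so that each
    DataLoader worker process ends up with its own file handle."""

    def __init__(self, h5_path, transform=None):
        self.h5_path = h5_path
        self.transform = transform
        self.h5f = None
        # open briefly just to record the dataset size
        with h5py.File(h5_path, 'r') as h5f:
            self.num_entries = h5f['labels'].shape[0]

    def __getitem__(self, index):
        if self.h5f is None:  # first access in this process
            self.h5f = h5py.File(self.h5_path, 'r')
        features = self.h5f['features'][index]
        label = self.h5f['labels'][index]
        if self.transform is not None:
            features = self.transform(features)
        return features, label

    def __len__(self):
        return self.num_entries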
210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 6, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "class Hdf5Dataset(Dataset):\n", 219 | " \"\"\"Custom Dataset for loading entries from HDF5 databases\"\"\"\n", 220 | "\n", 221 | " def __init__(self, h5_path, transform=None):\n", 222 | " \n", 223 | " self.h5f = h5py.File(h5_path, 'r')\n", 224 | " self.num_entries = self.h5f['labels'].shape[0]\n", 225 | " self.transform = transform\n", 226 | "\n", 227 | " def __getitem__(self, index):\n", 228 | " \n", 229 | " features = self.h5f['features'][index]\n", 230 | " label = self.h5f['labels'][index]\n", 231 | " if self.transform is not None:\n", 232 | " features = self.transform(features)\n", 233 | " return features, label\n", 234 | "\n", 235 | " def __len__(self):\n", 236 | " return self.num_entries" 237 | ] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "metadata": {}, 242 | "source": [ 243 | "Now that we have created our custom Dataset class, we can initialize a Dataset instance for the training examples using the 'iris.h5' database file. Then, we initialize a `DataLoader` that allows us to read from the dataset." 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 7, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "train_dataset = Hdf5Dataset(h5_path='iris.h5',\n", 253 | " transform=None)\n", 254 | "\n", 255 | "train_loader = DataLoader(dataset=train_dataset,\n", 256 | " batch_size=50,\n", 257 | " shuffle=True,\n", 258 | " num_workers=4) " 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": {}, 264 | "source": [ 265 | "That's it! Now we can iterate over an epoch using the train_loader as an iterator and use the features and labels from the training dataset for model training as shown in the next section" 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "metadata": {}, 271 | "source": [ 272 | "## Iterating Through the Custom Dataset" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 8, 278 | "metadata": {}, 279 | "outputs": [ 280 | { 281 | "name": "stdout", 282 | "output_type": "stream", 283 | "text": [ 284 | "Epoch: 1 | Batch index: 0 | Batch size: 50\n", 285 | "Epoch: 1 | Batch index: 1 | Batch size: 50\n", 286 | "Epoch: 1 | Batch index: 2 | Batch size: 50\n", 287 | "Epoch: 2 | Batch index: 0 | Batch size: 50\n", 288 | "Epoch: 2 | Batch index: 1 | Batch size: 50\n", 289 | "Epoch: 2 | Batch index: 2 | Batch size: 50\n", 290 | "Epoch: 3 | Batch index: 0 | Batch size: 50\n", 291 | "Epoch: 3 | Batch index: 1 | Batch size: 50\n", 292 | "Epoch: 3 | Batch index: 2 | Batch size: 50\n", 293 | "Epoch: 4 | Batch index: 0 | Batch size: 50\n", 294 | "Epoch: 4 | Batch index: 1 | Batch size: 50\n", 295 | "Epoch: 4 | Batch index: 2 | Batch size: 50\n", 296 | "Epoch: 5 | Batch index: 0 | Batch size: 50\n", 297 | "Epoch: 5 | Batch index: 1 | Batch size: 50\n", 298 | "Epoch: 5 | Batch index: 2 | Batch size: 50\n" 299 | ] 300 | } 301 | ], 302 | "source": [ 303 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", 304 | "torch.manual_seed(0)\n", 305 | "\n", 306 | "num_epochs = 5\n", 307 | "for epoch in range(num_epochs):\n", 308 | "\n", 309 | " for batch_idx, (x, y) in enumerate(train_loader):\n", 310 | " \n", 311 | " print('Epoch:', epoch+1, end='')\n", 312 | " print(' | Batch index:', batch_idx, end='')\n", 313 | " print(' | Batch size:', y.size()[0])\n", 314 | " \n", 315 | " x = x.to(device)\n", 316 | " y = y.to(device)\n", 
317 | "\n", 318 | " # do model training on x and y here" 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 324 | "source": [ 325 | "**Remember that we kept an open connection to the HDF5 database in the `Hdf5Dataset` (via `self.h5f = h5py.File(h5_path, 'r')`). Once we are done, we may want to close this connection:**" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 9, 331 | "metadata": {}, 332 | "outputs": [], 333 | "source": [ 334 | "train_dataset.h5f.close()" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": 10, 340 | "metadata": {}, 341 | "outputs": [ 342 | { 343 | "name": "stdout", 344 | "output_type": "stream", 345 | "text": [ 346 | "torch 1.0.0\n", 347 | "pandas 0.23.4\n", 348 | "numpy 1.15.4\n", 349 | "h5py 2.8.0\n", 350 | "\n" 351 | ] 352 | } 353 | ], 354 | "source": [ 355 | "%watermark -iv" 356 | ] 357 | } 358 | ], 359 | "metadata": { 360 | "kernelspec": { 361 | "display_name": "Python 3", 362 | "language": "python", 363 | "name": "python3" 364 | }, 365 | "language_info": { 366 | "codemirror_mode": { 367 | "name": "ipython", 368 | "version": 3 369 | }, 370 | "file_extension": ".py", 371 | "mimetype": "text/x-python", 372 | "name": "python", 373 | "nbconvert_exporter": "python", 374 | "pygments_lexer": "ipython3", 375 | "version": "3.7.1" 376 | }, 377 | "toc": { 378 | "nav_menu": {}, 379 | "number_sections": true, 380 | "sideBar": true, 381 | "skip_h1_title": false, 382 | "title_cell": "Table of Contents", 383 | "title_sidebar": "Contents", 384 | "toc_cell": false, 385 | "toc_position": {}, 386 | "toc_section_display": true, 387 | "toc_window_display": false 388 | } 389 | }, 390 | "nbformat": 4, 391 | "nbformat_minor": 2 392 | } 393 | -------------------------------------------------------------------------------- /pytorch_ipynb/mechanics/dataloader-nesting.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "561ef1d6-e8fc-431f-9c58-4c862b2813ec", 6 | "metadata": {}, 7 | "source": [ 8 | "# PyTorch DataLoader State and Nested Iterations" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "37d57e04-facc-4543-9939-c724a57ce9c6", 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "data": { 19 | "text/plain": [ 20 | "'2.1.0'" 21 | ] 22 | }, 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "output_type": "execute_result" 26 | } 27 | ], 28 | "source": [ 29 | "import torch\n", 30 | "torch.__version__" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "id": "9fb9f618-f274-4f76-951f-40508cb85c1d", 36 | "metadata": {}, 37 | "source": [ 38 | "Iterating over a dataloader in a separate function will not affect its state in the main training loop. In PyTorch, a DataLoader is typically an iterable that can be iterated over multiple times independently. 
Each iteration over the DataLoader starts from the beginning and goes through the dataset in a fresh sequence (if `shuffle=True`, the sequence will be different each time).\n" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 2, 44 | "id": "1813f041-0366-4be3-890f-99c1a9c9d831", 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "name": "stdout", 49 | "output_type": "stream", 50 | "text": [ 51 | "main loop: 1\n", 52 | "nested loop: 1\n", 53 | "nested loop: 2\n", 54 | "nested loop: 3\n", 55 | "nested loop: 4\n", 56 | "nested loop: 5\n", 57 | "main loop: 2\n", 58 | "nested loop: 1\n", 59 | "nested loop: 2\n", 60 | "nested loop: 3\n", 61 | "nested loop: 4\n", 62 | "nested loop: 5\n", 63 | "main loop: 3\n", 64 | "nested loop: 1\n", 65 | "nested loop: 2\n", 66 | "nested loop: 3\n", 67 | "nested loop: 4\n", 68 | "nested loop: 5\n", 69 | "main loop: 4\n", 70 | "nested loop: 1\n", 71 | "nested loop: 2\n", 72 | "nested loop: 3\n", 73 | "nested loop: 4\n", 74 | "nested loop: 5\n", 75 | "main loop: 5\n", 76 | "nested loop: 1\n", 77 | "nested loop: 2\n", 78 | "nested loop: 3\n", 79 | "nested loop: 4\n", 80 | "nested loop: 5\n", 81 | "main loop: 6\n", 82 | "nested loop: 1\n", 83 | "nested loop: 2\n", 84 | "nested loop: 3\n", 85 | "nested loop: 4\n", 86 | "nested loop: 5\n", 87 | "main loop: 7\n", 88 | "nested loop: 1\n", 89 | "nested loop: 2\n", 90 | "nested loop: 3\n", 91 | "nested loop: 4\n", 92 | "nested loop: 5\n", 93 | "main loop: 8\n", 94 | "nested loop: 1\n", 95 | "nested loop: 2\n", 96 | "nested loop: 3\n", 97 | "nested loop: 4\n", 98 | "nested loop: 5\n", 99 | "main loop: 9\n", 100 | "nested loop: 1\n", 101 | "nested loop: 2\n", 102 | "nested loop: 3\n", 103 | "nested loop: 4\n", 104 | "nested loop: 5\n", 105 | "main loop: 10\n", 106 | "nested loop: 1\n", 107 | "nested loop: 2\n", 108 | "nested loop: 3\n", 109 | "nested loop: 4\n", 110 | "nested loop: 5\n" 111 | ] 112 | } 113 | ], 114 | "source": [ 115 | "from torch.utils.data import Dataset, DataLoader\n", 116 | "\n", 117 | "# Custom Dataset class\n", 118 | "class IntegerDataset(Dataset):\n", 119 | "    def __init__(self, start, end):\n", 120 | "        self.data = list(range(start, end + 1))\n", 121 | "\n", 122 | "    def __len__(self):\n", 123 | "        return len(self.data)\n", 124 | "\n", 125 | "    def __getitem__(self, idx):\n", 126 | "        return self.data[idx]\n", 127 | "\n", 128 | "# Create a Dataset for integers 1 to 10\n", 129 | "integer_dataset = IntegerDataset(1, 10)\n", 130 | "\n", 131 | "# Create a DataLoader\n", 132 | "integer_loader = DataLoader(integer_dataset, batch_size=1, shuffle=False)\n", 133 | "\n", 134 | "# A function to estimate the loss based on a subset of training examples\n", 135 | "def calc_loss(data_loader, iters):\n", 136 | "    for j in data_loader:\n", 137 | "        print(\"nested loop:\", j.item())\n", 138 | "        if j >= iters: \n", 139 | "            break\n", 140 | "\n", 141 | "# Example: Iterate over the DataLoader\n", 142 | "for i in integer_loader:\n", 143 | "    print(\"main loop:\", i.item())\n", 144 | "    calc_loss(integer_loader, iters=5)" 145 | ] 146 | } 147 | ], 148 | "metadata": { 149 | "kernelspec": { 150 | "display_name": "Python 3 (ipykernel)", 151 | "language": "python", 152 | "name": "python3" 153 | }, 154 | "language_info": { 155 | "codemirror_mode": { 156 | "name": "ipython", 157 | "version": 3 158 | }, 159 | "file_extension": ".py", 160 | "mimetype": "text/x-python", 161 | "name": "python", 162 | "nbconvert_exporter": "python", 163 | "pygments_lexer": "ipython3", 164 | "version": "3.10.12" 165 | } 166 | }, 167 | 
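The independence shown above holds because each `for` loop implicitly calls `iter()` on the DataLoader and receives a fresh iterator. An explicitly created iterator, in contrast, does keep its position. A small sketch of the difference, reusing the `integer_loader` from above:

it = iter(integer_loader)
print(next(it).item())  # 1
print(next(it).item())  # 2

for i in integer_loader:  # fresh iterator, starts at 1 again
    print(i.item())       # 1
    break

print(next(it).item())  # 3 -- the explicit iterator kept its position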
"nbformat": 4, 168 | "nbformat_minor": 5 169 | } 170 | -------------------------------------------------------------------------------- /pytorch_ipynb/mechanics/deterministic_benchmark_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import time 3 | 4 | import torch.nn.functional as F 5 | import torch.nn as nn 6 | 7 | from torchvision import datasets 8 | from torchvision import transforms 9 | from torch.utils.data import DataLoader 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | def conv3x3(in_planes, out_planes, stride=1): 21 | """3x3 convolution with padding""" 22 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 23 | padding=1, bias=False) 24 | 25 | 26 | class Bottleneck(nn.Module): 27 | expansion = 4 28 | 29 | def __init__(self, inplanes, planes, stride=1, downsample=None): 30 | super(Bottleneck, self).__init__() 31 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 32 | self.bn1 = nn.BatchNorm2d(planes) 33 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 34 | padding=1, bias=False) 35 | self.bn2 = nn.BatchNorm2d(planes) 36 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 37 | self.bn3 = nn.BatchNorm2d(planes * 4) 38 | self.relu = nn.ReLU(inplace=True) 39 | self.downsample = downsample 40 | self.stride = stride 41 | 42 | def forward(self, x): 43 | residual = x 44 | 45 | out = self.conv1(x) 46 | out = self.bn1(out) 47 | out = self.relu(out) 48 | 49 | out = self.conv2(out) 50 | out = self.bn2(out) 51 | out = self.relu(out) 52 | 53 | out = self.conv3(out) 54 | out = self.bn3(out) 55 | 56 | if self.downsample is not None: 57 | residual = self.downsample(x) 58 | 59 | out += residual 60 | out = self.relu(out) 61 | 62 | return out 63 | 64 | 65 | 66 | 67 | class ResNet(nn.Module): 68 | 69 | def __init__(self, block, layers, num_classes, grayscale): 70 | self.inplanes = 64 71 | if grayscale: 72 | in_dim = 1 73 | else: 74 | in_dim = 3 75 | super(ResNet, self).__init__() 76 | self.conv1 = nn.Conv2d(in_dim, 64, kernel_size=7, stride=2, padding=3, 77 | bias=False) 78 | self.bn1 = nn.BatchNorm2d(64) 79 | self.relu = nn.ReLU(inplace=True) 80 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 81 | self.layer1 = self._make_layer(block, 64, layers[0]) 82 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 83 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 84 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 85 | self.avgpool = nn.AvgPool2d(7, stride=1, padding=2) 86 | #self.fc = nn.Linear(2048 * block.expansion, num_classes) 87 | self.fc = nn.Linear(2048, num_classes) 88 | 89 | for m in self.modules(): 90 | if isinstance(m, nn.Conv2d): 91 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 92 | m.weight.data.normal_(0, (2. 
/ n)**.5) 93 | elif isinstance(m, nn.BatchNorm2d): 94 | m.weight.data.fill_(1) 95 | m.bias.data.zero_() 96 | 97 | def _make_layer(self, block, planes, blocks, stride=1): 98 | downsample = None 99 | if stride != 1 or self.inplanes != planes * block.expansion: 100 | downsample = nn.Sequential( 101 | nn.Conv2d(self.inplanes, planes * block.expansion, 102 | kernel_size=1, stride=stride, bias=False), 103 | nn.BatchNorm2d(planes * block.expansion), 104 | ) 105 | 106 | layers = [] 107 | layers.append(block(self.inplanes, planes, stride, downsample)) 108 | self.inplanes = planes * block.expansion 109 | for i in range(1, blocks): 110 | layers.append(block(self.inplanes, planes)) 111 | 112 | return nn.Sequential(*layers) 113 | 114 | def forward(self, x): 115 | x = self.conv1(x) 116 | x = self.bn1(x) 117 | x = self.relu(x) 118 | x = self.maxpool(x) 119 | 120 | x = self.layer1(x) 121 | x = self.layer2(x) 122 | x = self.layer3(x) 123 | x = self.layer4(x) 124 | 125 | #x = self.avgpool(x) 126 | x = x.view(x.size(0), -1) 127 | logits = self.fc(x) 128 | probas = F.softmax(logits, dim=1) 129 | return logits 130 | 131 | 132 | 133 | def resnet101(num_classes, grayscale): 134 | """Constructs a ResNet-101 model.""" 135 | model = ResNet(block=Bottleneck, 136 | layers=[3, 4, 23, 3], 137 | num_classes=num_classes, 138 | grayscale=grayscale) 139 | return model 140 | 141 | 142 | 143 | 144 | -------------------------------------------------------------------------------- /pytorch_ipynb/mechanics/figures/fashion-mnist-sprite.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/mechanics/figures/fashion-mnist-sprite.png -------------------------------------------------------------------------------- /pytorch_ipynb/mechanics/figures/gradient-checkpointing-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/mechanics/figures/gradient-checkpointing-1.png -------------------------------------------------------------------------------- /pytorch_ipynb/mechanics/figures/gradient-checkpointing-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/mechanics/figures/gradient-checkpointing-2.png -------------------------------------------------------------------------------- /pytorch_ipynb/mechanics/figures/gradient-checkpointing-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/mechanics/figures/gradient-checkpointing-3.png -------------------------------------------------------------------------------- /pytorch_ipynb/mechanics/manual-gradients.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.\n", 8 | "- Author: Sebastian Raschka\n", 9 | "- GitHub Repository: https://github.com/rasbt/deeplearning-models" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | 
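A quick shape sanity check for the `resnet101` constructor above (illustrative assumptions: CIFAR-10-sized inputs and 10 classes). Because the average-pooling call is commented out in `forward`, the final `nn.Linear(2048, num_classes)` only matches a 1x1x2048 feature map, which is exactly what a 32x32 input produces after the stride-2 stem, the max pooling, and the three stride-2 stages:

import torch
from deterministic_benchmark_utils import resnet101

model = resnet101(num_classes=10, grayscale=False)
model.eval()
with torch.no_grad():
    logits = model(torch.rand(4, 3, 32, 32))  # 32 -> 16 -> 8 -> 8 -> 4 -> 2 -> 1
print(logits.shape)  # torch.Size([4, 10])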
"execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Sebastian Raschka \n", 22 | "\n", 23 | "CPython 3.6.8\n", 24 | "IPython 7.2.0\n", 25 | "\n", 26 | "torch 1.0.0\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "%load_ext watermark\n", 32 | "%watermark -a 'Sebastian Raschka' -v -p torch" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "# Model Zoo -- Getting Gradients of an Intermediate Variable in PyTorch" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "This notebook illustrates how we can fetch the intermediate gradients of a function that is composed of multiple inputs and multiple computation steps in PyTorch. Note that gradient is simply a vector listing the derivatives of a function with respect\n", 47 | "to each argument of the function. So, strictly speaking, we are discussing how to obtain the partial derivatives here." 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "Assume we have this simple toy graph:\n", 55 | " \n", 56 | "![](../images/manual-gradients/graph_1.png)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "Now, we provide the following values to b, x, and w; the red numbers indicate the intermediate values of the computation and the end result:\n", 64 | "\n", 65 | "![](../images/manual-gradients/graph_2.png)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "Now, the next image shows the partial derivatives of the output node, a, with respect to the input nodes (b, x, and w) as well as all the intermediate partial derivatives:\n", 73 | "\n", 74 | "\n", 75 | "![](../images/manual-gradients/graph_3.png)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "(The images were taken from my PyData Talk in August 2017, for more information of how to arrive at these derivatives, please see the talk/slides at https://github.com/rasbt/pydata-annarbor2017-dl-tutorial; also, I put up a little calculus and differentiation primer if helpful: https://sebastianraschka.com/pdf/books/dlb/appendix_d_calculus.pdf)\n", 83 | "\n" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "For instance, if we are interested in obtaining the partial derivative of the output a with respect to each of the input and intermediate nodes, we could do the following in TensorFlow, where `d_a_b` denotes \"partial derivative of a with respect to b\" and so forth:" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 2, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "name": "stdout", 100 | "output_type": "stream", 101 | "text": [ 102 | "[[2.0], [3.0], [1.0], [1.0], [1.0]]\n" 103 | ] 104 | } 105 | ], 106 | "source": [ 107 | "import tensorflow as tf\n", 108 | "\n", 109 | "g = tf.Graph()\n", 110 | "with g.as_default() as g:\n", 111 | " \n", 112 | " x = tf.placeholder(dtype=tf.float32, shape=None, name='x')\n", 113 | " w = tf.Variable(initial_value=2, dtype=tf.float32, name='w')\n", 114 | " b = tf.Variable(initial_value=1, dtype=tf.float32, name='b')\n", 115 | " \n", 116 | " u = x * w\n", 117 | " v = u + b\n", 118 | " a = tf.nn.relu(v)\n", 119 | " \n", 120 | " d_a_x = tf.gradients(a, x)\n", 121 | " d_a_w = tf.gradients(a, w)\n", 122 | " d_a_b = tf.gradients(a, b)\n", 123 | " d_a_u = 
tf.gradients(a, u)\n", 124 | "    d_a_v = tf.gradients(a, v)\n", 125 | "\n", 126 | "\n", 127 | "with tf.Session(graph=g) as sess:\n", 128 | "    sess.run(tf.global_variables_initializer())\n", 129 | "    grads = sess.run([d_a_x, d_a_w, d_a_b, d_a_u, d_a_v], feed_dict={'x:0': 3})\n", 130 | "\n", 131 | "print(grads)\n" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "## Intermediate Gradients in PyTorch via autograd's `grad`" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "In PyTorch, there are multiple ways to compute partial derivatives or gradients. If the goal is to just compute partial derivatives, the most straightforward way would be using autograd's `grad` function. By default, the `retain_graph` parameter of the `grad` function is set to `False`, which will free the graph after computing the partial derivative. Thus, if we want to obtain multiple partial derivatives, we need to set `retain_graph=True`. Note that this is a very inefficient solution though, as multiple passes over the graph are being made where intermediate results are being recalculated:" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 3, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "name": "stdout", 155 | "output_type": "stream", 156 | "text": [ 157 | "d_a_x: (tensor([2.]),)\n", 158 | "d_a_w: (tensor([3.]),)\n", 159 | "d_a_b: (tensor([1.]),)\n", 160 | "d_a_u: (tensor([1.]),)\n", 161 | "d_a_v: (tensor([1.]),)\n" 162 | ] 163 | } 164 | ], 165 | "source": [ 166 | "import torch\n", 167 | "import torch.nn.functional as F\n", 168 | "from torch.autograd import grad\n", 169 | "\n", 170 | "\n", 171 | "x = torch.tensor([3.], requires_grad=True)\n", 172 | "w = torch.tensor([2.], requires_grad=True)\n", 173 | "b = torch.tensor([1.], requires_grad=True)\n", 174 | "\n", 175 | "u = x * w\n", 176 | "v = u + b\n", 177 | "a = F.relu(v)\n", 178 | "\n", 179 | "d_a_b = grad(a, b, retain_graph=True)\n", 180 | "d_a_u = grad(a, u, retain_graph=True)\n", 181 | "d_a_v = grad(a, v, retain_graph=True)\n", 182 | "d_a_w = grad(a, w, retain_graph=True)\n", 183 | "d_a_x = grad(a, x)\n", 184 | " \n", 185 | "\n", 186 | "for name, grad in zip(\"xwbuv\", (d_a_x, d_a_w, d_a_b, d_a_u, d_a_v)):\n", 187 | "    print('d_a_%s:' % name, grad)" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "As suggested by Adam Paszke, this can be rewritten in a more efficient manner by passing a tuple to the `grad` function so that it can reuse intermediate results and only require one pass over the graph:" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 4, 200 | "metadata": {}, 201 | "outputs": [ 202 | { 203 | "name": "stdout", 204 | "output_type": "stream", 205 | "text": [ 206 | "d_a_x: tensor([2.])\n", 207 | "d_a_w: tensor([3.])\n", 208 | "d_a_b: tensor([1.])\n", 209 | "d_a_u: tensor([1.])\n", 210 | "d_a_v: tensor([1.])\n" 211 | ] 212 | } 213 | ], 214 | "source": [ 215 | "import torch\n", 216 | "import torch.nn.functional as F\n", 217 | "from torch.autograd import grad\n", 218 | "\n", 219 | "\n", 220 | "x = torch.tensor([3.], requires_grad=True)\n", 221 | "w = torch.tensor([2.], requires_grad=True)\n", 222 | "b = torch.tensor([1.], requires_grad=True)\n", 223 | "\n", 224 | "u = x * w\n", 225 | "v = u + b\n", 226 | "a = F.relu(v)\n", 227 | "\n", 228 | "partial_derivatives = grad(a, (x, w, b, u, v))\n", 229 | "\n", 230 | "for name, grad in 
zip(\"xwbuv\", (partial_derivatives)):\n", 231 | " print('d_a_%s:' % name, grad)" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "## Intermediate Gradients in PyTorch via `retain_grad`" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "In PyTorch, we most often use the `backward()` method on an output variable to compute its partial derivative (or gradient) with respect to its inputs (typically, the weights and bias units of a neural network). By default, PyTorch only stores the gradients of the leaf variables (e.g., the weights and biases) via their `grad` attribute to save memory. So, if we are interested in the intermediate results in a computational graph, we can use the `retain_grad` method to store gradients of non-leaf variables as follows:" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 5, 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "name": "stdout", 255 | "output_type": "stream", 256 | "text": [ 257 | "d_a_x: tensor([2.])\n", 258 | "d_a_w: tensor([3.])\n", 259 | "d_a_b: tensor([1.])\n", 260 | "d_a_u: tensor([1.])\n", 261 | "d_a_v: tensor([1.])\n" 262 | ] 263 | } 264 | ], 265 | "source": [ 266 | "import torch\n", 267 | "import torch.nn.functional as F\n", 268 | "from torch.autograd import Variable\n", 269 | "\n", 270 | "\n", 271 | "x = torch.tensor([3.], requires_grad=True)\n", 272 | "w = torch.tensor([2.], requires_grad=True)\n", 273 | "b = torch.tensor([1.], requires_grad=True)\n", 274 | "\n", 275 | "u = x * w\n", 276 | "v = u + b\n", 277 | "a = F.relu(v)\n", 278 | "\n", 279 | "u.retain_grad()\n", 280 | "v.retain_grad()\n", 281 | "\n", 282 | "a.backward()\n", 283 | "\n", 284 | "for name, var in zip(\"xwbuv\", (x, w, b, u, v)):\n", 285 | " print('d_a_%s:' % name, var.grad)" 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": {}, 291 | "source": [ 292 | "## Intermediate Gradients in PyTorch Using Hooks" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "Finally, and this is a not-recommended workaround, we can use hooks to obtain intermediate gradients. While the two other approaches explained above should be preferred, this approach highlights the use of hooks, which may come in handy in certain situations.\n", 300 | "\n", 301 | "> The hook will be called every time a gradient with respect to the variable is computed. (http://pytorch.org/docs/master/autograd.html#torch.autograd.Variable.register_hook)" 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "Based on the suggestion by Adam Paszke (https://discuss.pytorch.org/t/why-cant-i-see-grad-of-an-intermediate-variable/94/7?u=rasbt), we can use these hooks in a combintation with a little helper function, `save_grad` and a `hook` closure writing the partial derivatives or gradients to a global variable `grads`. 
So, if we invoke the `backward` method on the output node `a`, all the intermediate results will be collected in `grads`, as illustrated below:" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": 6, 314 | "metadata": {}, 315 | "outputs": [ 316 | { 317 | "data": { 318 | "text/plain": [ 319 | "{'d_a_v': tensor([1.]),\n", 320 | " 'd_a_b': tensor([1.]),\n", 321 | " 'd_a_u': tensor([1.]),\n", 322 | " 'd_a_x': tensor([2.]),\n", 323 | " 'd_a_w': tensor([3.])}" 324 | ] 325 | }, 326 | "execution_count": 6, 327 | "metadata": {}, 328 | "output_type": "execute_result" 329 | } 330 | ], 331 | "source": [ 332 | "import torch\n", 333 | "import torch.nn.functional as F\n", 334 | "\n", 335 | "\n", 336 | "grads = {}\n", 337 | "def save_grad(name):\n", 338 | " def hook(grad):\n", 339 | " grads[name] = grad\n", 340 | " return hook\n", 341 | "\n", 342 | "\n", 343 | "x = torch.tensor([3.], requires_grad=True)\n", 344 | "w = torch.tensor([2.], requires_grad=True)\n", 345 | "b = torch.tensor([1.], requires_grad=True)\n", 346 | "\n", 347 | "u = x * w\n", 348 | "v = u + b\n", 349 | "\n", 350 | "x.register_hook(save_grad('d_a_x'))\n", 351 | "w.register_hook(save_grad('d_a_w'))\n", 352 | "b.register_hook(save_grad('d_a_b'))\n", 353 | "u.register_hook(save_grad('d_a_u'))\n", 354 | "v.register_hook(save_grad('d_a_v'))\n", 355 | "\n", 356 | "a = F.relu(v)\n", 357 | "\n", 358 | "a.backward()\n", 359 | "\n", 360 | "grads" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": 7, 366 | "metadata": {}, 367 | "outputs": [ 368 | { 369 | "name": "stdout", 370 | "output_type": "stream", 371 | "text": [ 372 | "tensorflow 1.12.0\n", 373 | "torch 1.0.0\n", 374 | "\n" 375 | ] 376 | } 377 | ], 378 | "source": [ 379 | "%watermark -iv" 380 | ] 381 | } 382 | ], 383 | "metadata": { 384 | "kernelspec": { 385 | "display_name": "Python 3", 386 | "language": "python", 387 | "name": "python3" 388 | }, 389 | "language_info": { 390 | "codemirror_mode": { 391 | "name": "ipython", 392 | "version": 3 393 | }, 394 | "file_extension": ".py", 395 | "mimetype": "text/x-python", 396 | "name": "python", 397 | "nbconvert_exporter": "python", 398 | "pygments_lexer": "ipython3", 399 | "version": "3.7.1" 400 | }, 401 | "toc": { 402 | "nav_menu": {}, 403 | "number_sections": true, 404 | "sideBar": true, 405 | "skip_h1_title": false, 406 | "title_cell": "Table of Contents", 407 | "title_sidebar": "Contents", 408 | "toc_cell": false, 409 | "toc_position": {}, 410 | "toc_section_display": true, 411 | "toc_window_display": false 412 | } 413 | }, 414 | "nbformat": 4, 415 | "nbformat_minor": 2 416 | } 417 | -------------------------------------------------------------------------------- /pytorch_ipynb/transformer/distilbert-benchmark/1_just-hf.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import time 4 | 5 | # from local shared.py 6 | from shared import download_data, prepare_data 7 | 8 | # HF libraries 9 | import transformers 10 | from transformers import AutoTokenizer 11 | from transformers import AutoModelForSequenceClassification 12 | from transformers import Trainer, TrainingArguments 13 | from datasets import load_metric, load_dataset 14 | 15 | 16 | def tokenize_text(batch): 17 | return tokenizer(batch["text"], truncation=True, padding=True) 18 | 19 | 20 | def compute_metrics(eval_pred): 21 | predictions, labels = eval_pred 22 | predictions = np.argmax(predictions, axis=1) 23 | acc = metric.compute(predictions=predictions, 
references=labels) 24 | return acc  # metric.compute() already returns a dict of the form {"accuracy": ...} 25 | 26 | if __name__ == "__main__": 27 | download_data() 28 | prepare_data() 29 | 30 | device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu") 31 | 32 | print("======================") 33 | print("Device", device) 34 | print("======================") 35 | 36 | imdb_dataset = load_dataset( 37 | "csv", 38 | data_files={ 39 | "train": "train.csv", 40 | "validation": "validation.csv", 41 | "test": "test.csv", 42 | }, 43 | ) 44 | 45 | tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased") 46 | imdb_tokenized = imdb_dataset.map(tokenize_text, batched=True, batch_size=12) 47 | del imdb_dataset 48 | 49 | model = AutoModelForSequenceClassification.from_pretrained( 50 | "distilbert-base-uncased", num_labels=2) 51 | model.to(device) 52 | 53 | metric = load_metric("accuracy") 54 | 55 | trainer_args = TrainingArguments(output_dir="distilbert-v1", 56 | num_train_epochs=3, 57 | evaluation_strategy="epoch", 58 | per_device_train_batch_size=12, 59 | per_device_eval_batch_size=12, 60 | learning_rate=1e-5) 61 | 62 | trainer = Trainer(model=model, 63 | args=trainer_args, 64 | compute_metrics=compute_metrics, 65 | train_dataset=imdb_tokenized["train"], 66 | eval_dataset=imdb_tokenized["validation"], 67 | tokenizer=tokenizer) 68 | 69 | start = time.time() 70 | trainer.train() 71 | train_time = (time.time()-start)/60 72 | 73 | start = time.time() 74 | outputs = trainer.predict(imdb_tokenized["test"]) 75 | inf_time = (time.time()-start)/60 76 | print(outputs.metrics) 77 | 78 | print("======================") 79 | print(f"Training time: {train_time:.2f}") 80 | print(f"Inference time: {inf_time:.2f}") 81 | 82 | print("======================") 83 | print("Transformers", transformers.__version__) -------------------------------------------------------------------------------- /pytorch_ipynb/transformer/distilbert-benchmark/2_hf_with_lightning.py: -------------------------------------------------------------------------------- 1 | import lightning as L 2 | import os 3 | import torch 4 | import time 5 | import torchmetrics 6 | from torch.utils.data import DataLoader, Dataset 7 | 8 | # from local shared.py 9 | from shared import download_data, prepare_data 10 | 11 | # HF libraries 12 | from transformers import AutoTokenizer 13 | from transformers import AutoModelForSequenceClassification 14 | from datasets import load_dataset 15 | 16 | 17 | def tokenize_text(batch): 18 | return tokenizer(batch["text"], truncation=True, padding=True) 19 | 20 | 21 | class IMDBDataset(Dataset): 22 | def __init__(self, dataset_dict, partition_key="train"): 23 | self.partition = dataset_dict[partition_key] 24 | 25 | def __getitem__(self, index): 26 | return self.partition[index] 27 | 28 | def __len__(self): 29 | return self.partition.num_rows 30 | 31 | 32 | if __name__ == "__main__": 33 | download_data() 34 | prepare_data() 35 | 36 | 37 | imdb_dataset = load_dataset( 38 | "csv", 39 | data_files={ 40 | "train": "train.csv", 41 | "validation": "validation.csv", 42 | "test": "test.csv", 43 | }, 44 | ) 45 | 46 | tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased") 47 | imdb_tokenized = imdb_dataset.map(tokenize_text, batched=True, batch_size=None) 48 | del imdb_dataset 49 | 50 | ########################## 51 | ## NEW: Dataloaders 52 | ########################## 53 | 54 | imdb_tokenized.set_format("torch", columns=["input_ids", "attention_mask", "label"]) 55 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 56 | 57 | train_dataset = 
IMDBDataset(imdb_tokenized, partition_key="train") 58 | val_dataset = IMDBDataset(imdb_tokenized, partition_key="validation") 59 | test_dataset = IMDBDataset(imdb_tokenized, partition_key="test") 60 | 61 | train_loader = DataLoader( 62 | dataset=train_dataset, 63 | batch_size=12, 64 | shuffle=True, 65 | num_workers=4) 66 | 67 | val_loader = DataLoader( 68 | dataset=val_dataset, 69 | batch_size=12, 70 | num_workers=4 71 | ) 72 | 73 | test_loader = DataLoader( 74 | dataset=test_dataset, 75 | batch_size=12, 76 | num_workers=4 77 | ) 78 | 79 | ############################### 80 | ## NEW: Lightning Model 81 | ############################### 82 | 83 | model = AutoModelForSequenceClassification.from_pretrained( 84 | "distilbert-base-uncased", num_labels=2) 85 | 86 | class LightningModel(L.LightningModule): 87 | def __init__(self, model, learning_rate=5e-5): 88 | super().__init__() 89 | 90 | self.learning_rate = learning_rate 91 | self.model = model 92 | 93 | self.val_acc = torchmetrics.Accuracy() 94 | self.test_acc = torchmetrics.Accuracy() 95 | 96 | def forward(self, input_ids, attention_mask, labels): 97 | return self.model(input_ids, attention_mask=attention_mask, labels=labels) 98 | 99 | def training_step(self, batch, batch_idx): 100 | outputs = self(batch["input_ids"], attention_mask=batch["attention_mask"], 101 | labels=batch["label"]) 102 | self.log("train_loss", outputs["loss"]) 103 | return outputs["loss"] # this is passed to the optimizer for training 104 | 105 | def validation_step(self, batch, batch_idx): 106 | outputs = self(batch["input_ids"], attention_mask=batch["attention_mask"], 107 | labels=batch["label"]) 108 | self.log("val_loss", outputs["loss"], prog_bar=True) 109 | 110 | logits = outputs["logits"] 111 | predicted_labels = torch.argmax(logits, 1) 112 | self.val_acc(predicted_labels, batch["label"]) 113 | self.log("val_acc", self.val_acc, prog_bar=True) 114 | 115 | def test_step(self, batch, batch_idx): 116 | outputs = self(batch["input_ids"], attention_mask=batch["attention_mask"], 117 | labels=batch["label"]) 118 | 119 | logits = outputs["logits"] 120 | predicted_labels = torch.argmax(logits, 1) 121 | self.test_acc(predicted_labels, batch["label"]) 122 | self.log("accuracy", self.test_acc, prog_bar=True) 123 | 124 | def configure_optimizers(self): 125 | optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate) 126 | return optimizer 127 | 128 | lightning_model = LightningModel(model) 129 | 130 | trainer = L.Trainer( 131 | max_epochs=3, 132 | accelerator="gpu", 133 | devices="auto", 134 | strategy="ddp" 135 | ) 136 | 137 | start = time.time() 138 | trainer.fit(model=lightning_model, 139 | train_dataloaders=train_loader, 140 | val_dataloaders=val_loader) 141 | train_time = (time.time()-start)/60 142 | 143 | start = time.time() 144 | outputs = trainer.test(lightning_model, dataloaders=test_loader) 145 | inf_time = (time.time()-start)/60 146 | print(outputs) 147 | 148 | print("======================") 149 | print(f"Training time: {train_time:.2f}") 150 | print(f"Inference time: {inf_time:.2f}") 151 | 152 | print("======================") 153 | print("Lightning", L.__version__) 154 | -------------------------------------------------------------------------------- /pytorch_ipynb/transformer/distilbert-benchmark/shared.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | from packaging import version 4 | import pandas as pd 5 | import sys 6 | import tarfile 7 | import time 8 | from tqdm import 
tqdm 9 | import urllib.request 10 | 11 | 12 | def download_data(): 13 | 14 | source = "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz" 15 | target = "aclImdb_v1.tar.gz" 16 | 17 | if os.path.exists(target): 18 | return 19 | 20 | def reporthook(count, block_size, total_size): 21 | global start_time 22 | if count == 0: 23 | start_time = time.time() 24 | return 25 | duration = time.time() - start_time 26 | progress_size = int(count * block_size) 27 | speed = progress_size / (1024.0**2 * duration) 28 | percent = count * block_size * 100.0 / total_size 29 | 30 | sys.stdout.write( 31 | f"\r{int(percent)}% | {progress_size / (1024.**2):.2f} MB " 32 | f"| {speed:.2f} MB/s | {duration:.2f} sec elapsed" 33 | ) 34 | sys.stdout.flush() 35 | 36 | if not os.path.isdir("aclImdb") and not os.path.isfile("aclImdb_v1.tar.gz"): 37 | urllib.request.urlretrieve(source, target, reporthook) 38 | 39 | 40 | def prepare_data(): 41 | if os.path.exists("train.csv"): 42 | return 43 | 44 | target = "aclImdb_v1.tar.gz" 45 | basepath = "aclImdb" 46 | 47 | if not os.path.isdir(basepath): 48 | 49 | with tarfile.open(target, "r:gz") as tar: 50 | tar.extractall() 51 | 52 | labels = {"pos": 1, "neg": 0} 53 | 54 | df = pd.DataFrame() 55 | 56 | with tqdm(total=50000) as pbar: 57 | for s in ("test", "train"): 58 | for l in ("pos", "neg"): 59 | path = os.path.join(basepath, s, l) 60 | for file in sorted(os.listdir(path)): 61 | with open(os.path.join(path, file), "r", encoding="utf-8") as infile: 62 | txt = infile.read() 63 | 64 | if version.parse(pd.__version__) >= version.parse("1.3.2"): 65 | x = pd.DataFrame( 66 | [[txt, labels[l]]], columns=["review", "sentiment"] 67 | ) 68 | df = pd.concat([df, x], ignore_index=True)  # keep the running index unique; duplicate indices would break the reindex shuffle below 69 | 70 | else: 71 | df = df.append([[txt, labels[l]]], ignore_index=True) 72 | pbar.update() 73 | df.columns = ["text", "label"] 74 | np.random.seed(0) 75 | df = df.reindex(np.random.permutation(df.index)) 76 | 77 | df_train = df.iloc[:35_000] 78 | df_val = df.iloc[35_000:40_000] 79 | df_test = df.iloc[40_000:] 80 | 81 | df_train.to_csv("train.csv", index=False, encoding="utf-8") 82 | df_val.to_csv("validation.csv", index=False, encoding="utf-8") 83 | df_test.to_csv("test.csv", index=False, encoding="utf-8") -------------------------------------------------------------------------------- /pytorch_ipynb/transformer/figures/feature-extractor.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/transformer/figures/feature-extractor.jpeg -------------------------------------------------------------------------------- /pytorch_ipynb/transformer/figures/finetuning-ii.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/transformer/figures/finetuning-ii.png -------------------------------------------------------------------------------- /pytorch_ipynb/viz/cnns/cats-and-dogs/datautils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from torch.utils.data import Dataset 4 | from torch.utils.data import DataLoader 5 | from PIL import Image 6 | 7 | 8 | class CatsDogsDataset(Dataset): 9 | """Custom Dataset for loading the cats-and-dogs images""" 10 | 11 | def __init__(self, img_dir, transform=None): 12 | 13 | self.img_dir = img_dir 14 | 
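# The Kaggle cats-vs-dogs archive encodes the class label in the
# file name (e.g., `cat.0.jpg` vs. `dog.0.jpg`); the prefix check
# below relies on this naming convention.
15 | 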
self.img_names = [i for i in 16 | os.listdir(img_dir) 17 | if i.endswith('.jpg')] 18 | 19 | self.y = [] 20 | for i in self.img_names: 21 | if i.split('.')[0] == 'cat': 22 | self.y.append(0) 23 | else: 24 | self.y.append(1) 25 | 26 | self.transform = transform 27 | 28 | def __getitem__(self, index): 29 | img = Image.open(os.path.join(self.img_dir, 30 | self.img_names[index])) 31 | 32 | if self.transform is not None: 33 | img = self.transform(img) 34 | 35 | label = self.y[index] 36 | return img, label 37 | 38 | def __len__(self): 39 | return len(self.y) 40 | 41 | 42 | 43 | 44 | 45 | def create_cats_and_dogs_dataloaders(batch_size, data_transforms, train_path, valid_path, test_path): 46 | train_dataset = CatsDogsDataset(img_dir=train_path, 47 | transform=data_transforms['train']) 48 | 49 | train_loader = DataLoader(dataset=train_dataset, 50 | batch_size=batch_size, 51 | drop_last=True, 52 | num_workers=4, 53 | shuffle=True) 54 | 55 | valid_dataset = CatsDogsDataset(img_dir=valid_path, 56 | transform=data_transforms['valid']) 57 | 58 | valid_loader = DataLoader(dataset=valid_dataset, 59 | batch_size=batch_size, 60 | num_workers=4, 61 | shuffle=False) 62 | 63 | test_dataset = CatsDogsDataset(img_dir=test_path, 64 | transform=data_transforms['valid']) 65 | 66 | test_loader = DataLoader(dataset=test_dataset, 67 | batch_size=batch_size, 68 | num_workers=4, 69 | shuffle=False) 70 | 71 | return train_loader, valid_loader, test_loader 72 | 73 | 74 | class UnNormalize(object): 75 | def __init__(self, mean, std): 76 | self.mean = mean 77 | self.std = std 78 | 79 | def __call__(self, tensor): 80 | """ 81 | Parameters: 82 | ------------ 83 | tensor (Tensor): Tensor image of size (C, H, W) to be normalized. 84 | 85 | Returns: 86 | ------------ 87 | Tensor: Normalized image. 88 | 89 | """ 90 | for t, m, s in zip(tensor, self.mean, self.std): 91 | t.mul_(s).add_(m) 92 | return tensor 93 | 94 | 95 | def convert_rgb_to_grayscale(im_as_arr): 96 | """ 97 | Converts RGB image to grayscale 98 | Expects and returns CHW format. 
99 | """ 100 | grayscale_im = np.sum(np.abs(im_as_arr), axis=0) 101 | im_max = np.percentile(grayscale_im, 99) 102 | im_min = np.min(grayscale_im) 103 | grayscale_im = (np.clip((grayscale_im - im_min) / (im_max - im_min), 0, 1)) 104 | grayscale_im = np.expand_dims(grayscale_im, axis=0) 105 | return grayscale_im 106 | 107 | -------------------------------------------------------------------------------- /pytorch_ipynb/viz/cnns/cats-and-dogs/gradient.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/viz/cnns/cats-and-dogs/gradient.png -------------------------------------------------------------------------------- /pytorch_ipynb/viz/cnns/cats-and-dogs/images/cats-and-dogs-download-all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/deeplearning-models/18e046926551378cd691fd871dda0f21dcd272ab/pytorch_ipynb/viz/cnns/cats-and-dogs/images/cats-and-dogs-download-all.png -------------------------------------------------------------------------------- /pytorch_ipynb/viz/cnns/cats-and-dogs/vgg16.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | ########################## 7 | ### MODEL 8 | ########################## 9 | 10 | class VGG16(torch.nn.Module): 11 | 12 | def __init__(self, num_classes): 13 | super(VGG16, self).__init__() 14 | 15 | # calculate same padding: 16 | # (w - k + 2*p)/s + 1 = o 17 | # => p = (s(o-1) - w + k)/2 18 | 19 | self.block_1 = nn.Sequential( 20 | nn.Conv2d(in_channels=3, 21 | out_channels=64, 22 | kernel_size=(3, 3), 23 | stride=(1, 1), 24 | # (1(32-1)- 32 + 3)/2 = 1 25 | padding=1), 26 | nn.ReLU(), 27 | nn.Conv2d(in_channels=64, 28 | out_channels=64, 29 | kernel_size=(3, 3), 30 | stride=(1, 1), 31 | padding=1), 32 | nn.ReLU(), 33 | nn.MaxPool2d(kernel_size=(2, 2), 34 | stride=(2, 2)) 35 | ) 36 | 37 | self.block_2 = nn.Sequential( 38 | nn.Conv2d(in_channels=64, 39 | out_channels=128, 40 | kernel_size=(3, 3), 41 | stride=(1, 1), 42 | padding=1), 43 | nn.ReLU(), 44 | nn.Conv2d(in_channels=128, 45 | out_channels=128, 46 | kernel_size=(3, 3), 47 | stride=(1, 1), 48 | padding=1), 49 | nn.ReLU(), 50 | nn.MaxPool2d(kernel_size=(2, 2), 51 | stride=(2, 2)) 52 | ) 53 | 54 | self.block_3 = nn.Sequential( 55 | nn.Conv2d(in_channels=128, 56 | out_channels=256, 57 | kernel_size=(3, 3), 58 | stride=(1, 1), 59 | padding=1), 60 | nn.ReLU(), 61 | nn.Conv2d(in_channels=256, 62 | out_channels=256, 63 | kernel_size=(3, 3), 64 | stride=(1, 1), 65 | padding=1), 66 | nn.ReLU(), 67 | nn.Conv2d(in_channels=256, 68 | out_channels=256, 69 | kernel_size=(3, 3), 70 | stride=(1, 1), 71 | padding=1), 72 | nn.ReLU(), 73 | nn.Conv2d(in_channels=256, 74 | out_channels=256, 75 | kernel_size=(3, 3), 76 | stride=(1, 1), 77 | padding=1), 78 | nn.ReLU(), 79 | nn.MaxPool2d(kernel_size=(2, 2), 80 | stride=(2, 2)) 81 | ) 82 | 83 | 84 | self.block_4 = nn.Sequential( 85 | nn.Conv2d(in_channels=256, 86 | out_channels=512, 87 | kernel_size=(3, 3), 88 | stride=(1, 1), 89 | padding=1), 90 | nn.ReLU(), 91 | nn.Conv2d(in_channels=512, 92 | out_channels=512, 93 | kernel_size=(3, 3), 94 | stride=(1, 1), 95 | padding=1), 96 | nn.ReLU(), 97 | nn.Conv2d(in_channels=512, 98 | out_channels=512, 99 | kernel_size=(3, 3), 100 | stride=(1, 1), 101 | padding=1), 102 | nn.ReLU(), 103 | 
nn.Conv2d(in_channels=512, 104 | out_channels=512, 105 | kernel_size=(3, 3), 106 | stride=(1, 1), 107 | padding=1), 108 | nn.ReLU(), 109 | nn.MaxPool2d(kernel_size=(2, 2), 110 | stride=(2, 2)) 111 | ) 112 | 113 | self.block_5 = nn.Sequential( 114 | nn.Conv2d(in_channels=512, 115 | out_channels=512, 116 | kernel_size=(3, 3), 117 | stride=(1, 1), 118 | padding=1), 119 | nn.ReLU(), 120 | nn.Conv2d(in_channels=512, 121 | out_channels=512, 122 | kernel_size=(3, 3), 123 | stride=(1, 1), 124 | padding=1), 125 | nn.ReLU(), 126 | nn.Conv2d(in_channels=512, 127 | out_channels=512, 128 | kernel_size=(3, 3), 129 | stride=(1, 1), 130 | padding=1), 131 | nn.ReLU(), 132 | nn.Conv2d(in_channels=512, 133 | out_channels=512, 134 | kernel_size=(3, 3), 135 | stride=(1, 1), 136 | padding=1), 137 | nn.ReLU(), 138 | nn.MaxPool2d(kernel_size=(2, 2), 139 | stride=(2, 2)) 140 | ) 141 | 142 | self.classifier = nn.Sequential( 143 | nn.Linear(512*2*2, 1024), # changed 4096 to 1024 144 | nn.ReLU(), 145 | nn.Linear(1024, 1024), # changed 4096 to 1024 146 | nn.ReLU(), 147 | nn.Linear(1024, num_classes) # changed 4096 to 1024 148 | ) 149 | 150 | for m in self.modules(): 151 | if isinstance(m, torch.nn.Conv2d): 152 | #n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 153 | #m.weight.data.normal_(0, np.sqrt(2. / n)) 154 | m.weight.detach().normal_(0, 0.05) 155 | if m.bias is not None: 156 | m.bias.detach().zero_() 157 | elif isinstance(m, torch.nn.Linear): 158 | m.weight.detach().normal_(0, 0.05) 159 | m.bias.detach().detach().zero_() 160 | 161 | def forward(self, x): 162 | 163 | x = self.block_1(x) 164 | x = self.block_2(x) 165 | x = self.block_3(x) 166 | x = self.block_4(x) 167 | x = self.block_5(x) 168 | 169 | logits = self.classifier(x.view(-1, 512*2*2)) 170 | probas = F.softmax(logits, dim=1) 171 | 172 | return logits, probas 173 | -------------------------------------------------------------------------------- /templates/pytorch_lightning/submit_command.sh: -------------------------------------------------------------------------------- 1 | grid run \ 2 | --instance_type 2_m60_8gb \ 3 | --framework lightning \ 4 | --gpus 2 \ 5 | tune_classification_basic.py \ 6 | --learning_rate "[0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05]" \ 7 | --batch_size "[64, 128, 256]" \ 8 | --epochs 20 9 | -------------------------------------------------------------------------------- /templates/pytorch_lightning/tune_classification_basic.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import subprocess 4 | import sys 5 | 6 | 7 | import pytorch_lightning as pl 8 | from pytorch_lightning.callbacks import ModelCheckpoint 9 | from pytorch_lightning.loggers import CSVLogger 10 | 11 | import torch 12 | from torch.utils.data import DataLoader 13 | from torchvision import transforms 14 | from torchvision import datasets 15 | from torch.utils.data.dataset import random_split 16 | 17 | 18 | def install(package): 19 | subprocess.check_call([sys.executable, "-m", "pip", "install", package]) 20 | 21 | 22 | install("torchmetrics") 23 | 24 | 25 | import torchmetrics 26 | 27 | 28 | # Argparse helper 29 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 30 | 31 | parser.add_argument("--batch_size", type=int, default=256, help="Batch size.") 32 | parser.add_argument("--epochs", type=int, default=10, help="Num. of epochs.") 33 | parser.add_argument("--workers", type=int, default=4, help="Num. 
of workers.") 34 | parser.add_argument("--learning_rate", type=int, default=0.005, help="Learning rate.") 35 | args = parser.parse_args() 36 | BATCH_SIZE = args.batch_size 37 | NUM_EPOCHS = args.epochs 38 | LEARNING_RATE = args.learning_rate 39 | NUM_WORKERS = args.workers 40 | 41 | 42 | ################################################################## 43 | # PYTORCH MODULE 44 | ################################################################## 45 | 46 | # Regular PyTorch Module 47 | class PyTorchModel(torch.nn.Module): 48 | def __init__(self, input_size, hidden_units, num_classes): 49 | super().__init__() 50 | 51 | # Initialize MLP layers 52 | all_layers = [] 53 | for hidden_unit in hidden_units: 54 | layer = torch.nn.Linear(input_size, hidden_unit, bias=False) 55 | all_layers.append(layer) 56 | all_layers.append(torch.nn.ReLU()) 57 | input_size = hidden_unit 58 | 59 | output_layer = torch.nn.Linear( 60 | in_features=hidden_units[-1], out_features=num_classes 61 | ) 62 | 63 | all_layers.append(output_layer) 64 | self.layers = torch.nn.Sequential(*all_layers) 65 | 66 | def forward(self, x): 67 | x = torch.flatten(x, start_dim=1) # to make it work for image inputs 68 | x = self.layers(x) 69 | return x # x are the model's logits 70 | 71 | 72 | ################################################################## 73 | # PYTORCH LIGHTNING MODULE 74 | ################################################################## 75 | 76 | # LightningModule that receives a PyTorch model as input 77 | class LightningModel(pl.LightningModule): 78 | def __init__(self, model, learning_rate): 79 | super().__init__() 80 | 81 | self.learning_rate = learning_rate 82 | # The inherited PyTorch module 83 | self.model = model 84 | if hasattr(model, "dropout_proba"): 85 | self.dropout_proba = model.dropout_proba 86 | 87 | # Save settings and hyperparameters to the log directory 88 | # but skip the model parameters 89 | self.save_hyperparameters(ignore=["model"]) 90 | 91 | # Set up attributes for computing the accuracy 92 | self.train_acc = torchmetrics.Accuracy() 93 | self.valid_acc = torchmetrics.Accuracy() 94 | self.test_acc = torchmetrics.Accuracy() 95 | 96 | # Defining the forward method is only necessary 97 | # if you want to use a Trainer's .predict() method (optional) 98 | def forward(self, x): 99 | return self.model(x) 100 | 101 | # A common forward step to compute the loss and labels 102 | # this is used for training, validation, and testing below 103 | def _shared_step(self, batch): 104 | features, true_labels = batch 105 | logits = self(features) 106 | loss = torch.nn.functional.cross_entropy(logits, true_labels) 107 | predicted_labels = torch.argmax(logits, dim=1) 108 | 109 | return loss, true_labels, predicted_labels 110 | 111 | def training_step(self, batch, batch_idx): 112 | loss, true_labels, predicted_labels = self._shared_step(batch) 113 | self.log("train_loss", loss) 114 | 115 | # Do another forward pass in .eval() mode to compute accuracy 116 | # while accountingfor Dropout, BatchNorm etc. 
behavior 117 | # during evaluation (inference) 118 | self.model.eval() 119 | with torch.no_grad(): 120 | _, true_labels, predicted_labels = self._shared_step(batch) 121 | self.train_acc(predicted_labels, true_labels) 122 | self.log("train_acc", self.train_acc, on_epoch=True, on_step=False) 123 | self.model.train() 124 | 125 | return loss # this is passed to the optimzer for training 126 | 127 | def validation_step(self, batch, batch_idx): 128 | loss, true_labels, predicted_labels = self._shared_step(batch) 129 | self.log("valid_loss", loss) 130 | self.valid_acc(predicted_labels, true_labels) 131 | self.log( 132 | "valid_acc", 133 | self.valid_acc, 134 | on_epoch=True, 135 | on_step=False, 136 | prog_bar=True, 137 | ) 138 | 139 | def test_step(self, batch, batch_idx): 140 | loss, true_labels, predicted_labels = self._shared_step(batch) 141 | self.test_acc(predicted_labels, true_labels) 142 | self.log("test_acc", self.test_acc, on_epoch=True, on_step=False) 143 | 144 | def configure_optimizers(self): 145 | optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate) 146 | return optimizer 147 | 148 | 149 | ################################################################## 150 | # DATA MODULE 151 | ################################################################## 152 | 153 | 154 | class DataModule(pl.LightningDataModule): 155 | def __init__(self, data_path="./"): 156 | super().__init__() 157 | self.data_path = data_path 158 | 159 | def prepare_data(self): 160 | datasets.MNIST(root=self.data_path, download=True) 161 | return 162 | 163 | def setup(self, stage=None): 164 | # Note transforms.ToTensor() scales input images 165 | # to 0-1 range 166 | train = datasets.MNIST( 167 | root=self.data_path, 168 | train=True, 169 | transform=transforms.ToTensor(), 170 | download=False, 171 | ) 172 | 173 | self.test = datasets.MNIST( 174 | root=self.data_path, 175 | train=False, 176 | transform=transforms.ToTensor(), 177 | download=False, 178 | ) 179 | 180 | self.train, self.valid = random_split(train, lengths=[55000, 5000]) 181 | 182 | def train_dataloader(self): 183 | train_loader = DataLoader( 184 | dataset=self.train, 185 | batch_size=BATCH_SIZE, 186 | drop_last=True, 187 | shuffle=True, 188 | num_workers=NUM_WORKERS, 189 | ) 190 | return train_loader 191 | 192 | def val_dataloader(self): 193 | valid_loader = DataLoader( 194 | dataset=self.valid, 195 | batch_size=BATCH_SIZE, 196 | drop_last=False, 197 | shuffle=False, 198 | num_workers=NUM_WORKERS, 199 | ) 200 | return valid_loader 201 | 202 | def test_dataloader(self): 203 | test_loader = DataLoader( 204 | dataset=self.test, 205 | batch_size=BATCH_SIZE, 206 | drop_last=False, 207 | shuffle=False, 208 | num_workers=NUM_WORKERS, 209 | ) 210 | return test_loader 211 | 212 | 213 | ################################################################## 214 | # INITIALIZE MODELS AND TRAINER 215 | ################################################################## 216 | 217 | pytorch_model = PyTorchModel(input_size=28 * 28, hidden_units=(128, 256), num_classes=10) 218 | 219 | lightning_model = LightningModel(pytorch_model, learning_rate=LEARNING_RATE) 220 | 221 | callbacks = [ 222 | ModelCheckpoint(save_top_k=1, mode="max", monitor="valid_acc") 223 | ] # save top 1 model 224 | logger = CSVLogger(save_dir="logs/", name="my-model") 225 | 226 | torch.manual_seed(1) 227 | data_module = DataModule(data_path="./data") 228 | 229 | trainer = pl.Trainer( 230 | max_epochs=NUM_EPOCHS, 231 | callbacks=callbacks, 232 | progress_bar_refresh_rate=50, # recommended for 
notebooks 233 | accelerator="auto", # Uses GPUs or TPUs if available 234 | devices="auto", # Uses all available GPUs/TPUs if applicable 235 | logger=logger, 236 | deterministic=True, 237 | log_every_n_steps=10, 238 | ) 239 | 240 | ################################################################## 241 | # TRAIN AND EVALUATE 242 | ################################################################## 243 | 244 | start_time = time.time() 245 | trainer.fit(model=lightning_model, datamodule=data_module) 246 | 247 | runtime = (time.time() - start_time) / 60 248 | print(f"Training took {runtime:.2f} min in total.") 249 | 250 | trainer.test(model=lightning_model, datamodule=data_module, ckpt_path="best") 251 | -------------------------------------------------------------------------------- /tensorflow1_ipynb/basic-ml/softmax-regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.\n", 8 | "- Author: Sebastian Raschka\n", 9 | "- GitHub Repository: https://github.com/rasbt/deeplearning-models" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Sebastian Raschka \n", 22 | "\n", 23 | "CPython 3.6.1\n", 24 | "IPython 6.0.0\n", 25 | "\n", 26 | "tensorflow 1.2.0\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "%load_ext watermark\n", 32 | "%watermark -a 'Sebastian Raschka' -v -p tensorflow" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "# Model Zoo -- Softmax Regression" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "Implementation of softmax regression (multinomial logistic regression)." 
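,
"\n",
"\n",
"For an input vector $\\mathbf{x}$, the model computes logits $\\mathbf{z} = \\mathbf{x} \\mathbf{W} + \\mathbf{b}$ and class-membership probabilities via the softmax function, $P(y=j \\mid \\mathbf{x}) = \\exp(z_j) / \\sum_k \\exp(z_k)$; training minimizes the cross-entropy loss $-\\sum_j y_j \\log P(y=j \\mid \\mathbf{x})$ with plain gradient descent, which is what the graph below implements."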
47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 2, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "Extracting ./train-images-idx3-ubyte.gz\n", 59 | "Extracting ./train-labels-idx1-ubyte.gz\n", 60 | "Extracting ./t10k-images-idx3-ubyte.gz\n", 61 | "Extracting ./t10k-labels-idx1-ubyte.gz\n", 62 | "Epoch: 001 | AvgCost: 0.476 | Train/Valid ACC: 0.903/0.909\n", 63 | "Epoch: 002 | AvgCost: 0.339 | Train/Valid ACC: 0.911/0.918\n", 64 | "Epoch: 003 | AvgCost: 0.320 | Train/Valid ACC: 0.915/0.922\n", 65 | "Epoch: 004 | AvgCost: 0.309 | Train/Valid ACC: 0.918/0.923\n", 66 | "Epoch: 005 | AvgCost: 0.301 | Train/Valid ACC: 0.918/0.922\n", 67 | "Epoch: 006 | AvgCost: 0.296 | Train/Valid ACC: 0.919/0.922\n", 68 | "Epoch: 007 | AvgCost: 0.291 | Train/Valid ACC: 0.921/0.925\n", 69 | "Epoch: 008 | AvgCost: 0.287 | Train/Valid ACC: 0.922/0.925\n", 70 | "Epoch: 009 | AvgCost: 0.286 | Train/Valid ACC: 0.922/0.926\n", 71 | "Epoch: 010 | AvgCost: 0.283 | Train/Valid ACC: 0.923/0.926\n", 72 | "Epoch: 011 | AvgCost: 0.282 | Train/Valid ACC: 0.923/0.924\n", 73 | "Epoch: 012 | AvgCost: 0.278 | Train/Valid ACC: 0.925/0.927\n", 74 | "Epoch: 013 | AvgCost: 0.278 | Train/Valid ACC: 0.925/0.928\n", 75 | "Epoch: 014 | AvgCost: 0.276 | Train/Valid ACC: 0.925/0.925\n", 76 | "Epoch: 015 | AvgCost: 0.276 | Train/Valid ACC: 0.926/0.928\n", 77 | "Epoch: 016 | AvgCost: 0.274 | Train/Valid ACC: 0.927/0.927\n", 78 | "Epoch: 017 | AvgCost: 0.270 | Train/Valid ACC: 0.927/0.925\n", 79 | "Epoch: 018 | AvgCost: 0.273 | Train/Valid ACC: 0.927/0.930\n", 80 | "Epoch: 019 | AvgCost: 0.270 | Train/Valid ACC: 0.927/0.929\n", 81 | "Epoch: 020 | AvgCost: 0.268 | Train/Valid ACC: 0.927/0.927\n", 82 | "Epoch: 021 | AvgCost: 0.268 | Train/Valid ACC: 0.927/0.926\n", 83 | "Epoch: 022 | AvgCost: 0.270 | Train/Valid ACC: 0.928/0.926\n", 84 | "Epoch: 023 | AvgCost: 0.268 | Train/Valid ACC: 0.927/0.926\n", 85 | "Epoch: 024 | AvgCost: 0.266 | Train/Valid ACC: 0.929/0.926\n", 86 | "Epoch: 025 | AvgCost: 0.261 | Train/Valid ACC: 0.927/0.926\n", 87 | "Epoch: 026 | AvgCost: 0.269 | Train/Valid ACC: 0.929/0.927\n", 88 | "Epoch: 027 | AvgCost: 0.265 | Train/Valid ACC: 0.928/0.928\n", 89 | "Epoch: 028 | AvgCost: 0.261 | Train/Valid ACC: 0.929/0.928\n", 90 | "Epoch: 029 | AvgCost: 0.266 | Train/Valid ACC: 0.930/0.926\n", 91 | "Epoch: 030 | AvgCost: 0.261 | Train/Valid ACC: 0.929/0.924\n", 92 | "Test ACC: 0.925\n" 93 | ] 94 | } 95 | ], 96 | "source": [ 97 | "import tensorflow as tf\n", 98 | "from tensorflow.examples.tutorials.mnist import input_data\n", 99 | "\n", 100 | "\n", 101 | "##########################\n", 102 | "### DATASET\n", 103 | "##########################\n", 104 | "\n", 105 | "mnist = input_data.read_data_sets(\"./\", one_hot=True)\n", 106 | "\n", 107 | "\n", 108 | "##########################\n", 109 | "### SETTINGS\n", 110 | "##########################\n", 111 | "\n", 112 | "# Hyperparameters\n", 113 | "learning_rate = 0.5\n", 114 | "training_epochs = 30\n", 115 | "batch_size = 256\n", 116 | "\n", 117 | "# Architecture\n", 118 | "n_features = 784\n", 119 | "n_classes = 10\n", 120 | "\n", 121 | "\n", 122 | "##########################\n", 123 | "### GRAPH DEFINITION\n", 124 | "##########################\n", 125 | "\n", 126 | "g = tf.Graph()\n", 127 | "with g.as_default():\n", 128 | "\n", 129 | " # Input data\n", 130 | " tf_x = tf.placeholder(tf.float32, [None, n_features])\n", 131 | " tf_y = tf.placeholder(tf.float32, [None, n_classes])\n", 132 | "\n", 
133 | " # Model parameters\n", 134 | " params = {\n", 135 | " 'weights': tf.Variable(tf.zeros(shape=[n_features, n_classes],\n", 136 | " dtype=tf.float32), name='weights'),\n", 137 | " 'bias': tf.Variable([[n_classes]], dtype=tf.float32, name='bias')}\n", 138 | "\n", 139 | " # Softmax regression\n", 140 | " linear = tf.matmul(tf_x, params['weights']) + params['bias']\n", 141 | " pred_proba = tf.nn.softmax(linear, name='predict_probas')\n", 142 | " \n", 143 | " # Loss and optimizer\n", 144 | " cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(\n", 145 | " logits=linear, labels=tf_y), name='cost')\n", 146 | " optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", 147 | " train = optimizer.minimize(cost, name='train')\n", 148 | "\n", 149 | " # Class prediction\n", 150 | " pred_labels = tf.argmax(pred_proba, 1, name='predict_labels')\n", 151 | " correct_prediction = tf.equal(tf.argmax(tf_y, 1), pred_labels)\n", 152 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')\n", 153 | "\n", 154 | " \n", 155 | "##########################\n", 156 | "### TRAINING & EVALUATION\n", 157 | "##########################\n", 158 | "\n", 159 | "with tf.Session(graph=g) as sess:\n", 160 | " sess.run(tf.global_variables_initializer())\n", 161 | "\n", 162 | " for epoch in range(training_epochs):\n", 163 | " avg_cost = 0.\n", 164 | " total_batch = mnist.train.num_examples // batch_size\n", 165 | "\n", 166 | " for i in range(total_batch):\n", 167 | " batch_x, batch_y = mnist.train.next_batch(batch_size)\n", 168 | " _, c = sess.run(['train', 'cost:0'], feed_dict={tf_x: batch_x,\n", 169 | " tf_y: batch_y})\n", 170 | " avg_cost += c\n", 171 | " \n", 172 | " train_acc = sess.run('accuracy:0', feed_dict={tf_x: mnist.train.images,\n", 173 | " tf_y: mnist.train.labels})\n", 174 | " valid_acc = sess.run('accuracy:0', feed_dict={tf_x: mnist.validation.images,\n", 175 | " tf_y: mnist.validation.labels}) \n", 176 | " \n", 177 | " print(\"Epoch: %03d | AvgCost: %.3f\" % (epoch + 1, avg_cost / (i + 1)), end=\"\")\n", 178 | " print(\" | Train/Valid ACC: %.3f/%.3f\" % (train_acc, valid_acc))\n", 179 | " \n", 180 | " test_acc = sess.run(accuracy, feed_dict={tf_x: mnist.test.images,\n", 181 | " tf_y: mnist.test.labels})\n", 182 | " print('Test ACC: %.3f' % test_acc)" 183 | ] 184 | } 185 | ], 186 | "metadata": { 187 | "kernelspec": { 188 | "display_name": "Python 3", 189 | "language": "python", 190 | "name": "python3" 191 | }, 192 | "language_info": { 193 | "codemirror_mode": { 194 | "name": "ipython", 195 | "version": 3 196 | }, 197 | "file_extension": ".py", 198 | "mimetype": "text/x-python", 199 | "name": "python", 200 | "nbconvert_exporter": "python", 201 | "pygments_lexer": "ipython3", 202 | "version": "3.7.1" 203 | } 204 | }, 205 | "nbformat": 4, 206 | "nbformat_minor": 2 207 | } 208 | -------------------------------------------------------------------------------- /tensorflow1_ipynb/cnn/cnn-basic.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.\n", 8 | "- Author: Sebastian Raschka\n", 9 | "- GitHub Repository: https://github.com/rasbt/deeplearning-models" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | 
"name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Sebastian Raschka \n", 22 | "\n", 23 | "CPython 3.6.8\n", 24 | "IPython 7.2.0\n", 25 | "\n", 26 | "tensorflow 1.12.0\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "%load_ext watermark\n", 32 | "%watermark -a 'Sebastian Raschka' -v -p tensorflow" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "# Model Zoo -- Convolutional Neural Network" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "### Low-level Implementation" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 2, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "WARNING:tensorflow:From :10: read_data_sets (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n", 59 | "Instructions for updating:\n", 60 | "Please use alternatives such as official/mnist/dataset.py from tensorflow/models.\n", 61 | "WARNING:tensorflow:From /home/raschka/miniconda3/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:260: maybe_download (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.\n", 62 | "Instructions for updating:\n", 63 | "Please write your own downloading logic.\n", 64 | "WARNING:tensorflow:From /home/raschka/miniconda3/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:262: extract_images (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n", 65 | "Instructions for updating:\n", 66 | "Please use tf.data to implement this functionality.\n", 67 | "Extracting ./train-images-idx3-ubyte.gz\n", 68 | "WARNING:tensorflow:From /home/raschka/miniconda3/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:267: extract_labels (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n", 69 | "Instructions for updating:\n", 70 | "Please use tf.data to implement this functionality.\n", 71 | "Extracting ./train-labels-idx1-ubyte.gz\n", 72 | "WARNING:tensorflow:From /home/raschka/miniconda3/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:110: dense_to_one_hot (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n", 73 | "Instructions for updating:\n", 74 | "Please use tf.one_hot on tensors.\n", 75 | "Extracting ./t10k-images-idx3-ubyte.gz\n", 76 | "Extracting ./t10k-labels-idx1-ubyte.gz\n", 77 | "WARNING:tensorflow:From /home/raschka/miniconda3/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:290: DataSet.__init__ (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n", 78 | "Instructions for updating:\n", 79 | "Please use alternatives such as official/mnist/dataset.py from tensorflow/models.\n" 80 | ] 81 | } 82 | ], 83 | "source": [ 84 | "import tensorflow as tf\n", 85 | "from functools import reduce\n", 86 | "from tensorflow.examples.tutorials.mnist import input_data\n", 87 | "\n", 88 | "\n", 89 | "##########################\n", 90 | "### DATASET\n", 91 | "##########################\n", 92 | "\n", 93 | "mnist = input_data.read_data_sets(\"./\", one_hot=True)\n", 94 | "\n", 95 | "\n", 
96 | "##########################\n", 97 | "### SETTINGS\n", 98 | "##########################\n", 99 | "\n", 100 | "# Hyperparameters\n", 101 | "learning_rate = 0.1\n", 102 | "dropout_keep_proba = 0.5\n", 103 | "epochs = 3\n", 104 | "batch_size = 32\n", 105 | "\n", 106 | "# Architecture\n", 107 | "input_size = 784\n", 108 | "image_width, image_height = 28, 28\n", 109 | "n_classes = 10\n", 110 | "\n", 111 | "# Other\n", 112 | "print_interval = 500\n", 113 | "random_seed = 123\n", 114 | "\n", 115 | "\n", 116 | "##########################\n", 117 | "### WRAPPER FUNCTIONS\n", 118 | "##########################\n", 119 | "\n", 120 | "def conv2d(input_tensor, output_channels,\n", 121 | " kernel_size=(5, 5), strides=(1, 1, 1, 1),\n", 122 | " padding='SAME', activation=None, seed=None,\n", 123 | " name='conv2d'):\n", 124 | "\n", 125 | " with tf.name_scope(name):\n", 126 | " input_channels = input_tensor.get_shape().as_list()[-1]\n", 127 | " weights_shape = (kernel_size[0], kernel_size[1],\n", 128 | " input_channels, output_channels)\n", 129 | "\n", 130 | " weights = tf.Variable(tf.truncated_normal(shape=weights_shape,\n", 131 | " mean=0.0,\n", 132 | " stddev=0.01,\n", 133 | " dtype=tf.float32,\n", 134 | " seed=seed),\n", 135 | " name='weights')\n", 136 | " biases = tf.Variable(tf.zeros(shape=(output_channels,)), name='biases')\n", 137 | " conv = tf.nn.conv2d(input=input_tensor,\n", 138 | " filter=weights,\n", 139 | " strides=strides,\n", 140 | " padding=padding)\n", 141 | "\n", 142 | " act = conv + biases\n", 143 | " if activation is not None:\n", 144 | " act = activation(conv + biases)\n", 145 | " return act\n", 146 | "\n", 147 | "\n", 148 | "def fully_connected(input_tensor, output_nodes,\n", 149 | " activation=None, seed=None,\n", 150 | " name='fully_connected'):\n", 151 | "\n", 152 | " with tf.name_scope(name):\n", 153 | " input_nodes = input_tensor.get_shape().as_list()[1]\n", 154 | " weights = tf.Variable(tf.truncated_normal(shape=(input_nodes,\n", 155 | " output_nodes),\n", 156 | " mean=0.0,\n", 157 | " stddev=0.01,\n", 158 | " dtype=tf.float32,\n", 159 | " seed=seed),\n", 160 | " name='weights')\n", 161 | " biases = tf.Variable(tf.zeros(shape=[output_nodes]), name='biases')\n", 162 | "\n", 163 | " act = tf.matmul(input_tensor, weights) + biases\n", 164 | " if activation is not None:\n", 165 | " act = activation(act)\n", 166 | " return act\n", 167 | "\n", 168 | " \n", 169 | "##########################\n", 170 | "### GRAPH DEFINITION\n", 171 | "##########################\n", 172 | "\n", 173 | "g = tf.Graph()\n", 174 | "with g.as_default():\n", 175 | " \n", 176 | " tf.set_random_seed(random_seed)\n", 177 | "\n", 178 | " # Input data\n", 179 | " tf_x = tf.placeholder(tf.float32, [None, input_size, 1], name='inputs')\n", 180 | " tf_y = tf.placeholder(tf.float32, [None, n_classes], name='targets')\n", 181 | " \n", 182 | " keep_proba = tf.placeholder(tf.float32, shape=None, name='keep_proba')\n", 183 | "\n", 184 | " # Convolutional Neural Network:\n", 185 | " # 2 convolutional layers with maxpool and ReLU activation\n", 186 | " input_layer = tf.reshape(tf_x, shape=[-1, image_width, image_height, 1])\n", 187 | " \n", 188 | " conv1 = conv2d(input_tensor=input_layer,\n", 189 | " output_channels=8,\n", 190 | " kernel_size=(3, 3),\n", 191 | " strides=(1, 1, 1, 1),\n", 192 | " activation=tf.nn.relu,\n", 193 | " name='conv1')\n", 194 | " \n", 195 | " pool1 = tf.nn.max_pool(conv1,\n", 196 | " ksize=(1, 2, 2, 1), \n", 197 | " strides=(1, 2, 2, 1),\n", 198 | " padding='SAME',\n", 199 | " name='maxpool1')\n", 
200 | " \n", 201 | " conv2 = conv2d(input_tensor=pool1,\n", 202 | " output_channels=16,\n", 203 | " kernel_size=(3, 3),\n", 204 | " strides=(1, 1, 1, 1),\n", 205 | " activation=tf.nn.relu,\n", 206 | " name='conv2')\n", 207 | " \n", 208 | " pool2 = tf.nn.max_pool(conv2,\n", 209 | " ksize=(1, 2, 2, 1), \n", 210 | " strides=(1, 2, 2, 1),\n", 211 | " padding='SAME',\n", 212 | " name='maxpool2')\n", 213 | " \n", 214 | " dims = pool2.get_shape().as_list()[1:]\n", 215 | " dims = reduce(lambda x, y: x * y, dims, 1)\n", 216 | " flat = tf.reshape(pool2, shape=(-1, dims))\n", 217 | " \n", 218 | " out_layer = fully_connected(flat, n_classes, activation=None, \n", 219 | " name='logits')\n", 220 | "\n", 221 | " # Loss and optimizer\n", 222 | " loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=out_layer, labels=tf_y)\n", 223 | " cost = tf.reduce_mean(loss, name='cost')\n", 224 | " optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", 225 | " train = optimizer.minimize(cost, name='train')\n", 226 | "\n", 227 | " # Prediction\n", 228 | " correct_prediction = tf.equal(tf.argmax(tf_y, 1), \n", 229 | " tf.argmax(out_layer, 1), \n", 230 | " name='correct_prediction')\n", 231 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, \n", 232 | " tf.float32), \n", 233 | " name='accuracy')" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 3, 239 | "metadata": {}, 240 | "outputs": [ 241 | { 242 | "name": "stdout", 243 | "output_type": "stream", 244 | "text": [ 245 | "Minibatch: 001 | Cost: 2.303\n", 246 | "Minibatch: 501 | Cost: 0.225\n", 247 | "Minibatch: 1001 | Cost: 0.106\n", 248 | "Minibatch: 1501 | Cost: 0.039\n", 249 | "Epoch: 001 | AvgCost: 0.530 | Train/Valid ACC: 0.966/0.964\n", 250 | "Minibatch: 001 | Cost: 0.051\n", 251 | "Minibatch: 501 | Cost: 0.035\n", 252 | "Minibatch: 1001 | Cost: 0.043\n", 253 | "Minibatch: 1501 | Cost: 0.058\n", 254 | "Epoch: 002 | AvgCost: 0.102 | Train/Valid ACC: 0.967/0.968\n", 255 | "Minibatch: 001 | Cost: 0.019\n", 256 | "Minibatch: 501 | Cost: 0.132\n", 257 | "Minibatch: 1001 | Cost: 0.064\n", 258 | "Minibatch: 1501 | Cost: 0.011\n", 259 | "Epoch: 003 | AvgCost: 0.076 | Train/Valid ACC: 0.978/0.978\n", 260 | "Test ACC: 0.980\n" 261 | ] 262 | } 263 | ], 264 | "source": [ 265 | "import numpy as np\n", 266 | "\n", 267 | "##########################\n", 268 | "### TRAINING & EVALUATION\n", 269 | "##########################\n", 270 | "\n", 271 | "with tf.Session(graph=g) as sess:\n", 272 | " sess.run(tf.global_variables_initializer())\n", 273 | "\n", 274 | " np.random.seed(random_seed) # random seed for mnist iterator\n", 275 | " for epoch in range(1, epochs + 1):\n", 276 | " avg_cost = 0.\n", 277 | " total_batch = mnist.train.num_examples // batch_size\n", 278 | "\n", 279 | " for i in range(total_batch):\n", 280 | " batch_x, batch_y = mnist.train.next_batch(batch_size)\n", 281 | " batch_x = batch_x[:, :, None] # add \"missing\" color channel\n", 282 | " \n", 283 | " _, c = sess.run(['train', 'cost:0'], \n", 284 | " feed_dict={'inputs:0': batch_x,\n", 285 | " 'targets:0': batch_y,\n", 286 | " 'keep_proba:0': dropout_keep_proba})\n", 287 | " avg_cost += c\n", 288 | " if not i % print_interval:\n", 289 | " print(\"Minibatch: %03d | Cost: %.3f\" % (i + 1, c))\n", 290 | " \n", 291 | " train_acc = sess.run('accuracy:0', \n", 292 | " feed_dict={'inputs:0': mnist.train.images[:, :, None],\n", 293 | " 'targets:0': mnist.train.labels,\n", 294 | " 'keep_proba:0': 1.0})\n", 295 | " valid_acc = sess.run('accuracy:0', \n", 296 | " 
feed_dict={'inputs:0': mnist.validation.images[:, :, None],\n", 297 | " 'targets:0': mnist.validation.labels,\n", 298 | " 'keep_proba:0': 1.0})\n", 299 | " \n", 300 | " print(\"Epoch: %03d | AvgCost: %.3f\" % (epoch, avg_cost / (i + 1)), end=\"\")\n", 301 | " print(\" | Train/Valid ACC: %.3f/%.3f\" % (train_acc, valid_acc))\n", 302 | " \n", 303 | " test_acc = sess.run('accuracy:0', \n", 304 | " feed_dict={'inputs:0': mnist.test.images[:, :, None],\n", 305 | " 'targets:0': mnist.test.labels,\n", 306 | " 'keep_proba:0': 1.0})\n", 307 | " \n", 308 | " print('Test ACC: %.3f' % test_acc)" 309 | ] 310 | } 311 | ], 312 | "metadata": { 313 | "kernelspec": { 314 | "display_name": "Python 3", 315 | "language": "python", 316 | "name": "python3" 317 | }, 318 | "language_info": { 319 | "codemirror_mode": { 320 | "name": "ipython", 321 | "version": 3 322 | }, 323 | "file_extension": ".py", 324 | "mimetype": "text/x-python", 325 | "name": "python", 326 | "nbconvert_exporter": "python", 327 | "pygments_lexer": "ipython3", 328 | "version": "3.6.8" 329 | } 330 | }, 331 | "nbformat": 4, 332 | "nbformat_minor": 2 333 | } 334 | -------------------------------------------------------------------------------- /tensorflow1_ipynb/data/perceptron_toydata.txt: -------------------------------------------------------------------------------- 1 | 0.77 -1.14 0 2 | -0.33 1.44 0 3 | 0.91 -3.07 0 4 | -0.37 -1.91 0 5 | -1.84 -1.13 0 6 | -1.50 0.34 0 7 | -0.63 -1.53 0 8 | -1.08 -1.23 0 9 | 0.39 -1.99 0 10 | -1.26 -2.90 0 11 | -5.27 -0.78 0 12 | -0.49 -2.74 0 13 | 1.48 -3.74 0 14 | -1.64 -1.96 0 15 | 0.45 0.36 0 16 | -1.48 -1.17 0 17 | -2.94 -4.47 0 18 | -2.19 -1.48 0 19 | 0.02 -0.02 0 20 | -2.24 -2.12 0 21 | -3.17 -3.69 0 22 | -4.09 1.03 0 23 | -2.41 -2.31 0 24 | -3.45 -0.61 0 25 | -3.96 -2.00 0 26 | -2.95 -1.16 0 27 | -2.42 -3.35 0 28 | -1.74 -1.10 0 29 | -1.61 -1.28 0 30 | -2.59 -2.21 0 31 | -2.64 -2.20 0 32 | -2.84 -4.12 0 33 | -1.45 -2.26 0 34 | -3.98 -1.05 0 35 | -2.97 -1.63 0 36 | -0.68 -1.52 0 37 | -0.10 -3.43 0 38 | -1.14 -2.66 0 39 | -2.92 -2.51 0 40 | -2.14 -1.62 0 41 | -3.33 -0.44 0 42 | -1.05 -3.85 0 43 | 0.38 0.95 0 44 | -0.05 -1.95 0 45 | -3.20 -0.22 0 46 | -2.26 0.01 0 47 | -1.41 -0.33 0 48 | -1.20 -0.71 0 49 | -1.69 0.80 0 50 | -1.52 -1.14 0 51 | 3.88 0.65 1 52 | 0.73 2.97 1 53 | 0.83 3.94 1 54 | 1.59 1.25 1 55 | 3.92 3.48 1 56 | 3.87 2.91 1 57 | 1.14 3.91 1 58 | 1.73 2.80 1 59 | 2.95 1.84 1 60 | 2.61 2.92 1 61 | 2.38 0.90 1 62 | 2.30 3.33 1 63 | 1.31 1.85 1 64 | 1.56 3.85 1 65 | 2.67 2.41 1 66 | 1.23 2.54 1 67 | 1.33 2.03 1 68 | 1.36 2.68 1 69 | 2.58 1.79 1 70 | 2.40 0.91 1 71 | 0.51 2.44 1 72 | 2.17 2.64 1 73 | 4.38 2.94 1 74 | 1.09 3.12 1 75 | 0.68 1.54 1 76 | 1.93 3.71 1 77 | 1.26 1.17 1 78 | 1.90 1.34 1 79 | 3.13 0.92 1 80 | 0.85 1.56 1 81 | 1.50 3.93 1 82 | 2.95 2.09 1 83 | 0.77 2.84 1 84 | 1.00 0.46 1 85 | 3.19 2.32 1 86 | 2.92 2.32 1 87 | 2.86 1.35 1 88 | 0.97 2.68 1 89 | 1.20 1.31 1 90 | 1.54 2.02 1 91 | 1.65 0.63 1 92 | 1.36 -0.22 1 93 | 2.63 0.40 1 94 | 0.90 2.05 1 95 | 1.26 3.54 1 96 | 0.71 2.27 1 97 | 1.96 0.83 1 98 | 2.52 1.83 1 99 | 2.77 2.82 1 100 | 4.16 3.34 1 101 | -------------------------------------------------------------------------------- /tensorflow1_ipynb/helper.py: -------------------------------------------------------------------------------- 1 | # Sebastian Raschka 2016-2017 2 | # 3 | # Supporting code for the book 4 | # "Introduction to Artificial Neural Networks and Deep Learning: 5 | # A Practical Guide with Applications in Python" 6 | # 7 | # Source: 
https://github.com/rasbt/deep-learning-book 8 | # Author: Sebastian Raschka 9 | # License: MIT 10 | 11 | 12 | from urllib.request import urlretrieve 13 | import shutil 14 | import glob 15 | import tarfile 16 | import os 17 | import sys 18 | import pickle 19 | import numpy as np 20 | import scipy.misc 21 | from tensorflow.examples.tutorials.mnist import input_data 22 | 23 | 24 | def download_and_extract_cifar(target_dir, 25 | cifar_url='http://www.cs.toronto.edu/' 26 | '~kriz/cifar-10-python.tar.gz'): 27 | 28 | if not os.path.exists(target_dir): 29 | os.mkdir(target_dir) 30 | 31 | fbase = os.path.basename(cifar_url) 32 | fpath = os.path.join(target_dir, fbase) 33 | 34 | if not os.path.exists(fpath): 35 | def get_progress(count, block_size, total_size): 36 | sys.stdout.write('\rDownloading ... %s %d%%' % (fbase, 37 | float(count * block_size) / 38 | float(total_size) * 100.0)) 39 | sys.stdout.flush() 40 | local_filename, headers = urlretrieve(cifar_url, 41 | fpath, 42 | reporthook=get_progress) 43 | sys.stdout.write('\nDownloaded') 44 | 45 | else: 46 | sys.stdout.write('Found existing') 47 | 48 | statinfo = os.stat(fpath) 49 | file_size = statinfo.st_size / 1024**2 50 | sys.stdout.write(' %s (%.1f Mb)\n' % (fbase, file_size)) 51 | sys.stdout.write('Extracting %s ...\n' % fbase) 52 | sys.stdout.flush() 53 | 54 | with tarfile.open(fpath, 'r:gz') as t: 55 | def is_within_directory(directory, target): 56 | 57 | abs_directory = os.path.abspath(directory) 58 | abs_target = os.path.abspath(target) 59 | 60 | prefix = os.path.commonprefix([abs_directory, abs_target]) 61 | 62 | return prefix == abs_directory 63 | 64 | def safe_extract(tar, path=".", members=None, *, numeric_owner=False): 65 | 66 | for member in tar.getmembers(): 67 | member_path = os.path.join(path, member.name) 68 | if not is_within_directory(path, member_path): 69 | raise Exception("Attempted Path Traversal in Tar File") 70 | 71 | tar.extractall(path, members, numeric_owner=numeric_owner) 72 | 73 | 74 | safe_extract(t, target_dir) 75 | 76 | return fpath.replace('cifar-10-python.tar.gz', 'cifar-10-batches-py') 77 | 78 | 79 | def unpickle_cifar(fpath): 80 | with open(fpath, 'rb') as f: 81 | dct = pickle.load(f, encoding='bytes') 82 | return dct 83 | 84 | 85 | class Cifar10Loader(): 86 | def __init__(self, cifar_path, normalize=False, 87 | channel_mean_center=False, zero_center=False): 88 | self.cifar_path = cifar_path 89 | self.batchnames = [os.path.join(self.cifar_path, f) 90 | for f in os.listdir(self.cifar_path) 91 | if f.startswith('data_batch')] 92 | self.testname = os.path.join(self.cifar_path, 'test_batch') 93 | self.num_train = self.count_train() 94 | self.num_test = self.count_test() 95 | self.normalize = normalize 96 | self.channel_mean_center = channel_mean_center 97 | self.zero_center = zero_center 98 | self.train_mean = None 99 | 100 | def _compute_train_mean(self): 101 | 102 | cum_mean = np.zeros((1, 1, 1, 3)) 103 | 104 | for batch in self.batchnames: 105 | dct = unpickle_cifar(batch) 106 | dct[b'labels'] = np.array(dct[b'labels'], dtype=int) 107 | dct[b'data'] = dct[b'data'].reshape( 108 | dct[b'data'].shape[0], 3, 32, 32).transpose(0, 2, 3, 1) 109 | mean = dct[b'data'].mean(axis=(0, 1, 2), keepdims=True) 110 | cum_mean += mean 111 | 112 | self.train_mean = cum_mean / len(self.batchnames) 113 | 114 | return None 115 | 116 | def load_test(self, onehot=True): 117 | dct = unpickle_cifar(self.testname) 118 | dct[b'labels'] = np.array(dct[b'labels'], dtype=int) 119 | 120 | dct[b'data'] = dct[b'data'].reshape( 121 | 
dct[b'data'].shape[0], 3, 32, 32).transpose(0, 2, 3, 1) 122 | 123 | if onehot: 124 | dct[b'labels'] = (np.arange(10) == 125 | dct[b'labels'][:, None]).astype(int) 126 | 127 | if self.normalize: 128 | dct[b'data'] = dct[b'data'].astype(np.float32) 129 | dct[b'data'] = dct[b'data'] / 255.0 130 | 131 | if self.channel_mean_center: 132 | if self.train_mean is None: 133 | self._compute_train_mean() 134 | dct[b'data'] -= self.train_mean 135 | 136 | if self.zero_center: 137 | if self.normalize: 138 | dct[b'data'] -= .5 139 | else: 140 | dct[b'data'] -= 127.5 141 | 142 | return dct[b'data'], dct[b'labels'] 143 | 144 | def load_train_epoch(self, batch_size=50, onehot=True, 145 | shuffle=False, seed=None): 146 | 147 | rgen = np.random.RandomState(seed) 148 | 149 | for batch in self.batchnames: 150 | dct = unpickle_cifar(batch) 151 | dct[b'labels'] = np.array(dct[b'labels'], dtype=int) 152 | dct[b'data'] = dct[b'data'].reshape( 153 | dct[b'data'].shape[0], 3, 32, 32).transpose(0, 2, 3, 1) 154 | 155 | if onehot: 156 | dct[b'labels'] = (np.arange(10) == 157 | dct[b'labels'][:, None]).astype(int) 158 | 159 | if self.normalize: 160 | dct[b'data'] = dct[b'data'].astype(np.float32) 161 | dct[b'data'] = dct[b'data'] / 255.0 162 | 163 | if self.channel_mean_center: 164 | if self.train_mean is None: 165 | self._compute_train_mean() 166 | dct[b'data'] -= self.train_mean 167 | 168 | if self.zero_center: 169 | if self.normalize: 170 | dct[b'data'] -= .5 171 | else: 172 | dct[b'data'] -= 127.5 173 | 174 | arrays = [dct[b'data'], dct[b'labels']] 175 | del dct 176 | indices = np.arange(arrays[0].shape[0]) 177 | 178 | if shuffle: 179 | rgen.shuffle(indices) 180 | 181 | for start_idx in range(0, indices.shape[0] - batch_size + 1, 182 | batch_size): 183 | index_slice = indices[start_idx:start_idx + batch_size] 184 | yield (ary[index_slice] for ary in arrays) 185 | 186 | def count_train(self): 187 | cnt = 0 188 | for f in self.batchnames: 189 | dct = unpickle_cifar(f) 190 | cnt += len(dct[b'labels']) 191 | return cnt 192 | 193 | def count_test(self): 194 | dct = unpickle_cifar(self.testname) 195 | return len(dct[b'labels']) 196 | 197 | 198 | def mnist_export_to_jpg(path='./'): 199 | 200 | mnist = input_data.read_data_sets("./", one_hot=False) 201 | 202 | batch_x, batch_y = mnist.train.next_batch(50000) 203 | cnt = -1 204 | 205 | def remove_incomplete_existing(path_prefix, expect_files): 206 | dir_path = os.path.join(path, 'mnist_%s' % path_prefix) 207 | 208 | is_empty = False 209 | if not os.path.exists(dir_path): 210 | for i in range(10): 211 | outpath = os.path.join(path, dir_path, str(i)) 212 | if not os.path.exists(outpath): 213 | os.makedirs(outpath) 214 | is_empty = True 215 | else: 216 | num_existing_files = len(glob.glob('%s/*/*.jpg' % dir_path)) 217 | if num_existing_files > 0 and num_existing_files < expect_files: 218 | shutil.rmtree(dir_path) 219 | is_empty = True 220 | for i in range(10): 221 | outpath = os.path.join(path, dir_path, str(i)) 222 | if not os.path.exists(outpath): 223 | os.makedirs(outpath) 224 | return is_empty 225 | 226 | is_empty = remove_incomplete_existing(path_prefix='train', 227 | expect_files=45000) 228 | if is_empty: 229 | for data, label in zip(batch_x[:45000], batch_y[:45000]): 230 | cnt += 1 231 | outpath = os.path.join(path, 'mnist_train/%d/%05d.jpg' % 232 | (label, cnt)) 233 | scipy.misc.imsave(outpath, (data*255).reshape(28, 28)) 234 | 235 | is_empty = remove_incomplete_existing(path_prefix='valid', 236 | expect_files=5000) 237 | if is_empty: 238 | for data, label in 
zip(batch_x[45000:], batch_y[45000:]): 239 | cnt += 1 240 | outpath = os.path.join(path, 'mnist_valid/%d/%05d.jpg' % 241 | (label, cnt)) 242 | scipy.misc.imsave(outpath, (data*255).reshape(28, 28)) 243 | 244 | is_empty = remove_incomplete_existing(path_prefix='test', 245 | expect_files=10000) 246 | if is_empty: 247 | batch_x, batch_y = mnist.test.next_batch(10000) 248 | cnt = -1 249 | for data, label in zip(batch_x, batch_y): 250 | cnt += 1 251 | outpath = os.path.join(path, 'mnist_test/%d/%05d.jpg' % (label, cnt)) 252 | scipy.misc.imsave(outpath, (data*255).reshape(28, 28)) 253 | -------------------------------------------------------------------------------- /tensorflow1_ipynb/metric/siamese-1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.\n", 8 | "- Author: Sebastian Raschka\n", 9 | "- GitHub Repository: https://github.com/rasbt/deeplearning-models" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Sebastian Raschka \n", 22 | "\n", 23 | "CPython 3.6.1\n", 24 | "IPython 6.0.0\n", 25 | "\n", 26 | "tensorflow 1.2.0\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "%load_ext watermark\n", 32 | "%watermark -a 'Sebastian Raschka' -v -p tensorflow" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "# Model Zoo -- Siamese Network with Multilayer Perceptrons" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": { 46 | "scrolled": true 47 | }, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | "Extracting ./train-images-idx3-ubyte.gz\n", 54 | "Extracting ./train-labels-idx1-ubyte.gz\n", 55 | "Extracting ./t10k-images-idx3-ubyte.gz\n", 56 | "Extracting ./t10k-labels-idx1-ubyte.gz\n", 57 | "Initializing variables:\n", 58 | "\n", 59 | "\n", 60 | "\n", 61 | "\n", 62 | "\n", 63 | "\n", 64 | "Epoch: 001 | AvgCost: 0.472\n", 65 | "Epoch: 002 | AvgCost: 0.258\n", 66 | "Epoch: 003 | AvgCost: 0.250\n", 67 | "Epoch: 004 | AvgCost: 0.250\n", 68 | "Epoch: 005 | AvgCost: 0.250\n" 69 | ] 70 | } 71 | ], 72 | "source": [ 73 | "import numpy as np\n", 74 | "import tensorflow as tf\n", 75 | "from tensorflow.examples.tutorials.mnist import input_data\n", 76 | "\n", 77 | "\n", 78 | "##########################\n", 79 | "### SETTINGS\n", 80 | "##########################\n", 81 | "\n", 82 | "# General settings\n", 83 | "\n", 84 | "random_seed = 0\n", 85 | "\n", 86 | "# Hyperparameters\n", 87 | "learning_rate = 0.001\n", 88 | "training_epochs = 5\n", 89 | "batch_size = 100\n", 90 | "margin = 1.0\n", 91 | "\n", 92 | "# Architecture\n", 93 | "n_hidden_1 = 256\n", 94 | "n_hidden_2 = 256\n", 95 | "n_input = 784\n", 96 | "n_classes = 1 # for 'true' and 'false' matches\n", 97 | "\n", 98 | "\n", 99 | "def fully_connected(inputs, output_nodes, activation=None, seed=None):\n", 100 | "\n", 101 | " input_nodes = inputs.get_shape().as_list()[1]\n", 102 | " weights = tf.get_variable(name='weights', \n", 103 | " shape=(input_nodes, output_nodes),\n", 104 | " initializer=tf.truncated_normal_initializer(\n", 105 | " mean=0.0,\n", 106 | " stddev=0.001,\n", 107 | " dtype=tf.float32,\n", 108 | " 
seed=seed))\n", 109 | "\n", 110 | " biases = tf.get_variable(name='biases', \n", 111 | " shape=(output_nodes,),\n", 112 | " initializer=tf.constant_initializer(\n", 113 | " value=0.0, \n", 114 | " dtype=tf.float32))\n", 115 | " \n", 116 | " act = tf.matmul(inputs, weights) + biases\n", 117 | " if activation is not None:\n", 118 | " act = activation(act)\n", 119 | " return act\n", 120 | "\n", 121 | "\n", 122 | "def euclidean_distance(x_1, x_2):\n", 123 | " return tf.sqrt(tf.maximum(tf.sum(\n", 124 | " tf.square(x - y), axis=1, keepdims=True), 1e-06))\n", 125 | "\n", 126 | "def contrastive_loss(x_1, x_2, margin=1.0):\n", 127 | " return (x_1 * tf.square(x_2) +\n", 128 | " (1.0 - x_1) * tf.square(tf.maximum(margin - x_2, 0.)))\n", 129 | "\n", 130 | "\n", 131 | "##########################\n", 132 | "### GRAPH DEFINITION\n", 133 | "##########################\n", 134 | "\n", 135 | "g = tf.Graph()\n", 136 | "with g.as_default():\n", 137 | " \n", 138 | " tf.set_random_seed(random_seed)\n", 139 | "\n", 140 | " # Input data\n", 141 | " tf_x_1 = tf.placeholder(tf.float32, [None, n_input], name='inputs_1')\n", 142 | " tf_x_2 = tf.placeholder(tf.float32, [None, n_input], name='inputs_2')\n", 143 | " tf_y = tf.placeholder(tf.float32, [None], \n", 144 | " name='targets') # here: 'true' or 'false' valuess\n", 145 | "\n", 146 | " # Siamese Network\n", 147 | " def build_mlp(inputs):\n", 148 | " with tf.variable_scope('fc_1'):\n", 149 | " layer_1 = fully_connected(inputs, n_hidden_1, \n", 150 | " activation=tf.nn.relu)\n", 151 | " with tf.variable_scope('fc_2'):\n", 152 | " layer_2 = fully_connected(layer_1, n_hidden_2, \n", 153 | " activation=tf.nn.relu)\n", 154 | " with tf.variable_scope('fc_3'):\n", 155 | " out_layer = fully_connected(layer_2, n_classes, \n", 156 | " activation=tf.nn.relu)\n", 157 | "\n", 158 | " return out_layer\n", 159 | " \n", 160 | " \n", 161 | " with tf.variable_scope('siamese_net', reuse=False):\n", 162 | " pred_left = build_mlp(tf_x_1)\n", 163 | " with tf.variable_scope('siamese_net', reuse=True):\n", 164 | " pred_right = build_mlp(tf_x_2)\n", 165 | " \n", 166 | " # Loss and optimizer\n", 167 | " loss = contrastive_loss(pred_left, pred_right)\n", 168 | " cost = tf.reduce_mean(loss, name='cost')\n", 169 | " optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", 170 | " train = optimizer.minimize(cost, name='train')\n", 171 | " \n", 172 | "##########################\n", 173 | "### TRAINING & EVALUATION\n", 174 | "##########################\n", 175 | "\n", 176 | "np.random.seed(random_seed) # set seed for mnist shuffling\n", 177 | "mnist = input_data.read_data_sets(\"./\", one_hot=False)\n", 178 | "\n", 179 | "with tf.Session(graph=g) as sess:\n", 180 | " \n", 181 | " print('Initializing variables:')\n", 182 | " sess.run(tf.global_variables_initializer())\n", 183 | " for i in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,\n", 184 | " scope='siamese_net'):\n", 185 | " print(i)\n", 186 | "\n", 187 | " for epoch in range(training_epochs):\n", 188 | " avg_cost = 0.\n", 189 | " \n", 190 | " total_batch = mnist.train.num_examples // batch_size // 2\n", 191 | "\n", 192 | " for i in range(total_batch):\n", 193 | " \n", 194 | " batch_x_1, batch_y_1 = mnist.train.next_batch(batch_size)\n", 195 | " batch_x_2, batch_y_2 = mnist.train.next_batch(batch_size)\n", 196 | " batch_y = (batch_y_1 == batch_y_2).astype('float32')\n", 197 | " \n", 198 | " _, c = sess.run(['train', 'cost:0'], feed_dict={'inputs_1:0': batch_x_1,\n", 199 | " 'inputs_2:0': batch_x_2,\n", 200 | " 
'targets:0': batch_y})\n", 201 | " avg_cost += c\n", 202 | "\n", 203 | " print(\"Epoch: %03d | AvgCost: %.3f\" % (epoch + 1, avg_cost / (i + 1)))" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "- Todo: add embedding visualization" 211 | ] 212 | } 213 | ], 214 | "metadata": { 215 | "kernelspec": { 216 | "display_name": "Python 3", 217 | "language": "python", 218 | "name": "python3" 219 | }, 220 | "language_info": { 221 | "codemirror_mode": { 222 | "name": "ipython", 223 | "version": 3 224 | }, 225 | "file_extension": ".py", 226 | "mimetype": "text/x-python", 227 | "name": "python", 228 | "nbconvert_exporter": "python", 229 | "pygments_lexer": "ipython3", 230 | "version": "3.7.1" 231 | } 232 | }, 233 | "nbformat": 4, 234 | "nbformat_minor": 2 235 | } 236 | -------------------------------------------------------------------------------- /tensorflow1_ipynb/mlp/mlp-basic.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.\n", 8 | "- Author: Sebastian Raschka\n", 9 | "- GitHub Repository: https://github.com/rasbt/deeplearning-models" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Sebastian Raschka \n", 22 | "\n", 23 | "CPython 3.6.0\n", 24 | "IPython 6.0.0\n", 25 | "\n", 26 | "tensorflow 1.1.0\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "%load_ext watermark\n", 32 | "%watermark -a 'Sebastian Raschka' -v -p tensorflow" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "# Model Zoo -- Multilayer Perceptron" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "### Low-level Implementation" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 2, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "Extracting ./train-images-idx3-ubyte.gz\n", 59 | "Extracting ./train-labels-idx1-ubyte.gz\n", 60 | "Extracting ./t10k-images-idx3-ubyte.gz\n", 61 | "Extracting ./t10k-labels-idx1-ubyte.gz\n", 62 | "Epoch: 001 | AvgCost: 0.349 | Train/Valid ACC: 0.945/0.944\n", 63 | "Epoch: 002 | AvgCost: 0.164 | Train/Valid ACC: 0.962/0.961\n", 64 | "Epoch: 003 | AvgCost: 0.118 | Train/Valid ACC: 0.973/0.969\n", 65 | "Epoch: 004 | AvgCost: 0.092 | Train/Valid ACC: 0.979/0.971\n", 66 | "Epoch: 005 | AvgCost: 0.075 | Train/Valid ACC: 0.983/0.974\n", 67 | "Epoch: 006 | AvgCost: 0.061 | Train/Valid ACC: 0.985/0.976\n", 68 | "Epoch: 007 | AvgCost: 0.052 | Train/Valid ACC: 0.988/0.976\n", 69 | "Epoch: 008 | AvgCost: 0.043 | Train/Valid ACC: 0.991/0.978\n", 70 | "Epoch: 009 | AvgCost: 0.037 | Train/Valid ACC: 0.993/0.980\n", 71 | "Epoch: 010 | AvgCost: 0.030 | Train/Valid ACC: 0.994/0.979\n", 72 | "Test ACC: 0.975\n" 73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "import tensorflow as tf\n", 78 | "from tensorflow.examples.tutorials.mnist import input_data\n", 79 | "\n", 80 | "\n", 81 | "##########################\n", 82 | "### DATASET\n", 83 | "##########################\n", 84 | "\n", 85 | "mnist = input_data.read_data_sets(\"./\", one_hot=True)\n", 86 | "\n", 87 | "\n", 88 | 
"##########################\n", 89 | "### SETTINGS\n", 90 | "##########################\n", 91 | "\n", 92 | "# Hyperparameters\n", 93 | "learning_rate = 0.1\n", 94 | "training_epochs = 10\n", 95 | "batch_size = 64\n", 96 | "\n", 97 | "# Architecture\n", 98 | "n_hidden_1 = 128\n", 99 | "n_hidden_2 = 256\n", 100 | "n_input = 784\n", 101 | "n_classes = 10\n", 102 | "\n", 103 | "\n", 104 | "##########################\n", 105 | "### GRAPH DEFINITION\n", 106 | "##########################\n", 107 | "\n", 108 | "g = tf.Graph()\n", 109 | "with g.as_default():\n", 110 | "\n", 111 | " # Input data\n", 112 | " tf_x = tf.placeholder(tf.float32, [None, n_input], name='features')\n", 113 | " tf_y = tf.placeholder(tf.float32, [None, n_classes], name='targets')\n", 114 | "\n", 115 | " # Model parameters\n", 116 | " weights = {\n", 117 | " 'h1': tf.Variable(tf.truncated_normal([n_input, n_hidden_1], stddev=0.1)),\n", 118 | " 'h2': tf.Variable(tf.truncated_normal([n_hidden_1, n_hidden_2], stddev=0.1)),\n", 119 | " 'out': tf.Variable(tf.truncated_normal([n_hidden_2, n_classes], stddev=0.1))\n", 120 | " }\n", 121 | " biases = {\n", 122 | " 'b1': tf.Variable(tf.zeros([n_hidden_1])),\n", 123 | " 'b2': tf.Variable(tf.zeros([n_hidden_2])),\n", 124 | " 'out': tf.Variable(tf.zeros([n_classes]))\n", 125 | " }\n", 126 | "\n", 127 | " # Multilayer perceptron\n", 128 | " layer_1 = tf.add(tf.matmul(tf_x, weights['h1']), biases['b1'])\n", 129 | " layer_1 = tf.nn.relu(layer_1)\n", 130 | " layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])\n", 131 | " layer_2 = tf.nn.relu(layer_2)\n", 132 | " out_layer = tf.matmul(layer_2, weights['out']) + biases['out']\n", 133 | "\n", 134 | " # Loss and optimizer\n", 135 | " loss = tf.nn.softmax_cross_entropy_with_logits(logits=out_layer, labels=tf_y)\n", 136 | " cost = tf.reduce_mean(loss, name='cost')\n", 137 | " optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", 138 | " train = optimizer.minimize(cost, name='train')\n", 139 | "\n", 140 | " # Prediction\n", 141 | " correct_prediction = tf.equal(tf.argmax(tf_y, 1), tf.argmax(out_layer, 1))\n", 142 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')\n", 143 | "\n", 144 | " \n", 145 | "##########################\n", 146 | "### TRAINING & EVALUATION\n", 147 | "##########################\n", 148 | "\n", 149 | "with tf.Session(graph=g) as sess:\n", 150 | " sess.run(tf.global_variables_initializer())\n", 151 | "\n", 152 | " for epoch in range(training_epochs):\n", 153 | " avg_cost = 0.\n", 154 | " total_batch = mnist.train.num_examples // batch_size\n", 155 | "\n", 156 | " for i in range(total_batch):\n", 157 | " batch_x, batch_y = mnist.train.next_batch(batch_size)\n", 158 | " _, c = sess.run(['train', 'cost:0'], feed_dict={'features:0': batch_x,\n", 159 | " 'targets:0': batch_y})\n", 160 | " avg_cost += c\n", 161 | " \n", 162 | " train_acc = sess.run('accuracy:0', feed_dict={'features:0': mnist.train.images,\n", 163 | " 'targets:0': mnist.train.labels})\n", 164 | " valid_acc = sess.run('accuracy:0', feed_dict={'features:0': mnist.validation.images,\n", 165 | " 'targets:0': mnist.validation.labels}) \n", 166 | " \n", 167 | " print(\"Epoch: %03d | AvgCost: %.3f\" % (epoch + 1, avg_cost / (i + 1)), end=\"\")\n", 168 | " print(\" | Train/Valid ACC: %.3f/%.3f\" % (train_acc, valid_acc))\n", 169 | " \n", 170 | " test_acc = sess.run(accuracy, feed_dict={'features:0': mnist.test.images,\n", 171 | " 'targets:0': mnist.test.labels})\n", 172 | " print('Test ACC: %.3f' % 
test_acc)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "### tensorflow.layers Abstraction" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 3, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "name": "stdout", 189 | "output_type": "stream", 190 | "text": [ 191 | "Extracting ./train-images-idx3-ubyte.gz\n", 192 | "Extracting ./train-labels-idx1-ubyte.gz\n", 193 | "Extracting ./t10k-images-idx3-ubyte.gz\n", 194 | "Extracting ./t10k-labels-idx1-ubyte.gz\n", 195 | "Epoch: 001 | AvgCost: 0.344 | Train/Valid ACC: 0.946/0.946\n", 196 | "Epoch: 002 | AvgCost: 0.159 | Train/Valid ACC: 0.965/0.965\n", 197 | "Epoch: 003 | AvgCost: 0.115 | Train/Valid ACC: 0.973/0.969\n", 198 | "Epoch: 004 | AvgCost: 0.090 | Train/Valid ACC: 0.979/0.973\n", 199 | "Epoch: 005 | AvgCost: 0.073 | Train/Valid ACC: 0.978/0.971\n", 200 | "Epoch: 006 | AvgCost: 0.062 | Train/Valid ACC: 0.985/0.975\n", 201 | "Epoch: 007 | AvgCost: 0.051 | Train/Valid ACC: 0.990/0.977\n", 202 | "Epoch: 008 | AvgCost: 0.043 | Train/Valid ACC: 0.992/0.979\n", 203 | "Epoch: 009 | AvgCost: 0.036 | Train/Valid ACC: 0.993/0.978\n", 204 | "Epoch: 010 | AvgCost: 0.030 | Train/Valid ACC: 0.991/0.975\n", 205 | "Test ACC: 0.975\n" 206 | ] 207 | } 208 | ], 209 | "source": [ 210 | "import tensorflow as tf\n", 211 | "from tensorflow.examples.tutorials.mnist import input_data\n", 212 | "\n", 213 | "\n", 214 | "##########################\n", 215 | "### DATASET\n", 216 | "##########################\n", 217 | "\n", 218 | "mnist = input_data.read_data_sets(\"./\", one_hot=True)\n", 219 | "\n", 220 | "\n", 221 | "##########################\n", 222 | "### SETTINGS\n", 223 | "##########################\n", 224 | "\n", 225 | "# Hyperparameters\n", 226 | "learning_rate = 0.1\n", 227 | "training_epochs = 10\n", 228 | "batch_size = 64\n", 229 | "\n", 230 | "# Architecture\n", 231 | "n_hidden_1 = 128\n", 232 | "n_hidden_2 = 256\n", 233 | "n_input = 784\n", 234 | "n_classes = 10\n", 235 | "\n", 236 | "\n", 237 | "##########################\n", 238 | "### GRAPH DEFINITION\n", 239 | "##########################\n", 240 | "\n", 241 | "g = tf.Graph()\n", 242 | "with g.as_default():\n", 243 | "\n", 244 | " # Input data\n", 245 | " tf_x = tf.placeholder(tf.float32, [None, n_input], name='features')\n", 246 | " tf_y = tf.placeholder(tf.float32, [None, n_classes], name='targets')\n", 247 | "\n", 248 | " # Multilayer perceptron\n", 249 | " layer_1 = tf.layers.dense(tf_x, n_hidden_1, activation=tf.nn.relu, \n", 250 | " kernel_initializer=tf.truncated_normal_initializer(stddev=0.1))\n", 251 | " layer_2 = tf.layers.dense(layer_1, n_hidden_2, activation=tf.nn.relu,\n", 252 | " kernel_initializer=tf.truncated_normal_initializer(stddev=0.1))\n", 253 | " out_layer = tf.layers.dense(layer_2, n_classes, activation=None)\n", 254 | "\n", 255 | " # Loss and optimizer\n", 256 | " loss = tf.nn.softmax_cross_entropy_with_logits(logits=out_layer, labels=tf_y)\n", 257 | " cost = tf.reduce_mean(loss, name='cost')\n", 258 | " optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", 259 | " train = optimizer.minimize(cost, name='train')\n", 260 | "\n", 261 | " # Prediction\n", 262 | " correct_prediction = tf.equal(tf.argmax(tf_y, 1), tf.argmax(out_layer, 1))\n", 263 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')\n", 264 | "\n", 265 | "\n", 266 | "##########################\n", 267 | "### TRAINING & EVALUATION\n", 268 | 
"##########################\n", 269 | " \n", 270 | "with tf.Session(graph=g) as sess:\n", 271 | " sess.run(tf.global_variables_initializer())\n", 272 | "\n", 273 | " for epoch in range(training_epochs):\n", 274 | " avg_cost = 0.\n", 275 | " total_batch = mnist.train.num_examples // batch_size\n", 276 | "\n", 277 | " for i in range(total_batch):\n", 278 | " batch_x, batch_y = mnist.train.next_batch(batch_size)\n", 279 | " _, c = sess.run(['train', 'cost:0'], feed_dict={'features:0': batch_x,\n", 280 | " 'targets:0': batch_y})\n", 281 | " avg_cost += c\n", 282 | " \n", 283 | " train_acc = sess.run('accuracy:0', feed_dict={'features:0': mnist.train.images,\n", 284 | " 'targets:0': mnist.train.labels})\n", 285 | " valid_acc = sess.run('accuracy:0', feed_dict={'features:0': mnist.validation.images,\n", 286 | " 'targets:0': mnist.validation.labels}) \n", 287 | " \n", 288 | " print(\"Epoch: %03d | AvgCost: %.3f\" % (epoch + 1, avg_cost / (i + 1)), end=\"\")\n", 289 | " print(\" | Train/Valid ACC: %.3f/%.3f\" % (train_acc, valid_acc))\n", 290 | " \n", 291 | " test_acc = sess.run('accuracy:0', feed_dict={'features:0': mnist.test.images,\n", 292 | " 'targets:0': mnist.test.labels})\n", 293 | " print('Test ACC: %.3f' % test_acc)" 294 | ] 295 | } 296 | ], 297 | "metadata": { 298 | "kernelspec": { 299 | "display_name": "Python 3", 300 | "language": "python", 301 | "name": "python3" 302 | }, 303 | "language_info": { 304 | "codemirror_mode": { 305 | "name": "ipython", 306 | "version": 3 307 | }, 308 | "file_extension": ".py", 309 | "mimetype": "text/x-python", 310 | "name": "python", 311 | "nbconvert_exporter": "python", 312 | "pygments_lexer": "ipython3", 313 | "version": "3.7.1" 314 | } 315 | }, 316 | "nbformat": 4, 317 | "nbformat_minor": 2 318 | } 319 | -------------------------------------------------------------------------------- /tensorflow1_ipynb/mlp/mlp-batchnorm.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.\n", 8 | "- Author: Sebastian Raschka\n", 9 | "- GitHub Repository: https://github.com/rasbt/deeplearning-models" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Sebastian Raschka \n", 22 | "\n", 23 | "CPython 3.6.1\n", 24 | "IPython 6.0.0\n", 25 | "\n", 26 | "tensorflow 1.2.0\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "%load_ext watermark\n", 32 | "%watermark -a 'Sebastian Raschka' -v -p tensorflow" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "# Model Zoo -- Multilayer Perceptron with Batch Normalization" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "name": "stdout", 49 | "output_type": "stream", 50 | "text": [ 51 | "Extracting ./train-images-idx3-ubyte.gz\n", 52 | "Extracting ./train-labels-idx1-ubyte.gz\n", 53 | "Extracting ./t10k-images-idx3-ubyte.gz\n", 54 | "Extracting ./t10k-labels-idx1-ubyte.gz\n" 55 | ] 56 | } 57 | ], 58 | "source": [ 59 | "import tensorflow as tf\n", 60 | "from tensorflow.examples.tutorials.mnist import input_data\n", 61 | "\n", 62 | "\n", 63 | "##########################\n", 64 | "### DATASET\n", 65 | 
"##########################\n", 66 | "\n", 67 | "mnist = input_data.read_data_sets(\"./\", one_hot=True)\n", 68 | "\n", 69 | "\n", 70 | "##########################\n", 71 | "### SETTINGS\n", 72 | "##########################\n", 73 | "\n", 74 | "# Hyperparameters\n", 75 | "learning_rate = 0.1\n", 76 | "training_epochs = 10\n", 77 | "batch_size = 64\n", 78 | "\n", 79 | "# Architecture\n", 80 | "n_hidden_1 = 128\n", 81 | "n_hidden_2 = 256\n", 82 | "n_input = 784\n", 83 | "n_classes = 10\n", 84 | "\n", 85 | "# Other\n", 86 | "random_seed = 123\n", 87 | "\n", 88 | "\n", 89 | "##########################\n", 90 | "### GRAPH DEFINITION\n", 91 | "##########################\n", 92 | "\n", 93 | "g = tf.Graph()\n", 94 | "with g.as_default():\n", 95 | " \n", 96 | " tf.set_random_seed(random_seed)\n", 97 | " \n", 98 | " # Batchnorm settings\n", 99 | " training_phase = tf.placeholder(tf.bool, None, name='training_phase')\n", 100 | "\n", 101 | " # Input data\n", 102 | " tf_x = tf.placeholder(tf.float32, [None, n_input], name='features')\n", 103 | " tf_y = tf.placeholder(tf.float32, [None, n_classes], name='targets')\n", 104 | "\n", 105 | " # Multilayer perceptron\n", 106 | " layer_1 = tf.layers.dense(tf_x, n_hidden_1, \n", 107 | " activation=None, # Batchnorm comes before nonlinear activation\n", 108 | " use_bias=False, # Note that no bias unit is used in batchnorm\n", 109 | " kernel_initializer=tf.truncated_normal_initializer(stddev=0.1))\n", 110 | " \n", 111 | " layer_1 = tf.layers.batch_normalization(layer_1, training=training_phase)\n", 112 | " layer_1 = tf.nn.relu(layer_1)\n", 113 | " \n", 114 | " layer_2 = tf.layers.dense(layer_1, n_hidden_2, \n", 115 | " activation=None,\n", 116 | " use_bias=False,\n", 117 | " kernel_initializer=tf.truncated_normal_initializer(stddev=0.1))\n", 118 | " layer_2 = tf.layers.batch_normalization(layer_2, training=training_phase)\n", 119 | " layer_2 = tf.nn.relu(layer_2)\n", 120 | " \n", 121 | " out_layer = tf.layers.dense(layer_2, n_classes, activation=None, name='logits')\n", 122 | "\n", 123 | " # Loss and optimizer\n", 124 | " loss = tf.nn.softmax_cross_entropy_with_logits(logits=out_layer, labels=tf_y)\n", 125 | " cost = tf.reduce_mean(loss, name='cost')\n", 126 | " \n", 127 | " # control dependency to ensure that batchnorm parameters are also updated\n", 128 | " with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):\n", 129 | " optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", 130 | " train = optimizer.minimize(cost, name='train')\n", 131 | "\n", 132 | " # Prediction\n", 133 | " correct_prediction = tf.equal(tf.argmax(tf_y, 1), tf.argmax(out_layer, 1))\n", 134 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 3, 140 | "metadata": {}, 141 | "outputs": [ 142 | { 143 | "name": "stdout", 144 | "output_type": "stream", 145 | "text": [ 146 | "Epoch: 001 | AvgCost: 0.280 | Train/Valid ACC: 0.962/0.960\n", 147 | "Epoch: 002 | AvgCost: 0.131 | Train/Valid ACC: 0.978/0.972\n", 148 | "Epoch: 003 | AvgCost: 0.095 | Train/Valid ACC: 0.984/0.973\n", 149 | "Epoch: 004 | AvgCost: 0.074 | Train/Valid ACC: 0.988/0.976\n", 150 | "Epoch: 005 | AvgCost: 0.059 | Train/Valid ACC: 0.992/0.980\n", 151 | "Epoch: 006 | AvgCost: 0.049 | Train/Valid ACC: 0.995/0.980\n", 152 | "Epoch: 007 | AvgCost: 0.039 | Train/Valid ACC: 0.996/0.979\n", 153 | "Epoch: 008 | AvgCost: 0.033 | Train/Valid ACC: 0.997/0.981\n", 154 | "Epoch: 009 | 
AvgCost: 0.030 | Train/Valid ACC: 0.997/0.977\n", 155 | "Epoch: 010 | AvgCost: 0.024 | Train/Valid ACC: 0.998/0.979\n", 156 | "Test ACC: 0.977\n" 157 | ] 158 | } 159 | ], 160 | "source": [ 161 | "import numpy as np\n", 162 | "\n", 163 | "##########################\n", 164 | "### TRAINING & EVALUATION\n", 165 | "##########################\n", 166 | " \n", 167 | "with tf.Session(graph=g) as sess:\n", 168 | " sess.run(tf.global_variables_initializer())\n", 169 | "\n", 170 | " np.random.seed(random_seed) # random seed for mnist iterator\n", 171 | " for epoch in range(training_epochs):\n", 172 | " avg_cost = 0.\n", 173 | " total_batch = mnist.train.num_examples // batch_size\n", 174 | "\n", 175 | " for i in range(total_batch):\n", 176 | " batch_x, batch_y = mnist.train.next_batch(batch_size)\n", 177 | " _, c = sess.run(['train', 'cost:0'], feed_dict={'features:0': batch_x,\n", 178 | " 'targets:0': batch_y,\n", 179 | " 'training_phase:0': True})\n", 180 | " avg_cost += c\n", 181 | " \n", 182 | " train_acc = sess.run('accuracy:0', feed_dict={'features:0': mnist.train.images,\n", 183 | " 'targets:0': mnist.train.labels,\n", 184 | " 'training_phase:0': False})\n", 185 | " valid_acc = sess.run('accuracy:0', feed_dict={'features:0': mnist.validation.images,\n", 186 | " 'targets:0': mnist.validation.labels,\n", 187 | " 'training_phase:0': False}) \n", 188 | " \n", 189 | " print(\"Epoch: %03d | AvgCost: %.3f\" % (epoch + 1, avg_cost / (i + 1)), end=\"\")\n", 190 | " print(\" | Train/Valid ACC: %.3f/%.3f\" % (train_acc, valid_acc))\n", 191 | " \n", 192 | " test_acc = sess.run('accuracy:0', feed_dict={'features:0': mnist.test.images,\n", 193 | " 'targets:0': mnist.test.labels,\n", 194 | " 'training_phase:0': False})\n", 195 | " print('Test ACC: %.3f' % test_acc)" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": { 202 | "collapsed": true 203 | }, 204 | "outputs": [], 205 | "source": [] 206 | } 207 | ], 208 | "metadata": { 209 | "kernelspec": { 210 | "display_name": "Python 3", 211 | "language": "python", 212 | "name": "python3" 213 | }, 214 | "language_info": { 215 | "codemirror_mode": { 216 | "name": "ipython", 217 | "version": 3 218 | }, 219 | "file_extension": ".py", 220 | "mimetype": "text/x-python", 221 | "name": "python", 222 | "nbconvert_exporter": "python", 223 | "pygments_lexer": "ipython3", 224 | "version": "3.7.1" 225 | } 226 | }, 227 | "nbformat": 4, 228 | "nbformat_minor": 2 229 | } 230 | --------------------------------------------------------------------------------