├── Readme.md ├── .gitignore ├── jupyter_hello.py ├── jupyter_hello.ipynb ├── linux ├── run-nvidia-docker ├── install-docker └── install-nvidia-docker ├── docker-compose.yml ├── mac └── install-docker ├── 1-Assignment.py ├── 1-5-torchvision.py ├── Dockerfile ├── 1-4-gpu.py ├── 2-1-perceptron.py ├── 2-2.py ├── 2-4.py ├── 3-1.py ├── 1-3.py ├── 2-3.py ├── 2-Assignment.py ├── 1-4-gradient.py ├── 4-3.py ├── 1-5-datasets.py ├── 4-Assignment.py ├── 4-5.py ├── 2-5.py ├── 4-4.py ├── readme.md ├── Iris.csv ├── 3-Assignment.py ├── 3-2_3.py ├── 3-4_5.py └── Admission_Predict.csv /Readme.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints/ 2 | .vscode/ 3 | -------------------------------------------------------------------------------- /jupyter_hello.py: -------------------------------------------------------------------------------- 1 | #%% 2 | import torch 3 | torch.__version__ -------------------------------------------------------------------------------- /jupyter_hello.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 2 6 | } 7 | -------------------------------------------------------------------------------- /linux/run-nvidia-docker: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -x 3 | docker build --tag pytorch-gpu . 4 | docker run --runtime nvidia -p 8888:8888 --volume $(pwd):/src pytorch-gpu 5 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | notebook: 4 | build: ./ 5 | image: anaconda-pytorch-notebook 6 | volumes: 7 | - ./:/src 8 | ports: 9 | - "8888:8888" 10 | -------------------------------------------------------------------------------- /mac/install-docker: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # homebrew if you don't already have it 4 | which brew || ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" 5 | 6 | # docker goes on as a cask, it's a 'mac app bundle' 7 | brew cask install docker 8 | brew install docker-compose -------------------------------------------------------------------------------- /linux/install-docker: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | # plain docker 5 | sudo apt install apt-transport-https ca-certificates curl software-properties-common 6 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - 7 | # Add the package repositories 8 | sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" 9 | sudo apt update 10 | # You may need to pin a specific version with docker-ce=... 
if nvidia docker is lagging the current docker release 11 | sudo apt install docker-ce docker-compose 12 | sudo docker run hello-world 13 | 14 | 15 | -------------------------------------------------------------------------------- /linux/install-nvidia-docker: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | # make sure to have installed docker first 5 | # and note, nvidia docker is tightly paired to docker versions 6 | # so you may need to remove and reinstall a specific version of docker 7 | 8 | # nvidia docker 9 | # Add the package repositories 10 | curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | \ 11 | sudo apt-key add - 12 | distribution=$(. /etc/os-release;echo $ID$VERSION_ID) 13 | curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | \ 14 | sudo tee /etc/apt/sources.list.d/nvidia-docker.list 15 | sudo apt update 16 | 17 | # Install nvidia-docker2 and reload the Docker daemon configuration 18 | sudo apt install -y nvidia-docker2 19 | sudo pkill -SIGHUP dockerd 20 | 21 | # Test nvidia-smi with the latest official CUDA image 22 | sudo docker run --runtime=nvidia --rm nvidia/cuda nvidia-smi 23 | 24 | 25 | -------------------------------------------------------------------------------- /1-Assignment.py: -------------------------------------------------------------------------------- 1 | #%% 2 | import torchvision 3 | import torch 4 | 5 | # get some data -- don't forget to download it 6 | mnist = torchvision.datasets.MNIST('./var', 7 | download=True, 8 | transform=torchvision.transforms.ToTensor()) 9 | 10 | mnist[0] 11 | 12 | #%% 13 | 14 | # each batch, let's make a tensor of batch averages 15 | batches = torch.utils.data.DataLoader(mnist, 16 | batch_size=32) 17 | 18 | batch_averages = torch.Tensor([ 19 | batch[0].mean() for batch in batches 20 | ]) 21 | 22 | #%% 23 | # and there we have it 24 | batch_averages.mean() 25 | 26 | #%% now just for kicks -- let's compute the average a bit by hand 27 | # notice that the overall average is different than the batch-wise 28 | # average -- this is normal something to think about when 29 | # maching learning with batch training 30 | all_images = torch.cat([ 31 | image for image, label in mnist 32 | ]) 33 | 34 | all_images.shape, all_images.mean() -------------------------------------------------------------------------------- /1-5-torchvision.py: -------------------------------------------------------------------------------- 1 | #%% 2 | # there are also a lot of predefined datasets in torchvision 3 | import torch 4 | import torchvision 5 | import matplotlib.pyplot as plt 6 | dir(torchvision.datasets) 7 | 8 | #%% 9 | # let's take a look at some of this data 10 | # really handy built in download! 
11 | cifar = torchvision.datasets.CIFAR10('./var', download=True) 12 | cifar[0] 13 | 14 | #%% 15 | # looks like this is an image 16 | fig = plt.figure(figsize=(1,1)) 17 | sub = fig.add_subplot(111) 18 | sub.imshow(cifar[0][0]) 19 | 20 | #%% 21 | # how, that's a frog -- but -- we need a tensor of a 22 | # frog -- so that's where transforms come in 23 | # transforms are built in to torchvision and are 24 | # objects that implement __call__ can change the data 25 | from torchvision import transforms 26 | pipeline = transforms.Compose([ 27 | transforms.ToTensor() 28 | ]) 29 | cifar_tr = torchvision.datasets.CIFAR10('./var', transform=pipeline) 30 | 31 | #%% 32 | cifar_tr[0] -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04 2 | 3 | 4 | ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 5 | ENV PATH /opt/conda/bin:$PATH 6 | 7 | RUN apt-get update --fix-missing && apt-get install -y wget bzip2 ca-certificates \ 8 | libglib2.0-0 libxext6 libsm6 libxrender1 \ 9 | git mercurial subversion 10 | 11 | RUN wget --quiet https://repo.anaconda.com/archive/Anaconda3-5.3.0-Linux-x86_64.sh -O ~/anaconda.sh && \ 12 | /bin/bash ~/anaconda.sh -b -p /opt/conda && \ 13 | rm ~/anaconda.sh && \ 14 | ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ 15 | echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ 16 | conda install pytorch torchvision cuda100 -c pytorch && \ 17 | echo "conda activate base" >> ~/.bashrc 18 | 19 | #all the code samples for the video series 20 | VOLUME ["/src"] 21 | 22 | #serve up a jupyter notebook 23 | WORKDIR /src 24 | EXPOSE 8888 25 | 26 | #this has security disabled which is less fuss for learning purposes 27 | CMD jupyter notebook --port=8888 --ip=0.0.0.0 --allow-root --NotebookApp.token='' --NotebookApp.disable_check_xsrf=True 28 | -------------------------------------------------------------------------------- /1-4-gpu.py: -------------------------------------------------------------------------------- 1 | #%% 2 | # You'll need a GPU to get this to run! 3 | # `./linux/install-docker` 4 | # `./linux/install-nvidia-docker` 5 | # `./linux/run-nvidia-docker` 6 | # get your notebook server started 7 | 8 | # now let's talk about devices 9 | import torch 10 | cpu = torch.device('cpu') 11 | gpu = torch.device('cuda') 12 | cpu, gpu 13 | 14 | #%% 15 | # when you allocate a tensor, it's on a device, in the contexgt 16 | # of that device, if you don't specify, it's on the CPU 17 | x = torch.tensor([1.5]) 18 | x, x.device 19 | 20 | #%% 21 | # you can explicitly place, which is how I do it in general 22 | y = torch.tensor([2.5], device=cpu) 23 | y, y.device 24 | 25 | #%% 26 | # and now -- GPU 27 | z = torch.tensor([3.5], device=gpu) 28 | z, z.device 29 | 30 | #%% 31 | # you cannot mix devices, this is the important thing to remember 32 | # particularly when loading up data -- make sure you put things 33 | # together on a device! 34 | x + y + z 35 | 36 | #%% 37 | # but you can move things around to work on the gpu 38 | a = x.to(gpu) + y.to(gpu) + z 39 | a 40 | 41 | #%% 42 | # and you can move things back to the CPU 43 | b = a.to(cpu) 44 | b, b.device -------------------------------------------------------------------------------- /2-1-perceptron.py: -------------------------------------------------------------------------------- 1 | #%% 2 | # Here we can use network x to create a -- network. 
This isn't 3 | # exactly how a neural network works in practice, but it is a 4 | # great way to create a visualization you can modify in code 5 | 6 | #%% 7 | import numpy as np 8 | import matplotlib.pyplot as plt 9 | import networkx as nx 10 | import math 11 | 12 | #%% 13 | # Building a graph with network X, a neural network 14 | # consists of three basic kinds of nodes 15 | # - inputs 16 | # - activations, these are the connections between all nodes 17 | # - outputs, this is how you tell what your network did 18 | 19 | #%% 20 | dense = nx.Graph() 21 | inputs = {i: (0, i) for i in range(0, 5)} 22 | activations = {i+100: (1, i) for i in range(0, 5)} 23 | outputs= {i+1000: (2, i) for i in range(0, 2)} 24 | all = {**inputs, **activations, **outputs} 25 | # and now -- fully connected, every input talks to every 26 | # activation -- this is the classic neural network 27 | for input in inputs: 28 | for activation in activations: 29 | dense.add_edge(input, activation) 30 | for activation in activations: 31 | for output in outputs: 32 | dense.add_edge(activation, output) 33 | nx.draw_networkx_nodes(dense, all, 34 | nodelist=all.keys(), node_color='b') 35 | nx.draw_networkx_edges(dense, all, edge_color='w') 36 | axes = plt.axis('off') 37 | 38 | #%% 39 | # in practice, these graphs are represented as tensors at each 40 | # layer and are connected via operations, such as a tensor 41 | # product which mathematically connects nodes via 42 | # multiplication and addition -------------------------------------------------------------------------------- /2-2.py: -------------------------------------------------------------------------------- 1 | #%% 2 | # Starting off, we need to import torch 3 | import torch 4 | 5 | #%% 6 | # this neural network will focus just on creating the network 7 | # and not on data or data loading, so we'll keep this simple and 8 | # build up the network step by step, in this first set of code 9 | # we'll work with dummy random inputs and outputs, and connect 10 | # them in a network 11 | 12 | #%% 13 | # inputs - -this is a 'batch' of size 1, with 1 color channel -- 14 | # imagine this is greyscale, and 64x and 64y pixels 15 | 16 | #%% 17 | inputs = torch.rand(1, 1, 64, 64) 18 | inputs 19 | 20 | #%% 21 | # outputs -- pretend we are building a binary classifier, 22 | # so we'll have to output possibilites, with a batch size of 1 23 | # we'll use rand again, so each thing can be a little bit 24 | # category 0 and a little bit category 1 25 | 26 | #%% 27 | outputs = torch.rand(1, 2) 28 | outputs 29 | 30 | #%% 31 | # OK in a real model, those inputs and outputs would be your actual 32 | # data, loaded up, in datasets, converted into batches 33 | # for this simple model, it's just tensors, no data wrangling 34 | # now for a sequential network, let's do a simple multi 35 | # layer perceptron 36 | 37 | 38 | #%% 39 | # now we start up a model with layers of linear -- these will 40 | # themselves have tensors inside filled with random numbers 41 | # these random numbers are called parameters, and these 42 | # parameters are the things that machine learning learns 43 | # basically -- the parameters -- sometimes called weights 44 | # are updated by learning algorithms, searching for the best 45 | # available answer 46 | 47 | #%% 48 | model = torch.nn.Sequential( 49 | # input features are the size of one image 50 | # outputs are how many we have when done 51 | # the 64 has to 'match' the final dimnension of the input 52 | # try changing it to another number to see errors! 
53 | torch.nn.Linear(64, 256), 54 | torch.nn.Linear(256, 256), 55 | torch.nn.Linear(256, 2), 56 | ) 57 | 58 | #%% 59 | # and -- this isn't learning, we're just running our random 60 | # initialized linear network over our input 61 | 62 | #%% 63 | result = model(inputs) 64 | result, result.shape 65 | 66 | #%% 67 | # hmm -- that's not two convenient output labels, we have some 68 | # more work to do in the next videos -- but we have a model! 69 | -------------------------------------------------------------------------------- /2-4.py: -------------------------------------------------------------------------------- 1 | #%% 2 | # Starting off, we need to import torch 3 | import torch 4 | 5 | #%% 6 | # here are the inputs and outputs from the last video 7 | # as well as the model with activations 8 | 9 | #%% 10 | inputs = torch.rand(1, 1, 64, 64) 11 | outputs = torch.rand(1, 2) 12 | model = torch.nn.Sequential( 13 | torch.nn.Linear(64, 256), 14 | torch.nn.ReLU(), 15 | torch.nn.Linear(256, 256), 16 | torch.nn.ReLU(), 17 | torch.nn.Linear(256, 2), 18 | ) 19 | 20 | #%% 21 | # now we have our inputs as sample data and our outputs 22 | # we are trying to generate with our network -- the 23 | # application of a network -- it's computation to turn input 24 | # into output is done with a forward pass simply by calling 25 | # the model as a function over the inputs 26 | 27 | #%% 28 | test_results = model(inputs) 29 | test_results 30 | 31 | #%% 32 | # don't forget the loss function! the loss function is the 33 | # key driver to compute gradients, which are needed to drive 34 | # learning 35 | 36 | #%% 37 | loss = torch.nn.MSELoss()(test_results, outputs) 38 | loss 39 | 40 | 41 | #%% 42 | # now we compute the gradients, this is done for each forward 43 | # pass after you have compute the loss -- basically you are 44 | # zeroing out as the gradients will differ on each pass 45 | # once the gradients are zeroed, then you use the loss to 46 | # drive the backward propagation of gradients through the model 47 | # this is pretty much the heart of what pytorch does for you -- 48 | # automatically computing gradients and propagating them back 49 | 50 | #%% 51 | model.zero_grad() 52 | loss.backward() 53 | 54 | #%% 55 | # and now -- we'll do some very simple learning -- remember 56 | # that the gradients tell you how far away you are from the 57 | # right answer -- so you move in the opposite direction to 58 | # get to the answer, meaning -- we just subtract! 59 | # one additional concept here -- learning rate, which we'll 60 | # experiment with more in later videos, but for now we'll use 61 | # a very simple constant learning rate 62 | 63 | #%% 64 | learning_rate = 0.001 65 | for parameter in model.parameters(): 66 | parameter.data -= parameter.grad.data * learning_rate 67 | 68 | 69 | #%% 70 | # now -- we've learned -- and should be closer to the answer 71 | # let's run the model with our new updated parameters 72 | # and see... 73 | 74 | #%% 75 | after_learning = model(inputs) 76 | loss_after_learning = torch.nn.MSELoss()(after_learning, outputs) 77 | loss_after_learning 78 | 79 | #%% 80 | # yep -- that's a smaller loss, we are closer to the answer -------------------------------------------------------------------------------- /3-1.py: -------------------------------------------------------------------------------- 1 | 2 | #%% 3 | # First, we'll need to load up a dataset. Pandas is a great 4 | # tool to use to load csv data you may find, which we 5 | # will later turn into tensors. 
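# a quick optional aside -- for a purely numeric row, the pandas-to-tensor
# conversion is roughly the sketch below; the toy DataFrame and column names
# are made up for illustration, not the mushroom data used in this file
#%%
import pandas
import torch
toy = pandas.DataFrame({'width': [1.0, 2.0], 'height': [3.0, 4.0]})  # stand-in numeric data
torch.tensor(toy.iloc[0].values, dtype=torch.float32)  # one row as a float tensor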
6 | # Let's start with the Dataset 7 | 8 | #%% 9 | 10 | import torch 11 | import pandas 12 | from torch.utils.data import Dataset 13 | 14 | class MushroomDataset(Dataset): 15 | 16 | def __init__(self): 17 | '''Load up the data. 18 | ''' 19 | self.data = pandas.read_csv('./mushrooms.csv') 20 | 21 | def __len__(self): 22 | '''How much data do we have? 23 | ''' 24 | return len(self.data) 25 | 26 | def __getitem__(self, idx): 27 | '''Grab one data sample 28 | 29 | Arguments: 30 | idx {int} -- data at this position. 31 | ''' 32 | return self.data.iloc[idx][0:1] 33 | # pretty simple when we start from pandas 34 | # here is a dataset loaded, with a single sample 35 | shrooms = MushroomDataset() 36 | len(shrooms), shrooms[0] 37 | 38 | #%% 39 | # Well -- we have some clearly identifiable properties, but we 40 | # have this all in one dataset, we're going to need to separate 41 | # out the inputs from the outputs 42 | 43 | #%% 44 | class MushroomDataset(Dataset): 45 | 46 | def __init__(self): 47 | '''Load up the data. 48 | ''' 49 | self.data = pandas.read_csv('./mushrooms.csv') 50 | 51 | def __len__(self): 52 | '''How much data do we have? 53 | ''' 54 | return len(self.data) 55 | 56 | def __getitem__(self, idx): 57 | '''Grab one data sample 58 | 59 | Arguments: 60 | idx {int, tensor} -- data at this position. 61 | ''' 62 | # handle being passed a tensor as an index 63 | if type(idx) is torch.Tensor: 64 | idx = idx.item() 65 | return self.data.iloc[idx][1:], self.data.iloc[idx][0:1] 66 | 67 | shrooms = MushroomDataset() 68 | shrooms[0] 69 | 70 | #%% 71 | # One more thing to think about -- testing and training data 72 | # we need some set of data samples we don't use in training to 73 | # verify that our model can generalize -- 74 | # that it can make a classification 75 | # for an unseen sample and hasn't merely 76 | # memorized the input data 77 | 78 | #%% 79 | number_for_testing = int(len(shrooms) * 0.05) 80 | number_for_training = len(shrooms) - number_for_testing 81 | train, test = torch.utils.data.random_split(shrooms, 82 | [number_for_training, number_for_testing]) 83 | len(test), len(train) 84 | 85 | #%% 86 | test[0] 87 | -------------------------------------------------------------------------------- /1-3.py: -------------------------------------------------------------------------------- 1 | #%% 2 | import torch 3 | torch.__version__ 4 | 5 | #%% 6 | # lots of ways to create a tensor 7 | # https://pytorch.org/docs/stable/torch.html#creation-ops 8 | # a tensor is really just a multidimensional array, starting 9 | # with a simple empty array 10 | e = torch.empty(2, 2) 11 | e 12 | 13 | #%% 14 | # ok - that's strange -- there are values in that array! this 15 | # isn't a true random, it's just whatever was in memory -- if 16 | # you really want random, which is pretty often 17 | r = torch.rand(2, 2) 18 | r 19 | 20 | #%% 21 | # that's more like it and sometimes you just want specific values 22 | # like a good old zero 23 | z = torch.zeros(2, 2) 24 | z 25 | 26 | #%% 27 | # or specifc constants, let's make some threes! 
28 | c = torch.full((2, 2), 3) 29 | c 30 | 31 | #%% 32 | # the most flexible is the `torch.tensor` creation method, you can 33 | # pass it data in a lot of formats -- starting with lists 34 | l = torch.tensor([[1, 2], [3, 4]]) 35 | l 36 | 37 | #%% 38 | # as well as interoperate with numpy arrays, which is very 39 | # handy to work with data you may have already processed 40 | # with other machine learning tools liek sklearn 41 | import numpy 42 | n = numpy.linspace(0, 5, 5) 43 | n 44 | 45 | #%% 46 | # turning this into pytorch is as easy as you would wish 47 | nn = torch.tensor(n) 48 | nn 49 | 50 | #%% 51 | # and back again is easy too! 52 | nn.numpy() 53 | 54 | #%% 55 | # arrays support conventional operations -- size and slice 56 | nn.shape 57 | 58 | #%% 59 | nn[1:], nn[0] 60 | 61 | #%% 62 | # in any creation method, you can also specify the data type 63 | # like using a full precision floating point 64 | s = torch.ones(3, 3, dtype=torch.float) 65 | s 66 | 67 | #%% 68 | # all kinds of math operations are available 69 | # https://pytorch.org/docs/stable/torch.html#math-operations 70 | # math is straightforward operatos for common operations 71 | # like addition 72 | eye = torch.eye(3, 3) 73 | eye + torch.zeros(3, 3) 74 | 75 | #%% 76 | # subtraction 77 | eye - torch.ones(3, 3) 78 | 79 | #%% 80 | # broadcast multiplication of a constant 81 | eye * 3 82 | 83 | #%% 84 | # or division... 85 | eye / 3 86 | 87 | #%% 88 | # element wise tensor multiplication 89 | eye * torch.full((3,3), 4) 90 | 91 | #%% 92 | # and you might not have seen this before, but a dot product 93 | # operator in python 94 | x = torch.rand(3, 4) 95 | y = torch.rand(4, 3) 96 | x @ y 97 | 98 | #%% 99 | # and handy machine learning component operations 100 | # like getting the index of the maximum value 101 | torch.tensor([1 , 2, 5, 3, 0]).argmax() -------------------------------------------------------------------------------- /2-3.py: -------------------------------------------------------------------------------- 1 | #%% 2 | # Starting off, we need to import torch 3 | 4 | #%% 5 | import torch 6 | import matplotlib.pyplot as plt 7 | plt.style.use('ggplot') 8 | 9 | #%% 10 | # here are the inputs and outputs from the last videoA 11 | 12 | #%% 13 | inputs = torch.rand(1, 1, 64, 64) 14 | outputs = torch.rand(1, 2) 15 | 16 | #%% 17 | # here is our model from the last video -- notice everything is 18 | # linear -- this limits what out model can learn, so we need 19 | # something else -- an activation function 20 | 21 | #%% 22 | model = torch.nn.Sequential( 23 | torch.nn.Linear(64, 256), 24 | torch.nn.Linear(256, 256), 25 | torch.nn.Linear(256, 2), 26 | ) 27 | 28 | #%% 29 | # this is just about the simplest activation functional possible 30 | # the RELU 31 | # it is nonlinear in a straightforward way -- it starts out flat 32 | # and then it inflects at 0 -- two linear parts making 33 | # non linear part -- the advantage is -- it is very fast 34 | 35 | #%% 36 | x = torch.range(-1, 1, 0.1) 37 | y = torch.nn.functional.relu(x) 38 | plt.plot(x.numpy(), y.numpy()) 39 | 40 | #%% 41 | # now we can update our model with relu 42 | model = torch.nn.Sequential( 43 | torch.nn.Linear(64, 256), 44 | torch.nn.ReLU(), 45 | torch.nn.Linear(256, 256), 46 | torch.nn.ReLU(), 47 | torch.nn.Linear(256, 2), 48 | ) 49 | 50 | #%% 51 | # and the model needs feedback in order to learn, and this is the 52 | # role of the loss function -- it simply tells you how far away 53 | # from the right answer we are 54 | # you can think of -- and it's not to far off -- 
of machine 55 | # learning as taking a random guess, and saying 'how far wrong' 56 | # and then updating that guess -- this would be a silly strategy 57 | # as a person, but computers can guess fast 58 | # 59 | # there are a lot of choices for loss functions, a classic 60 | # one is the Mean Squared Error, this is related to the 61 | # classic distance you may have learned in school -- 62 | # A^2 + B^2 = C^2 when looking at right triangles, but 63 | # -- this is generalized into high dimension tensors 64 | # you'll hear it referred to as the L2 (because of the square) 65 | # or Euclidean distance as well 66 | 67 | #%% 68 | results = model(inputs) 69 | loss = torch.nn.MSELoss()(results, outputs) 70 | loss 71 | 72 | #%% 73 | # and finally -- the gradient -- when I said machine learning 74 | # makes a lot of guesses, there is a bit more to it - it 75 | # makes educated guesses -- that education is in the gradient 76 | # 77 | # the gradient tells the machine learning model, based on 78 | # the loss -- which direction it is away from the right 79 | # answer 80 | # and that will be the subject of our next video -------------------------------------------------------------------------------- /2-Assignment.py: -------------------------------------------------------------------------------- 1 | #%% 2 | # starting with imports 3 | import torch 4 | 5 | #%% 6 | # here is some iteration -- lowering the number 7 | # of hidden parameters until we no longer can get the 8 | # gradiens to vanish 9 | # this is a bit of dynamic model generation, which is 10 | # a kind of meta-learning 11 | 12 | 13 | #%% 14 | # inputs and outputs, just random values -- we'll work with real 15 | # data in subsequent videos 16 | inputs = torch.rand(1, 1, 64, 64) 17 | outputs = torch.rand(1, 2) 18 | 19 | #%% 20 | # keep track of how many learning steps it took 21 | # at each number of parameters 22 | learning_steps = [] 23 | 24 | #%% 25 | for number_of_parameters in range(256, 1, -1): 26 | class Model(torch.nn.Module): 27 | def __init__(self): 28 | super().__init__() 29 | self.layer_one = torch.nn.Linear(64, number_of_parameters) 30 | self.activation_one = torch.nn.ReLU() 31 | self.layer_two = torch.nn.Linear(number_of_parameters, number_of_parameters) 32 | self.activation_two = torch.nn.ReLU() 33 | # this is a pretty big number -- because we are flattening 34 | # which turned 64 * 256 into a flat array like tensor 35 | self.shape_outputs = torch.nn.Linear(number_of_parameters * 64, 2) 36 | 37 | def forward(self, inputs): 38 | buffer = self.layer_one(inputs) 39 | buffer = self.activation_one(buffer) 40 | buffer = self.layer_two(buffer) 41 | buffer = self.activation_two(buffer) 42 | buffer = buffer.flatten(start_dim=1) 43 | return self.shape_outputs(buffer) 44 | 45 | model = Model() 46 | loss_function = torch.nn.MSELoss() 47 | optimizer = torch.optim.SGD(model.parameters(), lr=0.01) 48 | # a limit on how much learning we do 49 | for i in range(10000): 50 | # the optimizer reaches into the model and will zero out 51 | optimizer.zero_grad() 52 | results = model(inputs) 53 | loss = loss_function(results, outputs) 54 | loss.backward() 55 | optimizer.step() 56 | # now -- look for vanishing gradients 57 | gradients = 0.0 58 | for parameter in model.parameters(): 59 | gradients += parameter.grad.data.sum() 60 | if abs(gradients) <= 0.0001: 61 | learning_steps.append((number_of_parameters, i, results)) 62 | break 63 | 64 | #%% 65 | learning_steps 66 | 67 | 68 | #%% 69 | # looks like it still learns -- but it gets a lot harder when 70 | # 
the number of parameters converges to the number of outputs 71 | import matplotlib.pyplot as plt 72 | plt.style.use('ggplot') 73 | learning_steps = [step[1] for step in learning_steps] 74 | plt.plot(learning_steps) 75 | 76 | -------------------------------------------------------------------------------- /1-4-gradient.py: -------------------------------------------------------------------------------- 1 | #%% 2 | # understanding gradients is a bit of math, but we'll try to keep this 3 | # simple -- basically a numerical gradient tells you which direction 4 | # you need to move when you are machine learning -- positive or 5 | # negative -- as well as having an actual numerical value 6 | # that tells you 'how much' you should move 7 | 8 | # a machine learning loop takes the gradients for a group 9 | # of tensor operations, and then updates the value of the tensors 10 | # that are being 'learned' using the product of the gradients 11 | # and the learning rate 12 | 13 | # so if you know a bit of math, you know a gradient is a numerical 14 | # representation of a derivative -- which means you know to ask 15 | # 'a gradient with respect to what?' -- and the answer there 16 | # is a loss function, you use gradients to figure the direction 17 | # to go to make your loss smaller 18 | 19 | # here is the simplest example from grade school algebra I could 20 | # cook up -- before you knew algebra -- this was a bit of a 21 | # mystery how to solve it, and I know I personally tried -- 22 | # just plain guessing the numbers to 'solve' equations -- 23 | # machine learning is a bit like that, but instead of just plain 24 | # guessing, we use the gradient to figure how far off, and what 25 | # our next guess should be -- OK 26 | 27 | #%% 28 | import torch 29 | 30 | # X + 1 = 3 -- that's our little bit of algebra 31 | 32 | # here is our random initial guess 33 | X = torch.rand(1, requires_grad=True) 34 | # and our formula 35 | Y = X + 1.0 36 | Y 37 | 38 | #%% 39 | # now, our loss is -- how far are we off from 3?
40 | def mse(Y): 41 | diff = 3.0 - Y 42 | return (diff * diff).sum() / 2 43 | 44 | #%% 45 | # the gradient on our X -- that tells us which direction 46 | # we are 'off' from the right answer -- let's look when we are too high 47 | loss = mse(Y) 48 | loss.backward() 49 | X.grad 50 | 51 | #%% 52 | # now -- let's use that gradient to solve some grade school 53 | # algebra with simple machine learning 54 | learning_rate = 1e-3 55 | # here is our learning loop 56 | for i in range(0, 10000): 57 | Y = X + 1.0 58 | loss = mse(Y) 59 | # here is the 'backpropagation' of the gradient 60 | loss.backward() 61 | # and here is the 'learning', so we turn off the graidents 62 | # from being updated temporarily 63 | with torch.no_grad(): 64 | # the gradient tells you which direction you are off 65 | # so you go in the opposite direction to correct the problem 66 | X -= learning_rate * X.grad 67 | # and we zero out the gradients to get fresh values on 68 | # each learning loop iteration 69 | X.grad.zero_() 70 | # and -- here is our answer 71 | X 72 | # OK -- you can see that this is approximate -- and that's an 73 | # important point -- machine learning is going to approximate 74 | # and you can control how close you get to the target answer 75 | # by altering your learning rate or your number of iterations 76 | # experiment with this by altering the `learning_rate` 77 | # and the number of loops in `range` -------------------------------------------------------------------------------- /4-3.py: -------------------------------------------------------------------------------- 1 | #%% 2 | # Images are quite convenient in pytorch, there are a few built 3 | # in datasets -- let's take a look at the classic -- MNIST 4 | 5 | 6 | #%% 7 | import torch 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | 11 | 12 | #%% 13 | import torchvision 14 | mnist = torchvision.datasets.MNIST('./var', download=True) 15 | mnist[0][0] 16 | 17 | #%% 18 | # looks like a squiggly 5 -- let's check the label 19 | mnist[0][1] 20 | 21 | #%% 22 | # now the data is actually images, so we're going to need to 23 | # turn it into tensors, which is conveniently built in 24 | 25 | #%% 26 | import torchvision.transforms as transforms 27 | 28 | transform = transforms.Compose( 29 | [transforms.ToTensor(), 30 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 31 | 32 | train = torchvision.datasets.MNIST('./var', train=True, transform=transform) 33 | trainloader = torch.utils.data.DataLoader(train, batch_size=32, shuffle=True) 34 | test = torchvision.datasets.MNIST('./var', train=False, transform=transform) 35 | testloader = torch.utils.data.DataLoader(test, batch_size=len(test), shuffle=True) 36 | 37 | 38 | #%% 39 | # and now to define a very simple convolutional network 40 | 41 | import torch.nn as nn 42 | import torch.nn.functional as F 43 | 44 | 45 | class Net(nn.Module): 46 | def __init__(self): 47 | super(Net, self).__init__() 48 | # in channels, out channels (filters!), kernel size (square) 49 | # one channel -- this is greyscale 50 | self.conv1 = nn.Conv2d(1, 3, 3) 51 | # pooling divides in half with 52 | # kernel size, stride the same as 2 53 | self.pool = nn.MaxPool2d(2, 2) 54 | # now here is where you start to need to think about 55 | # the size of the image 56 | self.conv2 = nn.Conv2d(3, 6, 3) 57 | self.fc1 = nn.Linear(150, 128) 58 | self.fc2 = nn.Linear(128, 128) 59 | # ten digits -- ten outputs 60 | self.fc3 = nn.Linear(128, 10) 61 | 62 | def forward(self, x): 63 | x = self.pool(F.relu(self.conv1(x))) 64 | x = 
self.pool(F.relu(self.conv2(x))) 65 | x = x.flatten(start_dim=1) 66 | # this is a good place to see the size for debugging 67 | # print(x.shape) 68 | x = F.relu(self.fc1(x)) 69 | x = F.relu(self.fc2(x)) 70 | x = self.fc3(x) 71 | return x 72 | 73 | 74 | net = Net() 75 | #%% 76 | # loss functions, here we are using cross entropy loss, which 77 | # actuall does the softmax for us 78 | 79 | #%% 80 | import torch.optim as optim 81 | 82 | loss_function = nn.CrossEntropyLoss() 83 | optimizer = optim.Adam(net.parameters()) 84 | 85 | 86 | #%% 87 | # and the training loop 88 | 89 | #%% 90 | for epoch in range(16): 91 | for inputs, outputs in trainloader: 92 | optimizer.zero_grad() 93 | results = net(inputs) 94 | loss = loss_function(results, outputs) 95 | loss.backward() 96 | optimizer.step() 97 | print("Loss: {0}".format(loss)) 98 | 99 | #%% 100 | # now let's use that classification report to see how well we are doing 101 | 102 | 103 | #%% 104 | import sklearn.metrics 105 | for inputs, actual in testloader: 106 | results = net(inputs).argmax(dim=1).numpy() 107 | accuracy = sklearn.metrics.accuracy_score(actual, results) 108 | print(accuracy) 109 | 110 | print(sklearn.metrics.classification_report(actual, results)) -------------------------------------------------------------------------------- /1-5-datasets.py: -------------------------------------------------------------------------------- 1 | #%% 2 | # To do machine learning you need data, and there are three concepts 3 | # to master here, Dataset, Dataloader, and transforms 4 | 5 | #%% 6 | # Let's make use of pandas and CSV data to create a dataset. 7 | 8 | import torch 9 | import pandas 10 | from torch.utils.data import Dataset 11 | 12 | class IrisDataset(Dataset): 13 | 14 | def __init__(self): 15 | '''Load up the data. 16 | ''' 17 | self.data = pandas.read_csv('./Iris.csv') 18 | 19 | def __len__(self): 20 | '''How much data do we have? 21 | ''' 22 | return len(self.data) 23 | 24 | def __getitem__(self, idx): 25 | '''Grab one data sample 26 | 27 | Arguments: 28 | idx {int} -- data at this position. 29 | ''' 30 | return self.data.iloc[idx] 31 | # pretty simple when we start from pandas 32 | # here is a dataset loaded, with a single sample 33 | iris = IrisDataset() 34 | len(iris), iris[0] 35 | #%% 36 | # To do machine learning you need data, and there are three concepts 37 | # to master here, Dataset, Dataloader, and transforms 38 | 39 | #%% 40 | # Let's make use of pandas and CSV data to create a dataset. 41 | 42 | import torch 43 | import pandas 44 | from torch.utils.data import Dataset 45 | 46 | class IrisDataset(Dataset): 47 | 48 | def __init__(self): 49 | '''Load up the data. 50 | ''' 51 | self.data = pandas.read_csv('./Iris.csv') 52 | 53 | def __len__(self): 54 | '''How much data do we have? 55 | ''' 56 | return len(self.data) 57 | 58 | def __getitem__(self, idx): 59 | '''Grab one data sample 60 | 61 | Arguments: 62 | idx {int} -- data at this position. 
63 | ''' 64 | return self.data.iloc[idx] 65 | # pretty simple when we start from pandas 66 | # here is a dataset loaded, with a single sample 67 | iris = IrisDataset() 68 | len(iris), iris[0] 69 | 70 | #%% 71 | # Now, the small problem is -- we have a named tuple, 72 | # and we're going to need a tensor for inputs and 73 | # the target label -- so we need to transform 74 | 75 | class TensorIrisDataset(IrisDataset): 76 | def __getitem__(self, idx): 77 | '''Get a single sample that is 78 | {values:, label:} 79 | ''' 80 | sample = super().__getitem__(idx) 81 | return { 82 | 'tensor': torch.Tensor( 83 | [sample.SepalLengthCm, 84 | sample.SepalWidthCm, 85 | sample.PetalLengthCm, 86 | sample.PetalWidthCm] 87 | ), 88 | 'label': sample.Species 89 | } 90 | 91 | # and output... 92 | tensors = TensorIrisDataset() 93 | len(tensors), tensors[0] 94 | 95 | #%% 96 | # Training almost always takes place in batches 97 | # so pytorch has a very convenient loader that can take 98 | # a dataset and turn it into batches so you can iterate 99 | from torch.utils.data import DataLoader 100 | 101 | loader = DataLoader(tensors, batch_size=16, shuffle=True) 102 | for batch in loader: 103 | print(batch) 104 | 105 | # see how the data comes out in batches, and the last batch 106 | # tries to be as large as it can 107 | 108 | #%% 109 | # And -- there is even a parallel possibility 110 | # this is a pretty small dataset so it's not really 111 | # essential, but here is how you use it 112 | 113 | parallel_loader = DataLoader(tensors, 114 | batch_size=16, shuffle=True, num_workers=4) 115 | for batch in parallel_loader: 116 | print(batch) 117 | -------------------------------------------------------------------------------- /4-Assignment.py: -------------------------------------------------------------------------------- 1 | #%% 2 | # Here I'm swapping out MNIST for CIFAR, which is object recognition 3 | # -- and it is 3 channel color image 4 | 5 | 6 | #%%% 7 | import torch 8 | import torchvision 9 | import torchvision.transforms as transforms 10 | import torch.nn as nn 11 | 12 | 13 | 14 | #%% 15 | cifar = torchvision.datasets.CIFAR10('./var', download=True) 16 | transform = transforms.Compose([ 17 | transforms.ToTensor(), 18 | ]) 19 | cifar[0][0] 20 | 21 | #%% 22 | train = torchvision.datasets.CIFAR10('./var', train=True, transform=transform) 23 | trainloader = torch.utils.data.DataLoader(train, batch_size=32, shuffle=True) 24 | test = torchvision.datasets.CIFAR10('./var', train=False, transform=transform) 25 | testloader = torch.utils.data.DataLoader(test, batch_size=len(test), shuffle=True) 26 | 27 | #%% 28 | class SlimAlexNet(nn.Module): 29 | 30 | def __init__(self, num_classes=10): 31 | super().__init__() 32 | self.features = nn.Sequential( 33 | # three color input channels 34 | nn.Conv2d(3, 32, kernel_size=3, stride=1), 35 | nn.ReLU(inplace=True), 36 | nn.MaxPool2d(kernel_size=3, stride=2), 37 | nn.Conv2d(32, 64, kernel_size=3), 38 | nn.ReLU(inplace=True), 39 | nn.MaxPool2d(kernel_size=3, stride=2), 40 | nn.Conv2d(64, 128, kernel_size=3, padding=1), 41 | nn.ReLU(inplace=True), 42 | nn.Conv2d(128, 256, kernel_size=3, padding=1), 43 | nn.ReLU(inplace=True), 44 | nn.Conv2d(256, 128, kernel_size=3, padding=1), 45 | nn.ReLU(inplace=True), 46 | nn.MaxPool2d(kernel_size=3, stride=2), 47 | ) 48 | self.classifier = nn.Sequential( 49 | nn.Dropout(), 50 | # this is the shape after flattening 51 | nn.Linear(512, 1024), 52 | nn.ReLU(inplace=True), 53 | nn.Dropout(), 54 | nn.Linear(1024, 1024), 55 | nn.ReLU(inplace=True), 56 | 
nn.Linear(1024, num_classes), 57 | ) 58 | 59 | def forward(self, x): 60 | x = self.features(x) 61 | x = x.flatten(start_dim=1) 62 | # here is where I figure out the shape to get 63 | # the right number of parameters in the next layer 64 | # print(x.shape) 65 | x = self.classifier(x) 66 | return x 67 | 68 | 69 | #%% 70 | net = SlimAlexNet(num_classes=10) 71 | loss_function = torch.nn.CrossEntropyLoss() 72 | optimizer = torch.optim.Adam(net.parameters()) 73 | if torch.cuda.is_available(): 74 | device = torch.device('cuda') 75 | else: 76 | device = torch.device('cpu') 77 | 78 | net.to(device) 79 | 80 | # train this longer 81 | for epoch in range(64): 82 | total_loss = 0 83 | for inputs, outputs in trainloader: 84 | inputs = inputs.to(device) 85 | outputs = outputs.to(device) 86 | optimizer.zero_grad() 87 | results = net(inputs) 88 | loss = loss_function(results, outputs) 89 | total_loss += loss.item() 90 | loss.backward() 91 | optimizer.step() 92 | print("Loss: {0}".format(total_loss / len(trainloader))) 93 | 94 | #%% 95 | # let's see how much better this is! 96 | 97 | #%% 98 | import sklearn.metrics 99 | for inputs, actual in testloader: 100 | inputs = inputs.to(device) 101 | results = net(inputs).argmax(dim=1).to('cpu').numpy() 102 | accuracy = sklearn.metrics.accuracy_score(actual, results) 103 | print(accuracy) 104 | 105 | print(sklearn.metrics.classification_report(actual, results)) 106 | -------------------------------------------------------------------------------- /4-5.py: -------------------------------------------------------------------------------- 1 | #%% 2 | # Now -- let's take a look at a full featured convolutional network 3 | # by investigating AlexNet -- this was one of the early 'truly deep' 4 | # networks and is actually a great basis for study. 
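# a quick optional aside -- torchvision also ships a reference AlexNet
# implementation, and printing it is a handy way to compare the original
# architecture against the slimmed-down variant defined in this file
#%%
import torchvision.models
print(torchvision.models.alexnet())  # prints the full-size feature/classifier stack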
5 | 6 | 7 | #%%% 8 | import torch 9 | import torchvision 10 | import torchvision.transforms as transforms 11 | import torch.nn as nn 12 | 13 | 14 | #%% 15 | # and now our data 16 | 17 | #%% 18 | mnist = torchvision.datasets.MNIST('./var', download=True) 19 | transform = transforms.Compose([ 20 | transforms.ToTensor(), 21 | ]) 22 | 23 | train = torchvision.datasets.MNIST('./var', train=True, transform=transform) 24 | trainloader = torch.utils.data.DataLoader(train, batch_size=32, shuffle=True) 25 | test = torchvision.datasets.MNIST('./var', train=False, transform=transform) 26 | testloader = torch.utils.data.DataLoader(test, batch_size=len(test), shuffle=True) 27 | 28 | #%% 29 | # we can use use AlexNet -- it's built in to torchvisionm but we'll 30 | # modify it a bit for our smaller images and grayscale 31 | 32 | #%% 33 | class SlimAlexNet(nn.Module): 34 | 35 | def __init__(self, num_classes=10): 36 | super().__init__() 37 | self.features = nn.Sequential( 38 | nn.Conv2d(1, 32, kernel_size=3, stride=1), 39 | nn.ReLU(inplace=True), 40 | nn.MaxPool2d(kernel_size=3, stride=2), 41 | nn.Conv2d(32, 64, kernel_size=3), 42 | nn.ReLU(inplace=True), 43 | nn.MaxPool2d(kernel_size=3, stride=2), 44 | nn.Conv2d(64, 128, kernel_size=3, padding=1), 45 | nn.ReLU(inplace=True), 46 | nn.Conv2d(128, 256, kernel_size=3, padding=1), 47 | nn.ReLU(inplace=True), 48 | nn.Conv2d(256, 128, kernel_size=3, padding=1), 49 | nn.ReLU(inplace=True), 50 | nn.MaxPool2d(kernel_size=3, stride=2), 51 | ) 52 | self.classifier = nn.Sequential( 53 | nn.Dropout(), 54 | nn.Linear(128, 1024), 55 | nn.ReLU(inplace=True), 56 | nn.Dropout(), 57 | nn.Linear(1024, 1024), 58 | nn.ReLU(inplace=True), 59 | nn.Linear(1024, num_classes), 60 | ) 61 | 62 | def forward(self, x): 63 | x = self.features(x) 64 | x = x.flatten(start_dim=1) 65 | x = self.classifier(x) 66 | return x 67 | 68 | 69 | #%% 70 | net = SlimAlexNet(num_classes=10) 71 | loss_function = torch.nn.CrossEntropyLoss() 72 | optimizer = torch.optim.Adam(net.parameters()) 73 | 74 | #%% 75 | # and the training loop, with CUDA support 76 | 77 | #%% 78 | if torch.cuda.is_available(): 79 | device = torch.device('cuda') 80 | else: 81 | device = torch.device('cpu') 82 | 83 | net.to(device) 84 | 85 | for epoch in range(16): 86 | total_loss = 0 87 | for inputs, outputs in trainloader: 88 | inputs = inputs.to(device) 89 | outputs = outputs.to(device) 90 | optimizer.zero_grad() 91 | results = net(inputs) 92 | loss = loss_function(results, outputs) 93 | total_loss += loss.item() 94 | loss.backward() 95 | optimizer.step() 96 | print("Loss: {0}".format(total_loss / len(trainloader))) 97 | 98 | #%% 99 | # let's see how much better this is! 
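# one small extra step before scoring -- putting the network in eval mode
# makes the Dropout layers in the classifier behave deterministically;
# wrapping the test pass in `with torch.no_grad():` would also skip
# gradient bookkeeping, though the metrics below work either way
#%%
net.eval()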
100 | 101 | #%% 102 | import sklearn.metrics 103 | for inputs, actual in testloader: 104 | inputs = inputs.to(device) 105 | results = net(inputs).argmax(dim=1).to('cpu').numpy() 106 | accuracy = sklearn.metrics.accuracy_score(actual, results) 107 | print(accuracy) 108 | 109 | print(sklearn.metrics.classification_report(actual, results)) 110 | -------------------------------------------------------------------------------- /2-5.py: -------------------------------------------------------------------------------- 1 | #%% 2 | # starting with imports 3 | import torch 4 | 5 | #%% 6 | # now we'll create a model in the way that you are most likely 7 | # going to use pytorch in practice, by creating a reusable model 8 | # module -- this is simply a class with layer member variables 9 | # and a forward method that does the actual computation 10 | # we're going to build the same model we did before with linear 11 | # and relu, but we'll also fix up our model and get the output 12 | # shape we really want -- which is a tensor with two elements 13 | 14 | 15 | #%% 16 | # inputs and outputs, just random values -- we'll work with real 17 | # data in subsequent videos 18 | inputs = torch.rand(1, 1, 64, 64) 19 | outputs = torch.rand(1, 2) 20 | 21 | #%% 22 | # and now on to our module 23 | 24 | class Model(torch.nn.Module): 25 | 26 | def __init__(self): 27 | ''' 28 | The constructor is the place to set up each of the layers 29 | and activations. 30 | ''' 31 | 32 | super().__init__() 33 | self.layer_one = torch.nn.Linear(64, 256) 34 | self.activation_one = torch.nn.ReLU() 35 | self.layer_two = torch.nn.Linear(256, 256) 36 | self.activation_two = torch.nn.ReLU() 37 | # this is a pretty big number -- because we are flattening 38 | # which turned 64 * 256 into a flat array like tensor 39 | self.shape_outputs = torch.nn.Linear(16384, 2) 40 | 41 | def forward(self, inputs): 42 | buffer = self.layer_one(inputs) 43 | buffer = self.activation_one(buffer) 44 | buffer = self.layer_two(buffer) 45 | buffer = self.activation_two(buffer) 46 | # and here -- we correct the model to give us the output 47 | # shape we want -- starting with dimension one to 48 | # preserve the batch dimension -- we only have a bactch 49 | # of one item, but dealing with batches will becomre more 50 | # important as we process real data in later videos 51 | buffer = buffer.flatten(start_dim=1) 52 | return self.shape_outputs(buffer) 53 | 54 | #%% 55 | # now let's run our model over our inputs 56 | 57 | #%% 58 | model = Model() 59 | test_results = model(inputs) 60 | test_results 61 | 62 | #%% 63 | # now -- let's learn, this time creating a learning loop 64 | # with a built in optimizer -- we'll let it cycle keeping track 65 | # of our gradients, and when our gradients 'vanish' -- we'll 66 | # stop learning and see how close we are to our model 67 | # being able to generate the correct outputs 68 | 69 | #%% 70 | loss_function = torch.nn.MSELoss() 71 | optimizer = torch.optim.SGD(model.parameters(), lr=0.01) 72 | # a limit on how much learning we do 73 | for i in range(10000): 74 | # the optimizer reaches into the model and will zero out 75 | optimizer.zero_grad() 76 | results = model(inputs) 77 | loss = loss_function(results, outputs) 78 | loss.backward() 79 | optimizer.step() 80 | # now -- look for vanishing gradients 81 | gradients = 0.0 82 | for parameter in model.parameters(): 83 | gradients += parameter.grad.data.sum() 84 | if abs(gradients) <= 0.0001: 85 | print(gradients) 86 | print('gradient vanished at iteration {0}'.format(i)) 87 | break 
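# a side note on the stopping test above -- summing raw gradients lets
# positive and negative entries cancel, so a sum of absolute values is a
# stricter check for truly vanished gradients; a minimal sketch:
#%%
total_abs_gradient = sum(p.grad.data.abs().sum() for p in model.parameters())
total_abs_gradient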
88 | 89 | 90 | #%% 91 | # relatively quick to get to no gradients, let's look at the answer 92 | model(inputs), outputs 93 | 94 | #%% 95 | # spot on! 96 | # this illustrates how networks can learn arbitrary functions, 97 | # in this case -- extremely arbitrary, we learned random data! 98 | # keep this in mind as you are doing machine learning on real data 99 | # -- networks are powerful enough to fix nearly any data, including 100 | # random, which means in effect the algorithm memorized the 101 | # inputs in a kind of sophisticated mathematical hashtable 102 | # -- when this happens, we call it overfitting -- meaning 103 | # the model knows only the inputs it is trained on and cannot 104 | # deal with previously unseen inputs -- think about this 105 | # when you make your models! -------------------------------------------------------------------------------- /4-4.py: -------------------------------------------------------------------------------- 1 | #%% 2 | # Let's take a look at what is going on inside these convolutions 3 | # by viewing the layer output channels as images. It's an interesting technique 4 | # to get more of a feel for what the machine learner 'sees' 5 | 6 | 7 | #%%% 8 | import torch 9 | import matplotlib.pyplot as plt 10 | import numpy as np 11 | import torchvision 12 | import torchvision.transforms as transforms 13 | 14 | #%% 15 | mnist = torchvision.datasets.MNIST('./var', download=True) 16 | transform = transforms.Compose([transforms.ToTensor()]) 17 | 18 | train = torchvision.datasets.MNIST('./var', train=True, transform=transform) 19 | trainloader = torch.utils.data.DataLoader(train, batch_size=32, shuffle=True) 20 | test = torchvision.datasets.MNIST('./var', train=False, transform=transform) 21 | testloader = torch.utils.data.DataLoader(test, batch_size=len(test), shuffle=True) 22 | 23 | #%% 24 | # let's plot a tensor as an image -- this hasn't had any machine learning 25 | # just yet -- it is only the source image data 26 | 27 | #%% 28 | for inputs, outputs in trainloader: 29 | #slice out one channel 30 | image = inputs[0][0] 31 | plt.imshow(image.numpy(), cmap=plt.get_cmap('binary')) 32 | break 33 | 34 | 35 | 36 | #%% 37 | # OK -- that's an image - now let's train up a simple convolutional network 38 | # and then augment it by saving intermediate tensors, the thing to know here 39 | # is the convolutional tensors have multiple filters, we go from 40 | # one color channel to three -- so we'll have some interesting choices when 41 | # we visualize! 
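# a quick optional aside -- stashing activations on `self`, as the model below
# does, is fine for a small demo; the more general pytorch mechanism for
# capturing intermediate outputs is a forward hook, sketched here with a
# stand-in Conv2d layer rather than the network defined further down
#%%
import torch
import torch.nn as nn
captured = []
demo_layer = nn.Conv2d(1, 3, 3)  # same shape as conv1 below
handle = demo_layer.register_forward_hook(
    lambda module, inputs, output: captured.append(output.detach()))
demo_layer(torch.rand(1, 1, 28, 28))  # one fake greyscale image
handle.remove()
captured[0].shape  # torch.Size([1, 3, 26, 26])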
42 | 43 | 44 | 45 | #%% 46 | import torch.nn as nn 47 | import torch.nn.functional as F 48 | 49 | class Net(nn.Module): 50 | def __init__(self): 51 | super(Net, self).__init__() 52 | # in channels, out channels (filters!), kernel size (square) 53 | # one channel -- this is greyscale 54 | self.conv1 = nn.Conv2d(1, 3, 3) 55 | # pooling divides in half with 56 | # kernel size, stride the same as 2 57 | self.pool = nn.MaxPool2d(2, 2) 58 | # now here is where you start to need to think about 59 | # the size of the image 60 | self.conv2 = nn.Conv2d(3, 6, 3) 61 | self.fc1 = nn.Linear(150, 128) 62 | self.fc2 = nn.Linear(128, 128) 63 | # ten digits -- ten outputs 64 | self.fc3 = nn.Linear(128, 10) 65 | 66 | def forward(self, x): 67 | x = self.pool(F.relu(self.conv1(x))) 68 | self.after_conv1 = x 69 | x = self.pool(F.relu(self.conv2(x))) 70 | self.after_conv2 = x 71 | x = x.flatten(start_dim=1) 72 | # this is a good place to see the size for debugging 73 | # print(x.shape) 74 | x = F.relu(self.fc1(x)) 75 | x = F.relu(self.fc2(x)) 76 | x = self.fc3(x) 77 | return x 78 | 79 | 80 | net = Net() 81 | #%% 82 | # loss functions, here we are using cross entropy loss, which 83 | # actuall does the softmax for us -- convience feature in pytorch 84 | 85 | #%% 86 | import torch.optim as optim 87 | 88 | loss_function = nn.CrossEntropyLoss() 89 | optimizer = optim.Adam(net.parameters()) 90 | 91 | 92 | #%% 93 | # and the training loop 94 | 95 | #%% 96 | for epoch in range(16): 97 | for inputs, outputs in trainloader: 98 | optimizer.zero_grad() 99 | results = net(inputs) 100 | loss = loss_function(results, outputs) 101 | loss.backward() 102 | optimizer.step() 103 | print("Loss: {0}".format(loss)) 104 | 105 | #%% 106 | # ok -- now we have a trained model -- now we can visualize! 
107 | # pyplot is a bit odd when you make multiple images -- the 108 | # trick is to remember it is a bit modal - you create a figure 109 | # which means the plots you call are 'to' that figure implicitly 110 | # and then you add subplots which are (rows, columns, index) 111 | # and it is one based from left to right, top to bottom 112 | # 113 | # we'll make a figure with 3 rows, 6 columns to show the source 114 | # image, then the first filter of three channels 115 | # followed by the second filter of six channels 116 | 117 | 118 | #%% 119 | for inputs, outputs in trainloader: 120 | # multi image figure 121 | figure = plt.figure() 122 | # the original image 123 | image = inputs[0][0] 124 | 125 | figure.add_subplot(3, 6, 1) 126 | plt.imshow(image.numpy(), cmap=plt.get_cmap('binary')) 127 | output = net(inputs) 128 | # remember we have a batch in the model -- and this 129 | # has a gradient, so we'll need it detached to get numpy format 130 | filter_one = net.after_conv1[0].detach() 131 | for i in range(3): 132 | figure.add_subplot(3, 6, 6 + 1 + i) 133 | plt.imshow(filter_one[i].numpy(), cmap=plt.get_cmap('binary')) 134 | 135 | filter_two = net.after_conv2[0].detach() 136 | for i in range(6): 137 | figure.add_subplot(3, 6, 12 + 1 + i) 138 | plt.imshow(filter_two[i].numpy(), cmap=plt.get_cmap('binary')) 139 | plt.show() 140 | 141 | break 142 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # PyTorch Deep Learning in 7 Days[Video] 2 | This is the code repository for [PyTorch Deep Learning in 7 Days[Video]](https://prod.packtpub.com/in/big-data-and-business-intelligence/pytorch-deep-learning-7-days-video), published by [Packt](https://www.packtpub.com/?utm_source=github). It contains all the supporting project files necessary to work through the video course from start to finish. 3 | ## About the Video Course 4 | PyTorch is Facebook’s latest Python-based framework for Deep Learning. It has the ability to create dynamic Neural Networks on CPUs and GPUs, both with a significantly less code compared to other competing frameworks. PyTorch has a unique interface that makes it as easy to learn as NumPy. 5 | 6 | This 7-day course is for those who are in a hurry to get started with PyTorch. You will be introduced to the most commonly used Deep Learning models, techniques, and algorithms through PyTorch code. This course is an attempt to break the myth that Deep Learning is complicated and show you that with the right choice of tools combined with a simple and intuitive explanation of core concepts, Deep Learning is as accessible as any other application development technologies out there. It’s a journey from diving deep into the fundamentals to getting acquainted with the advance concepts such as Transfer Learning, Natural Language Processing and implementation of Generative Adversarial Networks. 7 | 8 | By the end of the course, you will be able to build Deep Learning applications with PyTorch. 9 | 10 |