├── ai-banner.png ├── machine-learning-with-go ├── ml_workflow │ ├── exercise2 │ │ ├── solutions │ │ │ ├── solution1 │ │ │ │ ├── Dockerfile │ │ │ │ ├── Makefile │ │ │ │ └── solution1.go │ │ │ ├── solution3 │ │ │ │ ├── Dockerfile │ │ │ │ ├── Makefile │ │ │ │ └── solution3.go │ │ │ └── solution2 │ │ │ │ ├── solution2a │ │ │ │ ├── Dockerfile │ │ │ │ ├── Makefile │ │ │ │ └── solution2a.go │ │ │ │ └── solution2b │ │ │ │ ├── Dockerfile │ │ │ │ ├── Makefile │ │ │ │ └── solution2b.go │ │ └── templates │ │ │ ├── template1 │ │ │ └── template1.go │ │ │ ├── template2 │ │ │ ├── template2a │ │ │ │ └── template2a.go │ │ │ └── template2b │ │ │ │ └── template2b.go │ │ │ └── template3 │ │ │ └── template3.go │ ├── data │ │ └── test │ │ │ ├── 1.json │ │ │ ├── 2.json │ │ │ └── 3.json │ └── exercise3 │ │ ├── qc_pre_process.json │ │ ├── model.json │ │ ├── infer.json │ │ └── qcontrol.json ├── ml_with_go │ ├── data │ │ ├── office.png │ │ ├── optImg.zip │ │ ├── iris.csv │ │ ├── ssd_mobilenet_labels │ │ │ └── labels.json │ │ └── 5kings_battles_v1.csv │ ├── bonus │ │ ├── bonus2 │ │ │ └── bonus2.go │ │ ├── README.md │ │ └── bonus1 │ │ │ └── bonus1.go │ ├── solutions │ │ ├── solution2.ipynb │ │ └── solution1.ipynb │ └── README.md ├── README.md └── ml_intro │ └── README.md ├── notebook-to-production ├── deploying_managing │ ├── data │ │ ├── test2.csv │ │ ├── test1.csv │ │ └── iris.csv │ ├── pre-process.json │ ├── train.json │ └── infer.json ├── introduction │ ├── data │ │ ├── test.csv │ │ └── iris.csv │ ├── example1 │ │ └── example1_data_munging.ipynb │ ├── example2 │ │ └── example2_model_training.ipynb │ ├── exercises │ │ ├── template1 │ │ │ └── template1_data_munging.ipynb │ │ ├── template2 │ │ │ └── template2_model_training.ipynb │ │ ├── solution2 │ │ │ └── solution2_model_training.ipynb │ │ └── solution1 │ │ │ └── solution1_data_munging.ipynb │ └── README.md ├── pipeline_stages │ ├── data │ │ ├── test.csv │ │ └── iris.csv │ ├── docker │ │ ├── pre-processing │ │ │ ├── Dockerfile │ │ │ └── pre_process.py │ │ ├── inference │ │ │ ├── Dockerfile │ │ │ └── infer.py │ │ └── training │ │ │ ├── Dockerfile │ │ │ └── train.py │ ├── example1 │ │ └── example1.py │ ├── exercise2 │ │ ├── template2.py │ │ └── solution2.py │ ├── exercise1 │ │ ├── template1.py │ │ └── solution1.py │ └── README.md ├── productionizing │ ├── data │ │ ├── test.csv │ │ └── iris.csv │ ├── README.md │ └── example_sklearn_workflow.ipynb ├── frameworks_that_scale │ ├── data │ │ ├── test.csv │ │ └── iris.csv │ ├── example1 │ │ └── example1.ipynb │ ├── exercise1 │ │ ├── template1.ipynb │ │ └── solution1.ipynb │ └── README.md └── README.md ├── .gitignore ├── etc ├── gophernotesDocker │ ├── README.md │ └── Dockerfile ├── tensorflow-go │ ├── README.md │ └── Dockerfile └── doPachLaunch │ ├── install_dependencies.sh │ └── launch.sh └── README.md /ai-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ardanlabs/training-ai/HEAD/ai-banner.png -------------------------------------------------------------------------------- /machine-learning-with-go/ml_workflow/exercise2/solutions/solution1/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine 2 | ADD train / 3 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_workflow/exercise2/solutions/solution3/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine 2 | ADD infer / 3 | 
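These minimal Dockerfiles only ADD a prebuilt binary: the accompanying Makefiles (included later in this repo) cross-compile a static Go binary first, so nothing else needs to be installed in the alpine image. A sketch of the full build-and-ship flow, using solution3 as the example (the commands are taken from its Makefile):

```
# cross-compile a static Linux binary that can run on the bare alpine base image
GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o infer

# bake the binary into the image and push it to the tag referenced by the pipeline specs
docker build --force-rm=true -t gopherdata/gc2018:inference .
docker push gopherdata/gc2018:inference
```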
-------------------------------------------------------------------------------- /notebook-to-production/deploying_managing/data/test2.csv: -------------------------------------------------------------------------------- 1 | 5.7,2.8,4.1,1.3 2 | 6.3,3.3,6.0,2.5 3 | 7.1,3.0,5.9,2.1 4 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_workflow/exercise2/solutions/solution2/solution2a/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine 2 | ADD qcpre / 3 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_workflow/exercise2/solutions/solution2/solution2b/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine 2 | ADD qcontrol / 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | /.idea/* 3 | .ipynb_checkpoints 4 | .DS_Store 5 | .DS_Store? 6 | *.db -------------------------------------------------------------------------------- /machine-learning-with-go/ml_with_go/data/office.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ardanlabs/training-ai/HEAD/machine-learning-with-go/ml_with_go/data/office.png -------------------------------------------------------------------------------- /machine-learning-with-go/ml_with_go/data/optImg.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ardanlabs/training-ai/HEAD/machine-learning-with-go/ml_with_go/data/optImg.zip -------------------------------------------------------------------------------- /notebook-to-production/introduction/data/test.csv: -------------------------------------------------------------------------------- 1 | 5.7,2.8,4.1,1.3 2 | 6.3,3.3,6.0,2.5 3 | 5.8,2.7,5.1,1.9 4 | 7.1,3.0,5.9,2.1 5 | 5.1,3.5,1.4,0.2 6 | 4.9,3.0,1.4,0.2 7 | -------------------------------------------------------------------------------- /notebook-to-production/pipeline_stages/data/test.csv: -------------------------------------------------------------------------------- 1 | 5.7,2.8,4.1,1.3 2 | 6.3,3.3,6.0,2.5 3 | 5.8,2.7,5.1,1.9 4 | 7.1,3.0,5.9,2.1 5 | 5.1,3.5,1.4,0.2 6 | 4.9,3.0,1.4,0.2 7 | -------------------------------------------------------------------------------- /notebook-to-production/productionizing/data/test.csv: -------------------------------------------------------------------------------- 1 | 5.7,2.8,4.1,1.3 2 | 6.3,3.3,6.0,2.5 3 | 5.8,2.7,5.1,1.9 4 | 7.1,3.0,5.9,2.1 5 | 5.1,3.5,1.4,0.2 6 | 4.9,3.0,1.4,0.2 7 | -------------------------------------------------------------------------------- /notebook-to-production/deploying_managing/data/test1.csv: -------------------------------------------------------------------------------- 1 | 5.7,2.8,4.1,1.3 2 | 6.3,3.3,6.0,2.5 3 | 5.8,2.7,5.1,1.9 4 | 7.1,3.0,5.9,2.1 5 | 5.1,3.5,1.4,0.2 6 | 4.9,3.0,1.4,0.2 7 | -------------------------------------------------------------------------------- /notebook-to-production/frameworks_that_scale/data/test.csv: -------------------------------------------------------------------------------- 1 | 5.7,2.8,4.1,1.3 2 | 6.3,3.3,6.0,2.5 3 | 5.8,2.7,5.1,1.9 4 | 7.1,3.0,5.9,2.1 5 | 5.1,3.5,1.4,0.2 6 | 4.9,3.0,1.4,0.2 7 | 
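Each of these test CSVs holds unlabeled iris feature vectors: the four measurements only, with no species column. The pipeline scripts later in this repo read them with the column names f1-f4 and use the rows as inference input. A usage sketch, assuming a model has already been trained and saved as model.pt (the filename used by train.py) and that out/ is an existing scratch directory:

```
# infer.py (shown later in this repo) takes: saved model, input feature CSV, output directory
python3 infer.py model.pt data/test.csv out/
```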
-------------------------------------------------------------------------------- /machine-learning-with-go/ml_workflow/data/test/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "independent_variables": [ 3 | { 4 | "name": "bmi", 5 | "value": 0.0616962065187 6 | }, 7 | { 8 | "name": "ltg", 9 | "value": 0.0199084208763 10 | } 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_workflow/data/test/2.json: -------------------------------------------------------------------------------- 1 | { 2 | "independent_variables": [ 3 | { 4 | "name": "bmi", 5 | "value": 0.0444512133366 6 | }, 7 | { 8 | "name": "ltg", 9 | "value": 0.00286377051894 10 | } 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_workflow/data/test/3.json: -------------------------------------------------------------------------------- 1 | { 2 | "independent_variables": [ 3 | { 4 | "name": "bmi", 5 | "value": -0.0115950145052 6 | }, 7 | { 8 | "name": "ltg", 9 | "value": 0.0226920225667 10 | } 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /notebook-to-production/pipeline_stages/docker/pre-processing/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6 2 | 3 | # Install dependencies 4 | RUN pip install -U \ 5 | numpy \ 6 | scipy \ 7 | scikit-learn \ 8 | pandas 9 | 10 | # Add our code 11 | ADD pre_process.py /code/pre_process.py 12 | -------------------------------------------------------------------------------- /notebook-to-production/pipeline_stages/docker/inference/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6 2 | 3 | # Install dependencies 4 | RUN pip install -U \ 5 | numpy \ 6 | scipy \ 7 | scikit-learn \ 8 | pandas \ 9 | torch \ 10 | torchvision 11 | 12 | # Add our code 13 | ADD infer.py /code/infer.py 14 | -------------------------------------------------------------------------------- /notebook-to-production/pipeline_stages/docker/training/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6 2 | 3 | # Install dependencies 4 | RUN pip install -U \ 5 | numpy \ 6 | scipy \ 7 | scikit-learn \ 8 | pandas \ 9 | torch \ 10 | torchvision 11 | 12 | # Add our code 13 | ADD train.py /code/train.py 14 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_workflow/exercise2/solutions/solution1/Makefile: -------------------------------------------------------------------------------- 1 | all: compile docker push clean 2 | 3 | compile: 4 | GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o train 5 | 6 | docker: 7 | docker build --force-rm=true -t gopherdata/gc2018:training . 8 | 9 | push: 10 | docker push gopherdata/gc2018:training 11 | 12 | clean: 13 | rm train 14 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_workflow/exercise2/solutions/solution3/Makefile: -------------------------------------------------------------------------------- 1 | all: compile docker push clean 2 | 3 | compile: 4 | GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o infer 5 | 6 | docker: 7 | docker build --force-rm=true -t gopherdata/gc2018:inference . 
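# Note: the exercise3 pipeline spec (infer.json) pulls this same
# gopherdata/gc2018:inference tag, so push the image (below) before
# creating the pipeline.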
8 | 9 | push: 10 | docker push gopherdata/gc2018:training 11 | 12 | clean: 13 | rm train 14 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_workflow/exercise2/solutions/solution3/Makefile: -------------------------------------------------------------------------------- 1 | all: compile docker push clean 2 | 3 | compile: 4 | GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o infer 5 | 6 | docker: 7 | docker build --force-rm=true -t gopherdata/gc2018:inference . 8 | 9 | push: 10 | docker push gopherdata/gc2018:inference 11 | 12 | clean: 13 | rm infer 14 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_workflow/exercise2/solutions/solution2/solution2a/Makefile: -------------------------------------------------------------------------------- 1 | all: compile docker push clean 2 | 3 | compile: 4 | GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o qcpre 5 | 6 | docker: 7 | docker build --force-rm=true -t gopherdata/gc2018:qcpre . 8 | 9 | push: 10 | docker push gopherdata/gc2018:qcpre 11 | 12 | clean: 13 | rm qcpre 14 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_workflow/exercise2/solutions/solution2/solution2b/Makefile: -------------------------------------------------------------------------------- 1 | all: compile docker push clean 2 | 3 | compile: 4 | GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o qcontrol 5 | 6 | docker: 7 | docker build --force-rm=true -t gopherdata/gc2018:qc . 8 | 9 | push: 10 | docker push gopherdata/gc2018:qc 11 | 12 | clean: 13 | rm qcontrol 14 | -------------------------------------------------------------------------------- /etc/gophernotesDocker/README.md: -------------------------------------------------------------------------------- 1 | ## Dockerfile for gophernotes 2 | 3 | Install Jupyter with the Go kernel (gophernotes) locally, or build the Docker image for Jupyter with Go 4 | from this folder (note that Docker image names must be lowercase): 5 | ``` 6 | $ cd gophernotesDocker 7 | 8 | $ docker build . -t gophernoteslocal/gophernotes 9 | ``` 10 | 11 | Run the built image: 12 | 13 | ``` 14 | $ docker run -it -p 8888:8888 -v /PATH/TO/LOCAL/PROJECT/training-ai:/go/src/training-ai gophernoteslocal/gophernotes 15 | 16 | ``` -------------------------------------------------------------------------------- /notebook-to-production/deploying_managing/pre-process.json: -------------------------------------------------------------------------------- 1 | { 2 | "pipeline": { 3 | "name": "pre_process" 4 | }, 5 | "transform": { 6 | "image": "dwhitena/nb-to-prod:pre-process", 7 | "cmd": [ "/bin/bash" ], 8 | "stdin": [ "python3 /code/pre_process.py $training /pfs/out/" ] 9 | }, 10 | "parallelism_spec": { 11 | "constant": "1" 12 | }, 13 | "input": { 14 | "atom": { 15 | "repo": "training", 16 | "glob": "/*" 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_workflow/exercise3/qc_pre_process.json: -------------------------------------------------------------------------------- 1 | { 2 | "pipeline": { 3 | "name": "qc_pre_process" 4 | }, 5 | "transform": { 6 | "image": "gopherdata/gc2018:qcpre", 7 | "cmd": [ 8 | "/qcpre", 9 | "-inFile=/pfs/qc_data/holdout.csv", 10 | "-outDir=/pfs/out" 11 | ] 12 | }, 13 | "parallelism_spec": { 14 | "constant": "1" 15 | }, 16 | "input": { 17 | "atom": { 18 | "repo": "qc_data", 19 | "glob": "/" 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_workflow/exercise3/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "pipeline": { 3 | "name": "model" 4 | }, 5 | "transform": { 6 | "image": "gopherdata/gc2018:training", 7 | "cmd": [ 8 | "/train", 9 | "-inFile=/pfs/training_data/training_prod.csv", 10 | "-outDir=/pfs/out" 11 | ] 12 | }, 13 | "parallelism_spec": { 14 | "constant": "1" 15 | }, 16 | "input": { 17 | "atom": { 18 | "repo": "training_data", 19
| "glob": "/" 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /etc/tensorflow-go/README.md: -------------------------------------------------------------------------------- 1 | ## Dockerfile for gophernotes 2 | 3 | install tensorflow-go image 4 | ``` 5 | $ cd gophernotesDocker 6 | 7 | $ docker build . -t gophernotesLocal/tensorflow-go 8 | ``` 9 | 10 | Run the built image 11 | 12 | ``` 13 | docker run -it -p 8887:8888 -v PATH-TO-YOUR-PROJECT/training-ai/machine-learning-with-go/ml_with_go:/go/src/machine-learning-with-go dicaormu/tensorflow-go 14 | ``` 15 | 16 | run bash in the image to compile your project 17 | 18 | ``` 19 | docker exec -it IMAGE_ID /bin/bash 20 | ``` 21 | -------------------------------------------------------------------------------- /notebook-to-production/deploying_managing/train.json: -------------------------------------------------------------------------------- 1 | { 2 | "pipeline": { 3 | "name": "train" 4 | }, 5 | "transform": { 6 | "image": "dwhitena/nb-to-prod:train", 7 | "cmd": [ 8 | "python3", 9 | "/code/train.py", 10 | "/pfs/pre_process/x_train.csv", 11 | "/pfs/pre_process/y_train.csv", 12 | "/pfs/out/" 13 | ] 14 | }, 15 | "parallelism_spec": { 16 | "constant": "1" 17 | }, 18 | "input": { 19 | "atom": { 20 | "repo": "pre_process", 21 | "glob": "/" 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /notebook-to-production/deploying_managing/infer.json: -------------------------------------------------------------------------------- 1 | { 2 | "pipeline": { 3 | "name": "inference" 4 | }, 5 | "transform": { 6 | "image": "dwhitena/nb-to-prod:infer", 7 | "cmd": [ "/bin/bash" ], 8 | "stdin": [ "python3 /code/infer.py /pfs/train/model.pt $attributes /pfs/out/" ] 9 | }, 10 | "parallelism_spec": { 11 | "constant": "1" 12 | }, 13 | "input": { 14 | "cross": [ 15 | { 16 | "atom": { 17 | "repo": "attributes", 18 | "glob": "/*" 19 | } 20 | }, 21 | { 22 | "atom": { 23 | "repo": "train", 24 | "glob": "/" 25 | } 26 | } 27 | ] 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_workflow/exercise3/infer.json: -------------------------------------------------------------------------------- 1 | { 2 | "pipeline": { 3 | "name": "infer" 4 | }, 5 | "transform": { 6 | "image": "gopherdata/gc2018:inference", 7 | "cmd": [ 8 | "/infer", 9 | "-inModelDir=/pfs/qcontrol", 10 | "-inVarDir=/pfs/attributes/", 11 | "-outDir=/pfs/out" 12 | ] 13 | }, 14 | "parallelism_spec": { 15 | "constant": "10" 16 | }, 17 | "input": { 18 | "cross": [ 19 | { 20 | "atom": { 21 | "repo": "attributes", 22 | "glob": "/*" 23 | } 24 | }, 25 | { 26 | "atom": { 27 | "repo": "qcontrol", 28 | "glob": "/" 29 | } 30 | } 31 | ] 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_workflow/exercise3/qcontrol.json: -------------------------------------------------------------------------------- 1 | { 2 | "pipeline": { 3 | "name": "qcontrol" 4 | }, 5 | "transform": { 6 | "image": "gopherdata/gc2018:qc", 7 | "cmd": [ 8 | "/qcontrol", 9 | "-inModelFile=/pfs/model/model.json", 10 | "-inTestDir=/pfs/qc_pre_process/", 11 | "-threshold=60.0", 12 | "-outDir=/pfs/out" 13 | ] 14 | }, 15 | "parallelism_spec": { 16 | "constant": "1" 17 | }, 18 | "input": { 19 | "cross": [ 20 | { 21 | "atom": { 22 | "repo": "qc_pre_process", 23 | "glob": "/" 24 | } 25 | }, 26 | { 27 | "atom": 
{ 28 | "repo": "model", 29 | "glob": "/" 30 | } 31 | } 32 | ] 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /etc/doPachLaunch/install_dependencies.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # docker 4 | apt-get update 5 | apt-get install -y docker.io 6 | 7 | # kubeadm, kubelet, and kubectl 8 | apt-get update && apt-get install -y apt-transport-https curl 9 | curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - 10 | cat <<EOF >/etc/apt/sources.list.d/kubernetes.list 11 | deb http://apt.kubernetes.io/ kubernetes-xenial main 12 | EOF 13 | apt-get update 14 | apt-get install -y kubelet kubeadm kubectl 15 | apt-mark hold kubelet kubeadm kubectl 16 | 17 | # kubeadm images 18 | kubeadm config images pull 19 | 20 | # add the pachrat user 21 | sudo useradd pachrat 22 | sudo adduser pachrat sudo 23 | 24 | # pachctl 25 | curl -o /tmp/pachctl.deb -L https://github.com/pachyderm/pachyderm/releases/download/v1.7.5/pachctl_1.7.5_amd64.deb && sudo dpkg -i /tmp/pachctl.deb 26 | -------------------------------------------------------------------------------- /notebook-to-production/pipeline_stages/example1/example1.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pandas as pd 4 | from sklearn.preprocessing import MinMaxScaler 5 | 6 | # command line arguments 7 | parser = argparse.ArgumentParser(description='Preprocess iris training data.') 8 | parser.add_argument('infile', type=str, help='Input file containing the training set') 9 | parser.add_argument('outdir', type=str, help='Output directory for the pre-processed data') 10 | args = parser.parse_args() 11 | 12 | # read in the data 13 | cols = ['f1', 'f2', 'f3', 'f4', 'species'] 14 | data = pd.read_csv(args.infile, names=cols) 15 | 16 | # scale the features and encode the labels 17 | X = data[cols[0:-1]] 18 | X = MinMaxScaler().fit_transform(X) 19 | y = pd.get_dummies(data['species']) 20 | 21 | # output the features and encoded labels 22 | Xout = pd.DataFrame(X, columns=cols[0:-1]) 23 | Xout.to_csv(os.path.join(args.outdir, 'x_train.csv'), index=False, header=False) 24 | y.to_csv(os.path.join(args.outdir, 'y_train.csv'), index=False, header=False) 25 | -------------------------------------------------------------------------------- /notebook-to-production/pipeline_stages/docker/pre-processing/pre_process.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pandas as pd 4 | from sklearn.preprocessing import MinMaxScaler 5 | 6 | # command line arguments 7 | parser = argparse.ArgumentParser(description='Preprocess iris training data.') 8 | parser.add_argument('infile', type=str, help='Input file containing the training set') 9 | parser.add_argument('outdir', type=str, help='Output directory for the pre-processed data') 10 | args = parser.parse_args() 11 | 12 | # read in the data 13 | cols = ['f1', 'f2', 'f3', 'f4', 'species'] 14 | data = pd.read_csv(args.infile, names=cols) 15 | 16 | # scale the features and encode the labels 17 | X = data[cols[0:-1]] 18 | X = MinMaxScaler().fit_transform(X) 19 | y = pd.get_dummies(data['species']) 20 | 21 | # output the features and encoded labels 22 | Xout = pd.DataFrame(X, columns=cols[0:-1]) 23 | Xout.to_csv(os.path.join(args.outdir, 'x_train.csv'), index=False, header=False) 24 | y.to_csv(os.path.join(args.outdir, 'y_train.csv'),
index=False, header=False) 25 | -------------------------------------------------------------------------------- /etc/tensorflow-go/Dockerfile: -------------------------------------------------------------------------------- 1 | # based on the Dockerfile at https://github.com/ctava/tensorflow-go/blob/master/Dockerfile 2 | FROM tensorflow/tensorflow 3 | 4 | #Begin: install dependencies 5 | RUN apt-get update && apt-get install -y --no-install-recommends git 6 | #End: install dependencies 7 | 8 | #Begin: install golang 9 | ENV GOLANG_VERSION 1.10.3 10 | ENV GOLANG_DOWNLOAD_URL https://golang.org/dl/go$GOLANG_VERSION.linux-amd64.tar.gz 11 | 12 | ENV GOPATH /go 13 | ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH 14 | RUN curl -fsSL "$GOLANG_DOWNLOAD_URL" -o golang.tar.gz && \ 15 | tar -C /usr/local -xzf golang.tar.gz && \ 16 | rm golang.tar.gz && \ 17 | mkdir -p "$GOPATH/src" "$GOPATH/bin" && chmod -R 777 "$GOPATH" 18 | WORKDIR "/go" 19 | #End: install golang 20 | 21 | #Begin: install tensorflow 22 | ENV TF_TYPE "cpu" 23 | ENV TARGET_DIRECTORY /usr/local 24 | RUN curl -L \ 25 | "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.10.0.tar.gz" | tar -C $TARGET_DIRECTORY -xz 26 | RUN ldconfig 27 | ENV LD_LIBRARY_PATH $TARGET_DIRECTORY/lib 28 | ENV LIBRARY_PATH $TARGET_DIRECTORY/lib 29 | RUN go get github.com/tensorflow/tensorflow/tensorflow/go -------------------------------------------------------------------------------- /notebook-to-production/pipeline_stages/exercise2/template2.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pandas as pd 4 | import torch 5 | from torch.autograd import Variable 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from sklearn.preprocessing import MinMaxScaler 9 | torch.manual_seed(1234) 10 | 11 | # command line arguments 12 | 13 | # read in the input features 14 | cols = ['f1', 'f2', 'f3', 'f4'] 15 | infer_data = pd.read_csv(args.infile, names=cols) 16 | infer_data = MinMaxScaler().fit_transform(infer_data) 17 | 18 | # model parameters 19 | input_size = 4 20 | num_classes = 3 21 | hidden_size = 5 22 | 23 | # define model 24 | class Net(nn.Module): 25 | def __init__(self, input_size, hidden_size, num_classes): 26 | super(Net, self).__init__() 27 | self.fc1 = nn.Linear(input_size, hidden_size) 28 | self.relu = nn.ReLU() 29 | self.fc2 = nn.Linear(hidden_size, num_classes) 30 | 31 | def forward(self, x): 32 | out = self.fc1(x) 33 | out = self.relu(out) 34 | out = self.fc2(out) 35 | return out 36 | 37 | net = Net(input_size, hidden_size, num_classes) 38 | 39 | # Load the persisted model parameters 40 | 41 | # Perform the inference 42 | X = Variable(torch.from_numpy(infer_data).float()) 43 | out = net(X) 44 | _, labels = torch.max(out.data, 1) 45 | 46 | species = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'] 47 | predictions = [] 48 | for label in labels: 49 | predictions.append(species[label]) 50 | 51 | # save the inferences 52 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Alt text](ai-banner.png) 2 | 3 | # ML/AI and Data Science Training Materials 4 | 5 | *Note: This material has been designed to be taught in a classroom/video environment.
The code is well commented but missing some of the contextual concepts and ideas that will be covered in class.* 6 | 7 | ## Python-Based AI Workflows - From Notebook to Production Scale 8 | 9 | This material is for intermediate-level data scientists, developers, data engineers, or researchers. Specifically, this material is for those who have some experience developing ML/AI models on sample data sets (maybe in Jupyter), but who might struggle to scale, deploy, and productionize their work. They need to understand which Python tools to use as they scale their workflows beyond the notebook, and they need to understand how to manage and distribute work on large data. 10 | 11 | [Python-based AI Workflows - From Notebook to Production Scale](notebook-to-production) 12 | 13 | ## Machine Learning with Go 14 | 15 | This material is for any Go developer, data scientist, analyst, or statistician who wishes to learn how to build robust machine learning applications in Go. This class provides an intensive, comprehensive, and idiomatic view of training, utilizing, evaluating, and deploying machine learning models using Go. We believe this class is perfect for anyone wishing to build data-driven applications that produce valuable insights, have reproducible behavior, and can be deployed within modern architectures. 16 | 17 | [Machine Learning with Go](machine-learning-with-go) 18 | 19 | ___ 20 | All material is licensed under the [Apache License Version 2.0, January 2004](http://www.apache.org/licenses/LICENSE-2.0). 21 | -------------------------------------------------------------------------------- /notebook-to-production/pipeline_stages/exercise1/template1.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pandas as pd 4 | import torch 5 | from torch.autograd import Variable 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | torch.manual_seed(1234) 9 | 10 | # command line arguments 11 | 12 | # read in the pre-processed X, y data 13 | cols = ['f1', 'f2', 'f3', 'f4', 'l1', 'l2', 'l3'] 14 | X = pd.read_csv(args.inxfile, names=cols[0:-3]) 15 | y = pd.read_csv(args.inyfile, names=cols[-3:]) 16 | 17 | # model parameters 18 | input_size = 4 19 | num_classes = 3 20 | hidden_size = 5 21 | learning_rate = 0.1 22 | num_epoch = 10000 23 | 24 | # define model 25 | class Net(nn.Module): 26 | def __init__(self, input_size, hidden_size, num_classes): 27 | super(Net, self).__init__() 28 | self.fc1 = nn.Linear(input_size, hidden_size) 29 | self.relu = nn.ReLU() 30 | self.fc2 = nn.Linear(hidden_size, num_classes) 31 | 32 | def forward(self, x): 33 | out = self.fc1(x) 34 | out = self.relu(out) 35 | out = self.fc2(out) 36 | return out 37 | 38 | net = Net(input_size, hidden_size, num_classes) 39 | 40 | # choose optimizer and loss function 41 | criterion = nn.BCEWithLogitsLoss() 42 | optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate) 43 | 44 | # X and y variables 45 | X_tensor = Variable(torch.from_numpy(X.as_matrix()).float()) 46 | Y_tensor = Variable(torch.from_numpy(y.as_matrix()).float()) 47 | 48 | # train the model 49 | for epoch in range(num_epoch): 50 | 51 | #feedforward - backprop 52 | optimizer.zero_grad() 53 | out = net(X_tensor) 54 | loss = criterion(out, Y_tensor) 55 | loss.backward() 56 | optimizer.step() 57 | 58 | # export the model 59 | -------------------------------------------------------------------------------- /etc/doPachLaunch/launch.sh:
-------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # make sure the rest of the node is ready 4 | sleep 2m 5 | 6 | # set kubectl config locations 7 | echo "export KUBECONFIG=/root/.kube/config" >> /root/.bashrc 8 | echo "export KUBECONFIG=/home/pachrat/.kube/config" >> /home/pachrat/.bashrc 9 | 10 | # required for kube network add on 11 | sysctl net.bridge.bridge-nf-call-iptables=1 12 | ip="$(ifconfig | grep -A 1 'eth0' | tail -1 | cut -d ':' -f 2 | cut -d ' ' -f 1)" 13 | 14 | # start kubernetes 15 | kubeadm init --apiserver-advertise-address=$ip 16 | 17 | # connect kubectl 18 | mkdir -p $HOME/.kube 19 | mkdir -p /home/pachrat/.kube 20 | sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config 21 | sudo chown $(id -u):$(id -g) $HOME/.kube/config 22 | sudo cp -i /etc/kubernetes/admin.conf /home/pachrat/.kube/config 23 | sudo chown pachrat /home/pachrat/.kube/config 24 | export KUBECONFIG=/etc/kubernetes/admin.conf 25 | 26 | # install networking 27 | kubectl apply -f "https://cloud.weave.works/k8s/net?k8s-version=$(kubectl version | base64 | tr -d '\n')" 28 | 29 | echo "Waiting for networking to come up" 30 | start_time=$(date +%s) 31 | while true; do 32 | kube_dns_running="$(kubectl get pods --all-namespaces | grep coredns | grep Running)" 33 | if [[ -n "$kube_dns_running" ]]; then 34 | break; 35 | fi 36 | printf "." 37 | sleep 1 38 | runtime=$(($(date +%s)-$start_time)) 39 | if [ $runtime -ge 120 ]; then 40 | (>&2 echo "Timed out waiting for coredns (120s)") 41 | exit 1; 42 | fi 43 | done 44 | 45 | # master isolation 46 | kubectl taint nodes --all node-role.kubernetes.io/master- 47 | 48 | # allow services to act as admin (not great in general, but an easy way 49 | # to make sure pachyderm has access to what it needs in the k8s api) 50 | kubectl create clusterrolebinding serviceaccounts-cluster-admin \ 51 | --clusterrole=cluster-admin \ 52 | --group=system:serviceaccounts 53 | 54 | # deploy pachyderm 55 | pachctl deploy local 56 | 57 | # set password auth 58 | usermod -aG sudo pachrat 59 | sudo sed -i -- 's/PasswordAuthentication no/PasswordAuthentication yes/g' /etc/ssh/sshd_config 60 | sudo service ssh restart 61 | -------------------------------------------------------------------------------- /notebook-to-production/pipeline_stages/exercise2/solution2.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pandas as pd 4 | import torch 5 | from torch.autograd import Variable 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from sklearn.preprocessing import MinMaxScaler 9 | torch.manual_seed(1234) 10 | 11 | # command line arguments 12 | parser = argparse.ArgumentParser(description='Inference using a saved PyTorch model.') 13 | parser.add_argument('inmodel', type=str, help='Input file containing the saved model') 14 | parser.add_argument('infile', type=str, help='Input file containing the input features') 15 | parser.add_argument('outdir', type=str, help='Output directory for the inferences') 16 | args = parser.parse_args() 17 | 18 | # read in the input features 19 | cols = ['f1', 'f2', 'f3', 'f4'] 20 | infer_data = pd.read_csv(args.infile, names=cols) 21 | infer_data = MinMaxScaler().fit_transform(infer_data) 22 | 23 | # model parameters 24 | input_size = 4 25 | num_classes = 3 26 | hidden_size = 5 27 | 28 | # define model 29 | class Net(nn.Module): 30 | def __init__(self, input_size, hidden_size, num_classes): 31 | super(Net, self).__init__() 
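        # layer sizes come from the constants defined above:
        # 4 input features -> 5 hidden units -> 3 class scores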
32 | self.fc1 = nn.Linear(input_size, hidden_size) 33 | self.relu = nn.ReLU() 34 | self.fc2 = nn.Linear(hidden_size, num_classes) 35 | 36 | def forward(self, x): 37 | out = self.fc1(x) 38 | out = self.relu(out) 39 | out = self.fc2(out) 40 | return out 41 | 42 | net = Net(input_size, hidden_size, num_classes) 43 | 44 | # Load the persisted model parameters 45 | net.load_state_dict(torch.load(args.inmodel)) 46 | 47 | # Perform the inference 48 | X = Variable(torch.from_numpy(infer_data).float()) 49 | out = net(X) 50 | _, labels = torch.max(out.data, 1) 51 | 52 | species = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'] 53 | predictions = [] 54 | for label in labels: 55 | predictions.append(species[label]) 56 | 57 | # save the inferences 58 | out_data = pd.DataFrame(predictions, columns=['predictions']) 59 | out_data.to_csv(os.path.join(args.outdir, os.path.basename(args.infile)), index=False, header=False) 60 | -------------------------------------------------------------------------------- /notebook-to-production/pipeline_stages/docker/inference/infer.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pandas as pd 4 | import torch 5 | from torch.autograd import Variable 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from sklearn.preprocessing import MinMaxScaler 9 | torch.manual_seed(1234) 10 | 11 | # command line arguments 12 | parser = argparse.ArgumentParser(description='Inference using a saved PyTorch model.') 13 | parser.add_argument('inmodel', type=str, help='Input file containing the saved model') 14 | parser.add_argument('infile', type=str, help='Input file containing the input features') 15 | parser.add_argument('outdir', type=str, help='Output directory for the inferences') 16 | args = parser.parse_args() 17 | 18 | # read in the pre-processed X, y data 19 | cols = ['f1', 'f2', 'f3', 'f4'] 20 | infer_data = pd.read_csv(args.infile, names=cols) 21 | infer_data = MinMaxScaler().fit_transform(infer_data) 22 | 23 | # model parameters 24 | input_size = 4 25 | num_classes = 3 26 | hidden_size = 5 27 | 28 | # define model 29 | class Net(nn.Module): 30 | def __init__(self, input_size, hidden_size, num_classes): 31 | super(Net, self).__init__() 32 | self.fc1 = nn.Linear(input_size, hidden_size) 33 | self.relu = nn.ReLU() 34 | self.fc2 = nn.Linear(hidden_size, num_classes) 35 | 36 | def forward(self, x): 37 | out = self.fc1(x) 38 | out = self.relu(out) 39 | out = self.fc2(out) 40 | return out 41 | 42 | net = Net(input_size, hidden_size, num_classes) 43 | 44 | # Load the persisted model parameters 45 | net.load_state_dict(torch.load(args.inmodel)) 46 | 47 | # Perform the inference 48 | X = Variable(torch.from_numpy(infer_data).float()) 49 | out = net(X) 50 | _, labels = torch.max(out.data, 1) 51 | 52 | species = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'] 53 | predictions = [] 54 | for label in labels: 55 | predictions.append(species[label]) 56 | 57 | # save the inferences 58 | out_data = pd.DataFrame(predictions, columns=['predictions']) 59 | out_data.to_csv(os.path.join(args.outdir, os.path.basename(args.infile)), index=False, header=False) 60 | -------------------------------------------------------------------------------- /notebook-to-production/pipeline_stages/docker/training/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pandas as pd 4 | import torch 5 | from torch.autograd import 
Variable 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | torch.manual_seed(1234) 9 | 10 | # command line arguments 11 | parser = argparse.ArgumentParser(description='Train a model with PyTorch.') 12 | parser.add_argument('inxfile', type=str, help='Input file containing the x training data') 13 | parser.add_argument('inyfile', type=str, help='Input file containing the y training data') 14 | parser.add_argument('outdir', type=str, help='Output directory for the trained model') 15 | args = parser.parse_args() 16 | 17 | # read in the pre-processed X, y data 18 | cols = ['f1', 'f2', 'f3', 'f4', 'l1', 'l2', 'l3'] 19 | X = pd.read_csv(args.inxfile, names=cols[0:-3]) 20 | y = pd.read_csv(args.inyfile, names=cols[-3:]) 21 | 22 | # model parameters 23 | input_size = 4 24 | num_classes = 3 25 | hidden_size = 5 26 | learning_rate = 0.1 27 | num_epoch = 10000 28 | 29 | # define model 30 | class Net(nn.Module): 31 | def __init__(self, input_size, hidden_size, num_classes): 32 | super(Net, self).__init__() 33 | self.fc1 = nn.Linear(input_size, hidden_size) 34 | self.relu = nn.ReLU() 35 | self.fc2 = nn.Linear(hidden_size, num_classes) 36 | 37 | def forward(self, x): 38 | out = self.fc1(x) 39 | out = self.relu(out) 40 | out = self.fc2(out) 41 | return out 42 | 43 | net = Net(input_size, hidden_size, num_classes) 44 | 45 | # choose optimizer and loss function 46 | criterion = nn.BCEWithLogitsLoss() 47 | optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate) 48 | 49 | # X and y variables 50 | X_tensor = Variable(torch.from_numpy(X.as_matrix()).float()) 51 | Y_tensor = Variable(torch.from_numpy(y.as_matrix()).float()) 52 | 53 | # train the model 54 | for epoch in range(num_epoch): 55 | 56 | #feedforward - backprop 57 | optimizer.zero_grad() 58 | out = net(X_tensor) 59 | loss = criterion(out, Y_tensor) 60 | loss.backward() 61 | optimizer.step() 62 | 63 | # export the model 64 | torch.save(net.state_dict(), os.path.join(args.outdir, 'model.pt')) 65 | -------------------------------------------------------------------------------- /notebook-to-production/pipeline_stages/exercise1/solution1.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pandas as pd 4 | import torch 5 | from torch.autograd import Variable 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | torch.manual_seed(1234) 9 | 10 | # command line arguments 11 | parser = argparse.ArgumentParser(description='Train a model with PyTorch.') 12 | parser.add_argument('inxfile', type=str, help='Input file containing the x training data') 13 | parser.add_argument('inyfile', type=str, help='Input file containing the y training data') 14 | parser.add_argument('outdir', type=str, help='Output directory for the trained model') 15 | args = parser.parse_args() 16 | 17 | # read in the pre-processed X, y data 18 | cols = ['f1', 'f2', 'f3', 'f4', 'l1', 'l2', 'l3'] 19 | X = pd.read_csv(args.inxfile, names=cols[0:-3]) 20 | y = pd.read_csv(args.inyfile, names=cols[-3:]) 21 | 22 | # model parameters 23 | input_size = 4 24 | num_classes = 3 25 | hidden_size = 5 26 | learning_rate = 0.1 27 | num_epoch = 10000 28 | 29 | # define model 30 | class Net(nn.Module): 31 | def __init__(self, input_size, hidden_size, num_classes): 32 | super(Net, self).__init__() 33 | self.fc1 = nn.Linear(input_size, hidden_size) 34 | self.relu = nn.ReLU() 35 | self.fc2 = nn.Linear(hidden_size, num_classes) 36 | 37 | def forward(self, x): 38 | out = self.fc1(x) 39 | out = 
self.relu(out) 40 | out = self.fc2(out) 41 | return out 42 | 43 | net = Net(input_size, hidden_size, num_classes) 44 | 45 | # choose optimizer and loss function 46 | criterion = nn.BCEWithLogitsLoss() 47 | optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate) 48 | 49 | # X and y variables 50 | X_tensor = Variable(torch.from_numpy(X.as_matrix()).float()) 51 | Y_tensor = Variable(torch.from_numpy(y.as_matrix()).float()) 52 | 53 | # train the model 54 | for epoch in range(num_epoch): 55 | 56 | #feedforward - backprop 57 | optimizer.zero_grad() 58 | out = net(X_tensor) 59 | loss = criterion(out, Y_tensor) 60 | loss.backward() 61 | optimizer.step() 62 | 63 | # export the model 64 | torch.save(net.state_dict(), os.path.join(args.outdir, 'model.pt')) 65 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_workflow/exercise2/templates/template1/template1.go: -------------------------------------------------------------------------------- 1 | // All material is licensed under the Apache License Version 2.0, January 2004 2 | // http://www.apache.org/licenses/LICENSE-2.0 3 | 4 | // go build 5 | // ./solution1 6 | 7 | // Sample program to train a regression model with multiple independent variables. 8 | package main 9 | 10 | import ( 11 | "encoding/csv" 12 | "encoding/json" 13 | "flag" 14 | "io/ioutil" 15 | "log" 16 | "os" 17 | "path/filepath" 18 | ) 19 | 20 | // ModelInfo includes the information about the 21 | // model that is output from the training. 22 | type ModelInfo struct { 23 | Intercept float64 `json:"intercept"` 24 | Coefficients []CoefficientInfo `json:"coefficients"` 25 | } 26 | 27 | // CoefficientInfo include information about a 28 | // particular model coefficient. 29 | type CoefficientInfo struct { 30 | Name string `json:"name"` 31 | Coefficient float64 `json:"coefficient"` 32 | } 33 | 34 | func main() { 35 | 36 | // Declare the input and output directory/file flags. 37 | inFilePtr := flag.String("inFile", "", "The file containing the training data.") 38 | outDirPtr := flag.String("outDir", "", "The output directory") 39 | 40 | // Parse the command line flags. 41 | flag.Parse() 42 | 43 | // Open the training dataset file. 44 | f, err := os.Open(*inFilePtr) 45 | if err != nil { 46 | log.Fatal(err) 47 | } 48 | defer f.Close() 49 | 50 | // Create a new CSV reader reading from the opened file. 51 | reader := csv.NewReader(f) 52 | 53 | // Read in all of the CSV records 54 | reader.FieldsPerRecord = 11 55 | trainingData, err := reader.ReadAll() 56 | if err != nil { 57 | log.Fatal(err) 58 | } 59 | 60 | // Create the value(s) needed to train a model using 61 | // github.com/sajari/regression or gonum. 62 | 63 | // Train/fit the regression model similar to how we did it 64 | // in our exploratory notebook. 65 | 66 | // Fill in the model information into a model info struct. 67 | modelInfo := ModelInfo{ 68 | Intercept: r.Coeff(0), 69 | Coefficients: []CoefficientInfo{ 70 | CoefficientInfo{ 71 | Name: "bmi", 72 | // Coefficient: ?, 73 | }, 74 | CoefficientInfo{ 75 | Name: "ltg", 76 | // Coefficient: ?, 77 | }, 78 | }, 79 | } 80 | 81 | // Marshal the model information. 82 | outputData, err := json.MarshalIndent(modelInfo, "", " ") 83 | if err != nil { 84 | log.Fatal(err) 85 | } 86 | 87 | // Save the marshalled output to a file. 
88 | if err := ioutil.WriteFile(filepath.Join(*outDirPtr, "model.json"), outputData, 0644); err != nil { 89 | log.Fatal(err) 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /notebook-to-production/introduction/example1/example1_data_munging.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "from sklearn.preprocessing import MinMaxScaler" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Data import and pre-processing" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "We are going to use pandas and scikit-learn to import and pre-process some data. These are super common tools that you will see everywhere in the data science and machine learning world. We will look at some more advanced tooling later, but it will be good to familiarize yourself with these as a starting point.\n", 25 | "\n", 26 | "- [pandas docs](https://pandas.pydata.org/pandas-docs/stable/)\n", 27 | "- [scikit-learn docs](http://scikit-learn.org/stable/)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "## Data Import" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "cols = ['f1', 'f2', 'f3', 'f4', 'species']\n", 44 | "data = pd.read_csv('../data/iris.csv', names=cols)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "data.head()" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "## Example Pre-processing" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "X = data[cols[0:-1]]\n", 70 | "X = MinMaxScaler().fit_transform(X)\n", 71 | "y = pd.get_dummies(data['species'])" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "X[0:5]" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "y.head()" 90 | ] 91 | } 92 | ], 93 | "metadata": { 94 | "kernelspec": { 95 | "display_name": "Python 3", 96 | "language": "python", 97 | "name": "python3" 98 | }, 99 | "language_info": { 100 | "codemirror_mode": { 101 | "name": "ipython", 102 | "version": 3 103 | }, 104 | "file_extension": ".py", 105 | "mimetype": "text/x-python", 106 | "name": "python", 107 | "nbconvert_exporter": "python", 108 | "pygments_lexer": "ipython3", 109 | "version": "3.6.4" 110 | } 111 | }, 112 | "nbformat": 4, 113 | "nbformat_minor": 2 114 | } 115 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_with_go/bonus/bonus2/bonus2.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "image" 7 | "image/color" 8 | "log" 9 | "os" 10 | "strconv" 11 | 12 | "gocv.io/x/gocv" 13 | ) 14 | 15 | // readDescriptions reads the descriptions from a file 16 | // and returns a slice of its lines. 
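// Each line is a human-readable class label; its position in the file
// corresponds to a class index in the network's 1x1000 output below.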
17 | func readDescriptions(path string) ([]string, error) { 18 | file, err := os.Open(path) 19 | if err != nil { 20 | return nil, err 21 | } 22 | defer file.Close() 23 | 24 | var lines []string 25 | scanner := bufio.NewScanner(file) 26 | for scanner.Scan() { 27 | lines = append(lines, scanner.Text()) 28 | } 29 | return lines, scanner.Err() 30 | } 31 | 32 | func main() { 33 | if len(os.Args) < 4 { 34 | fmt.Println("How to run:\ntf-classifier [camera ID] [modelfile] [descriptionsfile]") 35 | return 36 | } 37 | 38 | // Parse the command line arguments. 39 | deviceID, err := strconv.Atoi(os.Args[1]) 40 | if err != nil { 41 | log.Fatal(err) 42 | } 43 | 44 | model := os.Args[2] 45 | 46 | descriptions, err := readDescriptions(os.Args[3]) 47 | if err != nil { 48 | log.Fatal(err) 49 | } 50 | 51 | // Open the default capture device, 0. 52 | webcam, err := gocv.VideoCaptureDevice(deviceID) 53 | if err != nil { 54 | fmt.Printf("Error opening video capture device: %v\n", deviceID) 55 | return 56 | } 57 | defer webcam.Close() 58 | 59 | window := gocv.NewWindow("Tensorflow Classifier") 60 | defer window.Close() 61 | 62 | img := gocv.NewMat() 63 | defer img.Close() 64 | 65 | // Read in the TensorFlow model. 66 | net := gocv.ReadNetFromTensorflow(model) 67 | defer net.Close() 68 | 69 | status := "Ready" 70 | statusColor := color.RGBA{0, 255, 0, 0} 71 | fmt.Printf("Start reading camera device: %v\n", deviceID) 72 | 73 | // Begin reading in frames from the camera. 74 | for { 75 | if ok := webcam.Read(&img); !ok { 76 | fmt.Printf("Error cannot read device %d\n", deviceID) 77 | return 78 | } 79 | if img.Empty() { 80 | continue 81 | } 82 | 83 | // Convert the image Mat to 224x244 blob that the classifier can analyze. 84 | blob := gocv.BlobFromImage(img, 1.0, image.Pt(224, 244), gocv.NewScalar(0, 0, 0, 0), true, false) 85 | defer blob.Close() 86 | 87 | // Feed the blob into the classifier. 88 | net.SetInput(blob, "input") 89 | 90 | // Run a forward pass thru the network. 91 | prob := net.Forward("softmax2") 92 | defer prob.Close() 93 | 94 | // Reshape the results into a 1x1000 matrix. 95 | probMat := prob.Reshape(1, 1) 96 | defer probMat.Close() 97 | 98 | // Determine the most probable classification. 99 | _, maxVal, _, maxLoc := gocv.MinMaxLoc(probMat) 100 | 101 | // Display the classification. 102 | desc := "Unknown" 103 | if maxLoc.X < 1000 { 104 | desc = descriptions[maxLoc.X] 105 | } 106 | status = fmt.Sprintf("description: %v, maxVal: %v\n", desc, maxVal) 107 | gocv.PutText(&img, status, image.Pt(10, 20), gocv.FontHersheyPlain, 1.2, statusColor, 2) 108 | 109 | window.IMShow(img) 110 | if window.WaitKey(1) >= 0 { 111 | break 112 | } 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_workflow/exercise2/templates/template2/template2a/template2a.go: -------------------------------------------------------------------------------- 1 | // All material is licensed under the Apache License Version 2.0, January 2004 2 | // http://www.apache.org/licenses/LICENSE-2.0 3 | 4 | // go build 5 | // ./solution1 6 | 7 | // Sample program to pre-process data for quality control of a regression model. 8 | package main 9 | 10 | import ( 11 | "encoding/csv" 12 | "encoding/json" 13 | "flag" 14 | "io/ioutil" 15 | "log" 16 | "os" 17 | "path/filepath" 18 | "strconv" 19 | ) 20 | 21 | // PredictionData includes the data necessary to make 22 | // a prediction and encodes the output prediction. 
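// The marshalled independent_variables layout mirrors the sample records
// under ml_workflow/data/test/ (e.g. 1.json earlier in this repo).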
23 | type PredictionData struct { 24 | Prediction float64 `json:"predicted_diabetes_progression,omitempty"` 25 | IndependentVars []IndependentVar `json:"independent_variables"` 26 | DependentVar float64 `json:"dependent_variable"` 27 | } 28 | 29 | // IndependentVar includes information about and a 30 | // value for an independent variable. 31 | type IndependentVar struct { 32 | Name string `json:"name"` 33 | Value float64 `json:"value"` 34 | } 35 | 36 | func main() { 37 | 38 | // Declare the input and output directory/file flags. 39 | inFilePtr := flag.String("inFile", "", "The file containing the qc data.") 40 | outDirPtr := flag.String("outDir", "", "The output directory") 41 | 42 | // Parse the command line flags. 43 | flag.Parse() 44 | 45 | // Open the training dataset file. 46 | f, err := os.Open(*inFilePtr) 47 | if err != nil { 48 | log.Fatal(err) 49 | } 50 | defer f.Close() 51 | 52 | // Create a new CSV reader reading from the opened file. 53 | reader := csv.NewReader(f) 54 | 55 | // Read in all of the CSV records 56 | reader.FieldsPerRecord = 11 57 | trainingData, err := reader.ReadAll() 58 | if err != nil { 59 | log.Fatal(err) 60 | } 61 | 62 | // Loop over the records in the CSV, pre-processing and saving them. 63 | var header []string 64 | for idx, record := range trainingData { 65 | 66 | // Collect the column names from the header. 67 | if idx == 0 { 68 | header = record 69 | continue 70 | } 71 | 72 | // Create a PredictionData value. 73 | var predictionData PredictionData 74 | 75 | // Fill the values in the predictionData value. 76 | var independentVars []IndependentVar 77 | for varID, value := range record { 78 | 79 | // Fill the observed value. 80 | if varID == 10 { 81 | 82 | // Parse the observed value. 83 | 84 | // Add the observed value to predictionData. 85 | } 86 | 87 | // Parse the feature value. 88 | 89 | // Add the independent variable. 90 | 91 | independentVars = append(independentVars, independentVar) 92 | } 93 | 94 | predictionData.IndependentVars = independentVars 95 | 96 | // Save the pre-processed record. 97 | outputData, err := json.MarshalIndent(predictionData, "", " ") 98 | if err != nil { 99 | log.Fatal(err) 100 | } 101 | 102 | // Save the marshalled output to a file. 103 | outputFile := strconv.Itoa(idx) + ".json" 104 | if err := ioutil.WriteFile(filepath.Join(*outDirPtr, outputFile), outputData, 0644); err != nil { 105 | log.Fatal(err) 106 | } 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_with_go/bonus/README.md: -------------------------------------------------------------------------------- 1 | ## More sophisticated models 2 | 3 | We should all strive for simplicity (which is a mantra of the Go community), but sometimes we do need a model that is more complicated than linear regression or kNN. Go has us covered here. We can interface with major frameworks like TensorFlow, utilize more Go-centric frameworks like Gorgonia, or use services like MachineBox to manage our ML models. Moreover, in some cases (e.g., streaming analysis) we may want to leverage Go's built-in concurrency primitives. 4 | 5 | - The Go bindings for TensorFlow allow for easy inference based on models trained in Python. You can technically train a model directly in Go, but it is not recommended quite yet. These bindings are actively being developed.
6 | - Because Go provides built-in HTTP support and `cgo`, it is relatively easy to interact with a variety of frameworks that have a REST or C/C++ interface (e.g., H2O or Intel's Deep Learning SDK). 7 | - Gorgonia remains the largest Go-native effort to enable many of the same workflows that are associated with TensorFlow. 8 | - Go provides built-in concurrency primitives which allow you to scale streaming analysis. 9 | 10 | ## Links 11 | 12 | [TensorFlow in Go](https://www.tensorflow.org/install/install_go) 13 | [Gorgonia](https://github.com/gorgonia/gorgonia) 14 | [MachineBox (which includes a very nice Go SDK)](https://machinebox.io/) 15 | [GoCV (which we will use here)](https://gocv.io/) 16 | 17 | ## Examples 18 | 19 | [Object detection with TensorFlow](bonus1/bonus1.go) 20 | [Streaming webcam classification with GoCV](bonus2/bonus2.go) 21 | [Streaming sentiment analysis with MachineBox](bonus3/bonus3.ipynb) 22 | 23 | ## Exercises 24 | 25 | ### Exercise 1 26 | 27 | Recreate the object detection example on your local machine: 28 | 29 | 1. [Install TensorFlow](https://www.tensorflow.org/install/install_go), or use the pre-built Docker image 30 | dicaormu/tensorflow-go: 31 | 32 | ``` 33 | docker run -it -p 8887:8888 -v PATH-TO-YOUR-PROJECT/training-ai/machine-learning-with-go/ml_with_go:/go/src/machine-learning-with-go dicaormu/tensorflow-go 34 | ``` 35 | 36 | 2. Download the TensorFlow model from [here](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2018_01_28.tar.gz) 37 | 3. Unzip the model files into the data folder. 38 | 4. Modify the paths in main to point to your model, labels, and image. 39 | 5. Run the example. 40 | 41 | ### Exercise 2 42 | 43 | Recreate the streaming webcam classification example on your local machine: 44 | 45 | 1. [Install GoCV](https://gocv.io/getting-started/) 46 | 2. Download the TensorFlow Inception model from [here](https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip) 47 | 3. Unzip the Inception model files. 48 | 4. Build the bonus2 example with `go build`. 49 | 5. Run the example with `./bonus2 0 <modelfile> <descriptionsfile>` 50 | 51 | ### Exercise 3 52 | 53 | Try running the streaming analysis of tweets included in the [bonus3](bonus3) notebook with your own Twitter creds: 54 | 55 | 1. From Jupyter, navigate to the `bonus3.ipynb` notebook under `bonus3` 56 | 2. Replace the MachineBox IP and Twitter creds with your own 57 | 3. Experiment with different search criteria, different numbers of goroutines, etc. 58 | -------------------------------------------------------------------------------- /notebook-to-production/introduction/example2/example2_model_training.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "from sklearn.neighbors import KNeighborsClassifier\n", 11 | "from sklearn.preprocessing import MinMaxScaler" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Data import" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "This portion of the notebook is carried over from the \"example1_\" notebook."
26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "cols = ['f1', 'f2', 'f3', 'f4', 'species']\n", 35 | "data = pd.read_csv('../data/iris.csv', names=cols)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "data.head()" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "# Model Training" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "Again, as a starting point, we will be utilizing a simple model in scikit-learn. This model is call \"k nearest neighbors.\" We will discuss how it works in class and how it differs from more advanced models that we will use later in the class (i.e., neural networks)." 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "neigh = KNeighborsClassifier(n_neighbors=3)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "neigh.fit(data[['f1', 'f2', 'f3', 'f4']], data['species']) " 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "# Inference" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "test_infer = pd.read_csv('../data/test.csv', names=cols[0:-1])" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "labels = neigh.predict(test_infer)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "labels" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [] 119 | } 120 | ], 121 | "metadata": { 122 | "kernelspec": { 123 | "display_name": "Python 3", 124 | "language": "python", 125 | "name": "python3" 126 | }, 127 | "language_info": { 128 | "codemirror_mode": { 129 | "name": "ipython", 130 | "version": 3 131 | }, 132 | "file_extension": ".py", 133 | "mimetype": "text/x-python", 134 | "name": "python", 135 | "nbconvert_exporter": "python", 136 | "pygments_lexer": "ipython3", 137 | "version": "3.6.4" 138 | } 139 | }, 140 | "nbformat": 4, 141 | "nbformat_minor": 2 142 | } 143 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_workflow/exercise2/solutions/solution2/solution2a/solution2a.go: -------------------------------------------------------------------------------- 1 | // All material is licensed under the Apache License Version 2.0, January 2004 2 | // http://www.apache.org/licenses/LICENSE-2.0 3 | 4 | // go build 5 | // ./solution1 6 | 7 | // Sample program to pre-process data for quality control of a regression model. 8 | package main 9 | 10 | import ( 11 | "encoding/csv" 12 | "encoding/json" 13 | "flag" 14 | "io/ioutil" 15 | "log" 16 | "os" 17 | "path/filepath" 18 | "strconv" 19 | ) 20 | 21 | // PredictionData includes the data necessary to make 22 | // a prediction and encodes the output prediction. 
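// Prediction is tagged omitempty, so records written by this pre-processing
// stage carry only the features and the observed value; the inference stage
// fills in the prediction later.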
23 | type PredictionData struct { 24 | Prediction float64 `json:"predicted_diabetes_progression,omitempty"` 25 | IndependentVars []IndependentVar `json:"independent_variables"` 26 | DependentVar float64 `json:"dependent_variable"` 27 | } 28 | 29 | // IndependentVar includes information about and a 30 | // value for an independent variable. 31 | type IndependentVar struct { 32 | Name string `json:"name"` 33 | Value float64 `json:"value"` 34 | } 35 | 36 | func main() { 37 | 38 | // Declare the input and output directory/file flags. 39 | inFilePtr := flag.String("inFile", "", "The file containing the qc data.") 40 | outDirPtr := flag.String("outDir", "", "The output directory") 41 | 42 | // Parse the command line flags. 43 | flag.Parse() 44 | 45 | // Open the training dataset file. 46 | f, err := os.Open(*inFilePtr) 47 | if err != nil { 48 | log.Fatal(err) 49 | } 50 | defer f.Close() 51 | 52 | // Create a new CSV reader reading from the opened file. 53 | reader := csv.NewReader(f) 54 | 55 | // Read in all of the CSV records 56 | reader.FieldsPerRecord = 11 57 | trainingData, err := reader.ReadAll() 58 | if err != nil { 59 | log.Fatal(err) 60 | } 61 | 62 | // Loop over the records in the CSV, pre-processing and saving them. 63 | var header []string 64 | for idx, record := range trainingData { 65 | 66 | // Collect the column names from the header. 67 | if idx == 0 { 68 | header = record 69 | continue 70 | } 71 | 72 | // Create a PredictionData value. 73 | var predictionData PredictionData 74 | 75 | // Fill the values in the predictionData value. 76 | var independentVars []IndependentVar 77 | for varID, value := range record { 78 | 79 | // Fill the observed value. 80 | if varID == 10 { 81 | 82 | // Parse the observed value. 83 | observation, err := strconv.ParseFloat(value, 64) 84 | if err != nil { 85 | log.Fatal(err) 86 | } 87 | 88 | // Add the observed value to predictionData. 89 | predictionData.DependentVar = observation 90 | continue 91 | } 92 | 93 | // Parse the feature value. 94 | feature, err := strconv.ParseFloat(value, 64) 95 | if err != nil { 96 | log.Fatal(err) 97 | } 98 | 99 | // Add the independent variable. 100 | independentVar := IndependentVar{ 101 | Name: header[varID], 102 | Value: feature, 103 | } 104 | 105 | independentVars = append(independentVars, independentVar) 106 | } 107 | 108 | predictionData.IndependentVars = independentVars 109 | 110 | // Marshal the pre-processed record. 111 | outputData, err := json.MarshalIndent(predictionData, "", " ") 112 | if err != nil { 113 | log.Fatal(err) 114 | } 115 | 116 | // Save the marshalled output to a file. 117 | outputFile := strconv.Itoa(idx) + ".json" 118 | if err := ioutil.WriteFile(filepath.Join(*outDirPtr, outputFile), outputData, 0644); err != nil { 119 | log.Fatal(err) 120 | } 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /etc/gophernotesDocker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:3.7 2 | 3 | # Jovyan user 4 | ENV NB_USER jovyan 5 | ENV NB_UID 1000 6 | RUN adduser -s /bin/bash -u $NB_UID -D $NB_USER 7 | USER jovyan 8 | RUN mkdir /home/$NB_USER/work && \ 9 | mkdir /home/$NB_USER/.jupyter && \ 10 | mkdir /home/$NB_USER/.local 11 | 12 | USER root 13 | 14 | # Install Jupyter and gophernotes.
15 | RUN set -x \ 16 | # install python and dependencies 17 | && apk update \ 18 | && apk --no-cache add \ 19 | ca-certificates \ 20 | python3 \ 21 | su-exec \ 22 | gcc \ 23 | g++ \ 24 | git \ 25 | pkgconfig \ 26 | python3-dev \ 27 | zeromq-dev \ 28 | musl-dev \ 29 | mercurial \ 30 | libtool \ 31 | autoconf \ 32 | automake \ 33 | make \ 34 | && echo pip upgrade && pip3 install --upgrade pip \ 35 | && echo python3.6 && cp /usr/bin/python3.6 /usr/bin/python \ 36 | ## install Go 37 | && echo update cache && apk --update-cache --allow-untrusted \ 38 | --repository http://dl-4.alpinelinux.org/alpine/edge/community \ 39 | --arch=x86_64 add \ 40 | go \ 41 | ## jupyter notebook 42 | && ln -s /usr/include/locale.h /usr/include/xlocale.h \ 43 | && pip3 install jupyter notebook \ 44 | ## clean 45 | && find /usr/lib/python3.6 -name __pycache__ | xargs rm -r \ 46 | && rm -rf \ 47 | /root/.[acpw]* \ 48 | ipaexg00301* \ 49 | && rm -rf /var/cache/apk/* 50 | 51 | # switch to jovyan user 52 | USER jovyan 53 | 54 | # environment variables 55 | ENV GOPATH /home/jovyan/go 56 | ENV CGO_ENABLED=1 CGO_CPPFLAGS="-I/usr/include" 57 | 58 | # install gophernotes 59 | RUN set -x \ 60 | && GOPATH=~/go \ 61 | && go get -insecure github.com/pebbe/zmq4 \ 62 | && go get github.com/gopherdata/gophernotes \ 63 | && mkdir -p ~/.local/share/jupyter/kernels/gophernotes \ 64 | && cp -r ~/go/src/github.com/gopherdata/gophernotes/kernel/* ~/.local/share/jupyter/kernels/gophernotes 65 | 66 | # move the gophernotes binary 67 | USER root 68 | RUN cp /home/jovyan/go/bin/gophernotes /usr/local/bin/ 69 | USER jovyan 70 | 71 | # get the relevant Go packages 72 | RUN set -x \ 73 | && GOPATH=~/go \ 74 | && go get -insecure gonum.org/v1/plot/... \ 75 | && go get -insecure gonum.org/v1/gonum/... \ 76 | && go get github.com/kniren/gota/... \ 77 | && go get github.com/sajari/regression \ 78 | && go get github.com/sjwhitworth/golearn/... \ 79 | && go get -insecure go-hep.org/x/hep/csvutil/... \ 80 | && go get -insecure go-hep.org/x/hep/fit \ 81 | && go get -insecure go-hep.org/x/hep/hbook \ 82 | && go get github.com/montanaflynn/stats \ 83 | && go get github.com/boltdb/bolt \ 84 | && go get github.com/patrickmn/go-cache \ 85 | && go get github.com/chewxy/math32 \ 86 | && go get gonum.org/v1/gonum/mat \ 87 | && go get github.com/chewxy/hm \ 88 | && go get -u gorgonia.org/vecf64 \ 89 | && go get -u gorgonia.org/vecf32 \ 90 | && go get github.com/awalterschulze/gographviz \ 91 | && go get github.com/leesper/go_rng \ 92 | && go get github.com/pkg/errors \ 93 | && go get github.com/stretchr/testify/assert \ 94 | && go get github.com/kniren/gota/dataframe \ 95 | && go get github.com/skelterjohn/go.matrix \ 96 | && go get github.com/gonum/matrix/mat64 \ 97 | && go get github.com/gonum/stat \ 98 | && go get github.com/mash/gokmeans \ 99 | && go get github.com/garyburd/go-oauth/oauth \ 100 | && go get github.com/machinebox/sdk-go/textbox 101 | 102 | EXPOSE 8888 103 | CMD [ "jupyter", "notebook", "--no-browser", "--ip=*", "--NotebookApp.token=''", "--NotebookApp.disable_check_xsrf=True" ] 104 | -------------------------------------------------------------------------------- /notebook-to-production/productionizing/README.md: -------------------------------------------------------------------------------- 1 | # Introduction to productionizing ML/AI 2 | 3 | This material introduces some common pain points and pitfalls that people fall into when trying to productionize data science work.
Once you are done with this material, you will understand what the common pain points are and the guiding principles that will help us overcome them. 4 | 5 | Much of this section of the course is meant to be a discussion. That discussion will be centered around an example AI workflow in a Jupyter notebook. This guide will walk you through: 6 | 7 | 1. [Running the example Python workflow](#1-running-the-example-python-workflow) 8 | 2. [Discussing how we might productionize the workflow](#2-discussing-how-we-might-productionize-the-workflow) 9 | 3. [Production/deployed ML/AI workflows](#3-productiondeployed-mlai-workflows) 10 | 11 | It also includes a [list of resources](#resources) for those that want to dive in a little bit deeper. 12 | 13 | ## 1. Running the example Python workflow 14 | 15 | With Jupyter up and running, click on the `example_sklearn_workflow.ipynb` notebook (which you should see in the `productionizing` directory). This will bring up our example notebook. We will run through and discuss this notebook interactively in class. As a reminder, our example problem for the day will be the [Iris flower classification problem](https://en.wikipedia.org/wiki/Iris_flower_data_set), and we are using scikit-learn as a jumping-off point. 16 | 17 | ## 2. Discussing how we might productionize the workflow 18 | 19 | It might not be clear what "productionize" means or how we might try doing that. This will be discussed in class, but before or during our conversation think about: 20 | 21 | - What characteristics should a production AI workflow exhibit? 22 | - What differentiates a production AI workflow from a non-production workflow? 23 | - In what environments do production AI workflows run? 24 | - Are scaling and deployment the same thing? 25 | - How might data change as we move to production? 26 | - How do we want to manage our production AI workflows? 27 | - Do we treat all parts of our workflow the same as we move them to production? 28 | - What happens when our workflows fail at scale? 29 | - How does data ingress/egress happen at scale? 30 | - How will our workflow interact with other pieces of infrastructure or other applications? 31 | - How do we handle our dependencies? 32 | 33 | By the end of this discussion, we should decide on a set of guidelines that will drive how we productionize our Python workflow. 34 | 35 | ## 3. Production/deployed ML/AI workflows 36 | 37 | Now that we know the guidelines that we should follow as we work towards a production ML/AI deployment, let's review some common patterns. This will again be focused around diagrams drawn in class and the workshop slides, with a small illustrative sketch below for reference.
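To make the discussion concrete, here is a minimal sketch of what the training portion of the example notebook might look like once it is pulled out of Jupyter into a standalone, parameterized stage. This is illustrative only: the flag names, file layout, and use of `pickle` are assumptions made for the sketch, not the course's prescribed approach.

```
# train.py - a hypothetical standalone training stage (illustrative sketch only).
import argparse
import pickle

import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler


def main():
    # Explicit, parameterized I/O instead of paths hard-coded in notebook cells.
    parser = argparse.ArgumentParser(description="Train the iris classifier.")
    parser.add_argument("--in-file", required=True, help="Path to the training CSV")
    parser.add_argument("--out-file", required=True, help="Where to write the pickled model")
    args = parser.parse_args()

    # Same pre-processing as the example notebook.
    cols = ["f1", "f2", "f3", "f4", "species"]
    data = pd.read_csv(args.in_file, names=cols)
    X = MinMaxScaler().fit_transform(data[cols[0:-1]])
    y = pd.get_dummies(data["species"])

    # Same model as the example notebook.
    nn = MLPClassifier(solver="sgd", learning_rate="constant", momentum=0,
                       learning_rate_init=0.2, random_state=0, max_iter=1000)
    nn.fit(X, y)

    # Persist the trained model so a separate inference stage can load it later.
    with open(args.out_file, "wb") as f:
        pickle.dump(nn, f)


if __name__ == "__main__":
    main()
```

You would run this as something like `python train.py --in-file data/iris.csv --out-file model.pkl`. The substantive change is not the modeling code; it is that inputs, outputs, and dependencies are explicit, which is what makes a stage like this testable, schedulable, and deployable.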
38 | 39 | ## Resources 40 | 41 | Technical resources: 42 | 43 | - [Jupyter](http://jupyter.org/) 44 | - [Pandas](https://pandas.pydata.org/) 45 | - [scikit-learn](http://scikit-learn.org/stable/) 46 | - [Introduction to Neural Networks](http://blog.kaggle.com/2017/11/27/introduction-to-neural-networks/) 47 | 48 | Articles: 49 | 50 | - [A Guide to Building a High Functioning Data Science Department](http://multithreaded.stitchfix.com/blog/2016/03/16/engineers-shouldnt-write-etl/) 51 | - [Data Science at the Speed of Hype](http://www.john-foreman.com/blog/surviving-data-science-at-the-speed-of-hype) 52 | - [How we do Data Science at People Pattern](https://www.peoplepattern.com/post.html#!/how-we-do-data-science-at-people-pattern) 53 | - [Doing Data Science at Twitter](https://medium.com/@rchang/my-two-year-journey-as-a-data-scientist-at-twitter-f0c13298aee6) 54 | - [Data Science Bill of Rights](http://www.pachyderm.io/dsbor.html) 55 | 56 | ___ 57 | All material is licensed under the [Apache License Version 2.0, January 2004](http://www.apache.org/licenses/LICENSE-2.0). 58 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_workflow/exercise2/solutions/solution1/solution1.go: -------------------------------------------------------------------------------- 1 | // All material is licensed under the Apache License Version 2.0, January 2004 2 | // http://www.apache.org/licenses/LICENSE-2.0 3 | 4 | // go build 5 | // ./solution1 6 | 7 | // Sample program to train a regression model with multiple independent variables. 8 | package main 9 | 10 | import ( 11 | "encoding/csv" 12 | "encoding/json" 13 | "flag" 14 | "fmt" 15 | "io/ioutil" 16 | "log" 17 | "os" 18 | "path/filepath" 19 | "strconv" 20 | 21 | "github.com/sajari/regression" 22 | ) 23 | 24 | // ModelInfo includes the information about the 25 | // model that is output from the training. 26 | type ModelInfo struct { 27 | Intercept float64 `json:"intercept"` 28 | Coefficients []CoefficientInfo `json:"coefficients"` 29 | } 30 | 31 | // CoefficientInfo includes information about a 32 | // particular model coefficient. 33 | type CoefficientInfo struct { 34 | Name string `json:"name"` 35 | Coefficient float64 `json:"coefficient"` 36 | } 37 | 38 | func main() { 39 | 40 | // Declare the input and output directory/file flags. 41 | inFilePtr := flag.String("inFile", "", "The file containing the training data.") 42 | outDirPtr := flag.String("outDir", "", "The output directory") 43 | 44 | // Parse the command line flags. 45 | flag.Parse() 46 | 47 | // Open the training dataset file. 48 | f, err := os.Open(*inFilePtr) 49 | if err != nil { 50 | log.Fatal(err) 51 | } 52 | defer f.Close() 53 | 54 | // Create a new CSV reader reading from the opened file. 55 | reader := csv.NewReader(f) 56 | 57 | // Read in all of the CSV records 58 | reader.FieldsPerRecord = 11 59 | trainingData, err := reader.ReadAll() 60 | if err != nil { 61 | log.Fatal(err) 62 | } 63 | 64 | // In this case we are going to try and model our disease measure 65 | // y by the bmi and ltg features plus an intercept. As such, let's create 66 | // the struct needed to train a model using github.com/sajari/regression. 67 | var r regression.Regression 68 | r.SetObserved("diabetes progression") 69 | r.SetVar(0, "bmi") 70 | r.SetVar(1, "ltg") 71 | 72 | // Loop over the records in the CSV, adding the training data to the regression value. 73 | for i, record := range trainingData { 74 | 75 | // Skip the header.
76 | if i == 0 { 77 | continue 78 | } 79 | 80 | // Parse the diabetes progression measure, or "y". 81 | yVal, err := strconv.ParseFloat(record[10], 64) 82 | if err != nil { 83 | log.Fatal(err) 84 | } 85 | 86 | // Parse the bmi value. 87 | bmiVal, err := strconv.ParseFloat(record[2], 64) 88 | if err != nil { 89 | log.Fatal(err) 90 | } 91 | 92 | // Parse the ltg value. 93 | ltgVal, err := strconv.ParseFloat(record[8], 64) 94 | if err != nil { 95 | log.Fatal(err) 96 | } 97 | 98 | // Add these points to the regression value. 99 | r.Train(regression.DataPoint(yVal, []float64{bmiVal, ltgVal})) 100 | } 101 | 102 | // Train/fit the regression model. 103 | r.Run() 104 | 105 | // Output the trained model parameters to stdout. 106 | fmt.Printf("\nRegression Formula:\n%v\n\n", r.Formula) 107 | 108 | // Fill in the model information. 109 | modelInfo := ModelInfo{ 110 | Intercept: r.Coeff(0), 111 | Coefficients: []CoefficientInfo{ 112 | CoefficientInfo{ 113 | Name: "bmi", 114 | Coefficient: r.Coeff(1), 115 | }, 116 | CoefficientInfo{ 117 | Name: "ltg", 118 | Coefficient: r.Coeff(2), 119 | }, 120 | }, 121 | } 122 | 123 | // Marshal the model information. 124 | outputData, err := json.MarshalIndent(modelInfo, "", " ") 125 | if err != nil { 126 | log.Fatal(err) 127 | } 128 | 129 | // Save the marshalled output to a file. 130 | if err := ioutil.WriteFile(filepath.Join(*outDirPtr, "model.json"), outputData, 0644); err != nil { 131 | log.Fatal(err) 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_with_go/solutions/solution2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise - Classification" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Imports" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import (\n", 24 | " \"io/ioutil\"\n", 25 | " \"fmt\"\n", 26 | " \"os\"\n", 27 | " \"math\"\n", 28 | " \"math/rand\"\n", 29 | " \n", 30 | " \"github.com/kniren/gota/dataframe\"\n", 31 | " \"github.com/kniren/gota/series\"\n", 32 | " \"gonum.org/v1/plot\"\n", 33 | " \"gonum.org/v1/plot/plotter\"\n", 34 | " \"gonum.org/v1/plot/plotutil\"\n", 35 | " \"gonum.org/v1/plot/vg\"\n", 36 | " \"gonum.org/v1/gonum/stat\"\n", 37 | " \"gonum.org/v1/gonum/floats\"\n", 38 | " \"github.com/sjwhitworth/golearn/knn\"\n", 39 | " \"github.com/sjwhitworth/golearn/base\"\n", 40 | " \"github.com/sjwhitworth/golearn/evaluation\"\n", 41 | " \"github.com/sjwhitworth/golearn/trees\"\n", 42 | ")" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "## Import the Data" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "// Read in the iris data set into golearn \"instances\".\n", 59 | "irisData, err := base.ParseCSVToInstances(\"../data/iris.csv\", true)\n", 60 | "if err != nil {\n", 61 | " fmt.Println(err)\n", 62 | "}" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "## Defining and evaluating multiple kNN models" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "// The k's we are going to test.\n", 79 | "ks := 
[]int{2, 3, 4, 5, 6, 7, 8, 9, 10}\n", 80 | "\n", 81 | "// Loop over the k values.\n", 82 | "var metrics []string\n", 83 | "for _, k := range ks {\n", 84 | "    \n", 85 | "    // Define our kNN model.\n", 86 | "    knn := knn.NewKnnClassifier(\"euclidean\", \"linear\", k)\n", 87 | "    \n", 88 | "    // Use cross-fold validation to evaluate the kNN model\n", 89 | "    // on 5 folds of the data set.\n", 90 | "    cv, err := evaluation.GenerateCrossFoldValidationConfusionMatrices(irisData, knn, 5)\n", 91 | "    if err != nil {\n", 92 | "        fmt.Println(err)\n", 93 | "    }\n", 94 | "\n", 95 | "    // Get the mean, variance and standard deviation of the accuracy for the\n", 96 | "    // cross validation.\n", 97 | "    mean, variance := evaluation.GetCrossValidatedMetric(cv, evaluation.GetAccuracy)\n", 98 | "    stdev := math.Sqrt(variance)\n", 99 | "\n", 100 | "    // Output the cross validation metrics to standard out.\n", 101 | "    metrics = append(metrics, fmt.Sprintf(\"\\n\\nkNN Accuracy for k=%d:\\n%.2f (+/- %.2f)\", k, mean, stdev*2))\n", 102 | "}\n", 103 | "\n", 104 | "// Output the results.\n", 105 | "for _, metric := range metrics {\n", 106 | "    fmt.Println(metric)\n", 107 | "}" 108 | ] 109 | } 110 | ], 111 | "metadata": { 112 | "kernelspec": { 113 | "display_name": "Go", 114 | "language": "go", 115 | "name": "gophernotes" 116 | }, 117 | "language_info": { 118 | "codemirror_mode": "", 119 | "file_extension": ".go", 120 | "mimetype": "", 121 | "name": "go", 122 | "nbconvert_exporter": "", 123 | "pygments_lexer": "", 124 | "version": "go1.10.3" 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 2 129 | } 130 | -------------------------------------------------------------------------------- /machine-learning-with-go/README.md: -------------------------------------------------------------------------------- 1 | ![Alt text](https://docs.google.com/drawings/d/e/2PACX-1vT37glyZXd8ViXedt0LCSpzsbWCUSSLhWuR3o5_74tL92fh7zeIo3hVtCzhnpw8IeWAM-KcI419cIkm/pub?w=745&h=310) 2 | 3 | # Machine Learning with Go 4 | 5 | This is material for any Go developer, data scientist, analyst, or statistician who wishes to learn how to build robust machine learning (ML) applications in Go. This class provides an intensive, comprehensive, and idiomatic view on training, utilizing, evaluating, and deploying machine learning models using Go. We believe this class is perfect for anyone wishing to build data-driven applications that produce valuable insights, have reproducible behavior, and can be deployed within modern architectures. 6 | 7 | - [Slides from the class](https://docs.google.com/presentation/d/1BMRPCNPptXsLxw40-1c7HG2UEXOK-sBp8OfloJrCv6I/edit?usp=sharing) 8 | - Instructors 9 | - Daniel Whitenack - author of [Machine Learning with Go](https://www.packtpub.com/big-data-and-business-intelligence/machine-learning-go), [website/blog](http://www.datadan.io/), [online courses](http://learn.datadan.io/), [twitter](https://twitter.com/dwhitena), [github](https://github.com/dwhitena) 10 | - Diana Ortega - [twitter](https://twitter.com/dicaormu) 11 | - Prerequisites/getting started: 12 | - You will need to ssh into a cloud instance. Remind yourself of how to do that and install a client if needed: 13 | - On a Mac or Linux machine, you should be able to ssh from a terminal (see these [Mac instructions](http://accc.uic.edu/answer/how-do-i-use-ssh-and-sftp-mac-os-x) and [Linux instructions](https://www.digitalocean.com/community/tutorials/how-to-use-ssh-to-connect-to-a-remote-server-in-ubuntu)).
14 | - On a Windows machine, you can either [install and use an ssh client (I recommend PuTTY)](https://www.putty.org/) or [use the WSL](https://docs.microsoft.com/en-us/windows/wsl/install-win10). 15 | - You will also need to work a bit at the command line. If you are new to the command line or need a refresher, look through [this quick tutorial](https://lifehacker.com/5633909/who-needs-a-mouse-learn-to-use-the-command-line-for-almost-anything). 16 | - If you need further help productionizing Go ML/AI workflows, want to bring this class to your company, or just have ML/AI related questions, [Ardan Labs](https://www.ardanlabs.com/) is here to help! Reach out to the instructor using the links above or via the [Ardan Labs website](https://www.ardanlabs.com/). 17 | 18 | *Note: This material has been designed to be taught in a classroom environment. The code is well commented but missing some of the contextual concepts and ideas that will be covered in class.* 19 | 20 | ## ML and the ML workflow 21 | 22 | This material introduces the basics of machine learning and the workflow that should be used when developing and utilizing machine learning models. Once you are done with this material, you will be exposed to the most commonly used machine learning techniques and you will understand the significance of data profiling, training, evaluation, validation, and inference. 23 | 24 | [ML and the ML workflow](ml_intro) 25 | 26 | ## ML with Go 27 | 28 | This material introduces some Go packages and frameworks that will help us implement ML in Go. Once you are done with this material, you will know where to look to find ML-related packages for Go, have some hands-on experience working with these packages, and understand the utility of Go for ML. 29 | 30 | [ML with Go](ml_with_go) 31 | 32 | ## Building a complete Go-based ML workflow 33 | 34 | This material walks you through a lab in which you will implement a full ML workflow with Go, from data ingress to training to evaluation to inference. Once you are done with this material, you will know how to implement and deploy the stages of the ML workflow in Go (for at least one type of ML model and data), and you will be able to transfer this workflow scaffolding to other problems. 35 | 36 | [Building a complete Go-based ML workflow](ml_workflow) 37 | 38 | ___ 39 | All material is licensed under the [Apache License Version 2.0, January 2004](http://www.apache.org/licenses/LICENSE-2.0).
40 | -------------------------------------------------------------------------------- /notebook-to-production/frameworks_that_scale/example1/example1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "from sklearn.preprocessing import MinMaxScaler\n", 11 | "import torch\n", 12 | "from torch.autograd import Variable\n", 13 | "import torch.nn as nn\n", 14 | "import torch.nn.functional as F\n", 15 | "torch.manual_seed(1234)" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "# Load and pre-process the training data" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "# load the data\n", 32 | "cols = ['f1', 'f2', 'f3', 'f4', 'species']\n", 33 | "data = pd.read_csv('../data/iris.csv', names=cols)\n", 34 | "\n", 35 | "# pre-process the data\n", 36 | "X = data[cols[0:-1]]\n", 37 | "X = MinMaxScaler().fit_transform(X)\n", 38 | "y = pd.get_dummies(data['species'])" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "# Define our network" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# Define our neural net\n", 55 | "class Net(nn.Module):\n", 56 | " def __init__(self, input_size, hidden_size, num_classes):\n", 57 | " super(Net, self).__init__()\n", 58 | " self.fc1 = nn.Linear(input_size, hidden_size) \n", 59 | " self.relu = nn.ReLU()\n", 60 | " self.fc2 = nn.Linear(hidden_size, num_classes) \n", 61 | " \n", 62 | " def forward(self, x):\n", 63 | " out = self.fc1(x)\n", 64 | " out = self.relu(out)\n", 65 | " out = self.fc2(out)\n", 66 | " return out" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "# Train our network" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "input_size = 4\n", 83 | "num_classes = 3\n", 84 | "hidden_size = 5\n", 85 | "learning_rate = 0.1\n", 86 | "num_epoch = 1000\n", 87 | "\n", 88 | "# initialize our network \n", 89 | "net = Net(input_size, hidden_size, num_classes)\n", 90 | "\n", 91 | "#choose optimizer and loss function\n", 92 | "criterion = nn.BCEWithLogitsLoss() \n", 93 | "optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate)\n", 94 | "\n", 95 | "# Create variables for our X and y\n", 96 | "X_tensor = Variable(torch.from_numpy(X).float())\n", 97 | "Y_tensor = Variable(torch.from_numpy(y.as_matrix()).float())\n", 98 | "\n", 99 | "#train our network\n", 100 | "for epoch in range(num_epoch):\n", 101 | "\n", 102 | " #feedforward\n", 103 | " optimizer.zero_grad()\n", 104 | " out = net(X_tensor)\n", 105 | " \n", 106 | " # backpropagation\n", 107 | " loss = criterion(out, Y_tensor)\n", 108 | " loss.backward()\n", 109 | " optimizer.step()\n", 110 | " \n", 111 | " if epoch % 100 == 0:\n", 112 | " print ('Epoch [%d/%d], Loss: %.4f' \n", 113 | " %(epoch+1, num_epoch, loss.data[0]))" 114 | ] 115 | } 116 | ], 117 | "metadata": { 118 | "kernelspec": { 119 | "display_name": "Python 3", 120 | "language": "python", 121 | "name": "python3" 122 | }, 123 | "language_info": { 124 | "codemirror_mode": { 125 | "name": "ipython", 126 | "version": 3 127 | }, 128 | 
"file_extension": ".py", 129 | "mimetype": "text/x-python", 130 | "name": "python", 131 | "nbconvert_exporter": "python", 132 | "pygments_lexer": "ipython3", 133 | "version": "3.6.4" 134 | } 135 | }, 136 | "nbformat": 4, 137 | "nbformat_minor": 2 138 | } 139 | -------------------------------------------------------------------------------- /notebook-to-production/introduction/exercises/template1/template1_data_munging.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "from sklearn.preprocessing import MinMaxScaler" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Data import and pre-processing" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "## Data Import" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "cols = ['f1', 'f2', 'f3', 'f4', 'species']\n", 34 | "data = pd.read_csv('../../data/iris.csv', names=cols)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "data.head()" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "## Example Pre-processing" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "X = data[cols[0:-1]]\n", 60 | "X = MinMaxScaler().fit_transform(X)\n", 61 | "y = pd.get_dummies(data['species'])" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "X[0:5]" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "y.head()" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "# Exercise" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "## Plotting Practice" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "Try plotting some of the iris data (either a histogram or bar plot). Hints:\n", 101 | "\n", 102 | "- You might need to modify your imports\n", 103 | "- You should look up how to make your plot \"inline\" in the Jupyter notebook\n", 104 | "- Pandas has built in plotting functionality" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "# Plotting code goes here..." 
114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## Data Manipulation Practice" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "Try one or all of the following if you have time:\n", 128 | "\n", 129 | "- Use pandas to add all of the feature columns into a new column called \"sum\"\n", 130 | "- Create a new column with the iris species names, but where they are all lowercased and stripped of dashes\n", 131 | "- Join the Y data back to the X data in a single DataFrame" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "# Data manipulation code goes here..." 141 | ] 142 | } 143 | ], 144 | "metadata": { 145 | "kernelspec": { 146 | "display_name": "Python 3", 147 | "language": "python", 148 | "name": "python3" 149 | }, 150 | "language_info": { 151 | "codemirror_mode": { 152 | "name": "ipython", 153 | "version": 3 154 | }, 155 | "file_extension": ".py", 156 | "mimetype": "text/x-python", 157 | "name": "python", 158 | "nbconvert_exporter": "python", 159 | "pygments_lexer": "ipython3", 160 | "version": "3.6.4" 161 | } 162 | }, 163 | "nbformat": 4, 164 | "nbformat_minor": 2 165 | } 166 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_intro/README.md: -------------------------------------------------------------------------------- 1 | # Introduction and ML/AI workflows 2 | 3 | This material introduces some of the infrastructure and tooling we will be using in the workshop. It also introduces the major types of machine learning along with a development workflow used for model development. Once you are done with this material, you will have a high-level understanding of the landscape of machine learning techniques and the process that data scientists employ when developing models. 4 | 5 | You can follow this guide as we work through the material in class. Most of the commands/instructions that will be given in class are repeated here, so you can follow along and/or catch up when needed. Specifically, this guide will walk you through: 6 | 7 | 1. [Connecting to your workshop instance](#1-connecting-to-your-workshop-instance) 8 | 2. [Cloning the workshop material](#2-cloning-the-workshop-material) 9 | 3. [Starting Jupyter](#3-starting-jupyter) 10 | 4. [Intro to ML and the ML workflow](#4-intro-to-ml-and-the-ml-workflow) 11 | 12 | It also includes a [list of resources](#resources) for those that want to dive in a little bit deeper. 13 | 14 | ## 1. Connecting to your workshop instance 15 | 16 | You should have been given an IP for a remote machine at the beginning of the course. The remote machine already has Jupyter, scikit-learn, PyTorch, Docker, etc. installed along with all of the command line tools we will be needing throughout the course. To log into the remote machine on Linux or Mac, open a terminal and run: 17 | 18 | ``` 19 | $ ssh pachrat@ 20 | ``` 21 | 22 | On Windows you can use PuTTY or another ssh client. You will be asked for a password, which you should also be given during the workshop. To verify that everything is running correctly on the machine, check the Docker version installed on the instance by running: 23 | 24 | ``` 25 | $ docker version 26 | ``` 27 | 28 | ## 2. 
Cloning the workshop material 29 | 30 | Once you are logged into your workshop instance, you will need to retrieve the workshop materials from this git repo. That way, we will all be working off of the same code templates, Dockerfiles, pipeline specifications, and notebooks. To clone the repo, you can run: 31 | 32 | ``` 33 | $ git clone https://github.com/ardanlabs/training-ai.git 34 | ``` 35 | 36 | This will pull down the workshop materials to your instance. To confirm that the materials are there, you can navigate to the `training-ai/machine-learning-with-go` directory and list the contents. You should see something like: 37 | 38 | ``` 39 | $ cd training-ai/machine-learning-with-go/ 40 | $ ls 41 | README.md     ml_intro   ml_with_go   ml_workflow 42 | ``` 43 | 44 | ## 3. Starting Jupyter 45 | 46 | Now, we are going to start our journey in Go ML with a tool that is familiar to many ML/AI devs called [Jupyter](http://jupyter.org/). To make this easy, the organizers have created a Docker image with Jupyter and a Go kernel for Jupyter called `gophernotes`. You could run Jupyter locally for this sort of development work (which would be the typical case), but for this workshop just run the following from your workshop instance: 47 | 48 | ``` 49 | $ docker run -it -p 8888:8888 -v /home/pachrat/training-ai/machine-learning-with-go:/notebooks gopherdata/gophernotes:gc2018 50 | ``` 51 | 52 | You will now be able to visit `:8888` in a browser to use Jupyter. When you are ready to stop using Jupyter, you can type `CTRL+c` in the terminal. 53 | 54 | ## 4. Intro to ML and the ML workflow 55 | 56 | Before we push on into some data munging and ML model development, let's take a step back and think about machine learning in general and the model development workflow. This will give us a solid baseline and will put the hands-on work we will do in the next section in context. 57 | 58 | We will do this interactively in class via Q&A and via the class slides. 59 | 60 | ## Resources 61 | 62 | Technical resources: 63 | 64 | - [Jupyter](http://jupyter.org/) 65 | - [gophernotes](https://github.com/gopherdata/gophernotes) 66 | 67 | ___ 68 | All material is licensed under the [Apache License Version 2.0, January 2004](http://www.apache.org/licenses/LICENSE-2.0).
69 | -------------------------------------------------------------------------------- /notebook-to-production/introduction/exercises/template2/template2_model_training.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "from sklearn.neighbors import KNeighborsClassifier\n", 11 | "from sklearn.preprocessing import MinMaxScaler" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Data import and pre-processing" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "## Data Import" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "cols = ['f1', 'f2', 'f3', 'f4', 'species']\n", 35 | "data = pd.read_csv('../../data/iris.csv', names=cols)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "data.head()" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "# Model Training" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "neigh = KNeighborsClassifier(n_neighbors=3)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "neigh.fit(data[['f1', 'f2', 'f3', 'f4']], data['species'])" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "# Inference" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "test_infer = pd.read_csv('../../data/test.csv', names=cols[0:-1])" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "labels = neigh.predict(test_infer)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "labels" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "# Exercise" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "Try implementing a classification model from scikit-learn other than kNN (maybe a decision tree). Train the model and use it for inference. What's different or the same?" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "# Model training an inference code here..." 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "How might we \"evaluate\" and/or \"validate\" our model? See if you can evaluate the kNN model and/or your new model to determine how they perform. Hint: look up cross validation or train/test splits in the scikit-learn docs." 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "# Model evaluation/validation code goes here..." 
143 | ] 144 | } 145 | ], 146 | "metadata": { 147 | "kernelspec": { 148 | "display_name": "Python 3", 149 | "language": "python", 150 | "name": "python3" 151 | }, 152 | "language_info": { 153 | "codemirror_mode": { 154 | "name": "ipython", 155 | "version": 3 156 | }, 157 | "file_extension": ".py", 158 | "mimetype": "text/x-python", 159 | "name": "python", 160 | "nbconvert_exporter": "python", 161 | "pygments_lexer": "ipython3", 162 | "version": "3.6.4" 163 | } 164 | }, 165 | "nbformat": 4, 166 | "nbformat_minor": 2 167 | } 168 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_workflow/exercise2/templates/template3/template3.go: -------------------------------------------------------------------------------- 1 | // All material is licensed under the Apache License Version 2.0, January 2004 2 | // http://www.apache.org/licenses/LICENSE-2.0 3 | 4 | // go build 5 | // ./example2 6 | 7 | // Sample program to predict based on a persisted regression model. 8 | package main 9 | 10 | import ( 11 | "encoding/json" 12 | "flag" 13 | "fmt" 14 | "io/ioutil" 15 | "log" 16 | "os" 17 | "path/filepath" 18 | ) 19 | 20 | // ModelInfo includes the information about the 21 | // model that is output from the training. 22 | type ModelInfo struct { 23 | RSquared float64 `json:"r_squared"` 24 | Intercept float64 `json:"intercept"` 25 | Coefficients []CoefficientInfo `json:"coefficients"` 26 | } 27 | 28 | // CoefficientInfo include information about a 29 | // particular model coefficient. 30 | type CoefficientInfo struct { 31 | Name string `json:"name"` 32 | Coefficient float64 `json:"coefficient"` 33 | } 34 | 35 | // PredictionData includes the data necessary to make 36 | // a prediction and encodes the output prediction. 37 | type PredictionData struct { 38 | Prediction float64 `json:"predicted_diabetes_progression"` 39 | IndependentVars []IndependentVar `json:"independent_variables"` 40 | } 41 | 42 | // IndependentVar include information about and a 43 | // value for an independent variable. 44 | type IndependentVar struct { 45 | Name string `json:"name"` 46 | Value float64 `json:"value"` 47 | } 48 | 49 | func main() { 50 | 51 | // Declare the input and output directory flags. 52 | inModelDirPtr := flag.String("inModelDir", "", "The directory containing the model.") 53 | inVarDirPtr := flag.String("inVarDir", "", "The directory containing the input attributes.") 54 | outDirPtr := flag.String("outDir", "", "The output directory") 55 | 56 | // Parse the command line flags. 57 | flag.Parse() 58 | 59 | // Load the model file. 60 | f, err := ioutil.ReadFile(filepath.Join(*inModelDirPtr, "model.json")) 61 | if err != nil { 62 | log.Fatal(err) 63 | } 64 | 65 | // Unmarshal the model information. 66 | var modelInfo ModelInfo 67 | if err := json.Unmarshal(f, &modelInfo); err != nil { 68 | log.Fatal(err) 69 | } 70 | 71 | // Walk over files in the input. 72 | if err := filepath.Walk(*inVarDirPtr, func(path string, info os.FileInfo, err error) error { 73 | 74 | // Skip any directories. 75 | if info.IsDir() { 76 | return nil 77 | } 78 | 79 | // Open any files. 80 | f, err := ioutil.ReadFile(filepath.Join(*inVarDirPtr, info.Name())) 81 | if err != nil { 82 | return err 83 | } 84 | 85 | // Unmarshal the independent variables. 86 | var predictionData PredictionData 87 | if err := json.Unmarshal(f, &predictionData); err != nil { 88 | return err 89 | } 90 | 91 | // Make the prediction. 92 | 93 | // Marshal the prediction data. 
94 | 95 | // Save the marshalled output to a file. 96 | 97 | return nil 98 | }); err != nil { 99 | log.Fatal(err) 100 | } 101 | } 102 | 103 | // Predict makes a prediction based on input JSON. 104 | func Predict(modelInfo *ModelInfo, predictionData *PredictionData) error { 105 | 106 | // Initialize the prediction value 107 | // to the intercept. 108 | prediction := modelInfo.Intercept 109 | 110 | // Create a map of independent variable coefficients. 111 | coeffs := make(map[string]float64) 112 | varNames := make([]string, len(modelInfo.Coefficients)) 113 | for idx, coeff := range modelInfo.Coefficients { 114 | coeffs[coeff.Name] = coeff.Coefficient 115 | varNames[idx] = coeff.Name 116 | } 117 | 118 | // Create a map of the independent variable values. 119 | varVals := make(map[string]float64) 120 | for _, indVar := range predictionData.IndependentVars { 121 | varVals[indVar.Name] = indVar.Value 122 | } 123 | 124 | // Loop over the independent variables. 125 | for _, varName := range varNames { 126 | 127 | // Get the coefficient. 128 | coeff, ok := coeffs[varName] 129 | if !ok { 130 | return fmt.Errorf("Could not find model coefficient %s", varName) 131 | } 132 | 133 | // Get the variable value. 134 | val, ok := varVals[varName] 135 | if !ok { 136 | return fmt.Errorf("Expected a value for variable %s", varName) 137 | } 138 | 139 | // Add to the prediction. 140 | prediction = prediction + coeff*val 141 | } 142 | 143 | // Add the prediction to the prediction data. 144 | predictionData.Prediction = prediction 145 | 146 | return nil 147 | } 148 | -------------------------------------------------------------------------------- /notebook-to-production/productionizing/example_sklearn_workflow.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "from sklearn.neural_network import MLPClassifier\n", 11 | "from sklearn.preprocessing import MinMaxScaler" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Data import and pre-processing" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "## Data Import" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "cols = ['f1', 'f2', 'f3', 'f4', 'species']\n", 35 | "data = pd.read_csv('data/iris.csv', names=cols)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "data.head()" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "## Pre-processing" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "X = data[cols[0:-1]]\n", 61 | "X = MinMaxScaler().fit_transform(X)\n", 62 | "y = pd.get_dummies(data['species'])" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "X[0:5]" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "y.head()" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "# Model Training" 88 | ] 89 | }, 90 | { 91 | "cell_type": 
"code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "nn = MLPClassifier(solver='sgd', learning_rate='constant', momentum=0,\n", 97 | " learning_rate_init=0.2, verbose=0, random_state=0, max_iter=1000)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "nn.fit(X, y)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "# Inference" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "test_infer = pd.read_csv('data/test.csv', names=cols[0:-1])\n", 123 | "test_infer = MinMaxScaler().fit_transform(test_infer)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "labels = nn.predict(test_infer)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "species = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']\n", 142 | "predictions = []\n", 143 | "for label_set in labels:\n", 144 | " idx = 3\n", 145 | " for label_idx, label in enumerate(label_set):\n", 146 | " if label == 1:\n", 147 | " idx = label_idx\n", 148 | " if idx == 3:\n", 149 | " predictions.append('NA')\n", 150 | " else:\n", 151 | " predictions.append(species[idx])" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "predictions" 161 | ] 162 | } 163 | ], 164 | "metadata": { 165 | "kernelspec": { 166 | "display_name": "Python 3", 167 | "language": "python", 168 | "name": "python3" 169 | }, 170 | "language_info": { 171 | "codemirror_mode": { 172 | "name": "ipython", 173 | "version": 3 174 | }, 175 | "file_extension": ".py", 176 | "mimetype": "text/x-python", 177 | "name": "python", 178 | "nbconvert_exporter": "python", 179 | "pygments_lexer": "ipython3", 180 | "version": "3.6.4" 181 | } 182 | }, 183 | "nbformat": 4, 184 | "nbformat_minor": 2 185 | } 186 | -------------------------------------------------------------------------------- /notebook-to-production/README.md: -------------------------------------------------------------------------------- 1 | # Python-based workflows - from notebook to production 2 | 3 | This material is for intermediate-level data scientists, developers, data engineers, or researchers. Specifically, this material is for those who have some interest in or experience with developing ML/AI models on sample data sets (maybe in Jupyter), but who might struggle to understand the full ML/AI workflow and scale, deploy, and productionize their work. They need to understand which Python tools to use as they scale workflows beyond the notebook, and they need to understand how to manage and distribute work on large data. 4 | 5 | - [Slides from the class](https://docs.google.com/presentation/d/1vhINmKo-gIoDU1vVDqpg51auZpPkFV0h_X-ukdyYsFc/edit?usp=sharing) 6 | - Instructor - Daniel Whitenack 7 | - [website/blog](http://www.datadan.io/) 8 | - [twitter](https://twitter.com/dwhitena) 9 | - [github](https://github.com/dwhitena) 10 | - Prerequisties/getting started: 11 | - You will need to ssh into a cloud instance. 
Remind yourself of how to do that and install a client if needed: 12 |     - On a Mac or Linux machine, you should be able to ssh from a terminal (see these [Mac instructions](http://accc.uic.edu/answer/how-do-i-use-ssh-and-sftp-mac-os-x) and [Linux instructions](https://www.digitalocean.com/community/tutorials/how-to-use-ssh-to-connect-to-a-remote-server-in-ubuntu)). 13 |     - On a Windows machine, you can either [install and use an ssh client (I recommend PuTTY)](https://www.putty.org/) or [use the WSL](https://docs.microsoft.com/en-us/windows/wsl/install-win10). 14 |   - You will also need to work a bit at the command line. If you are new to the command line or need a refresher, look through [this quick tutorial](https://lifehacker.com/5633909/who-needs-a-mouse-learn-to-use-the-command-line-for-almost-anything). 15 | - If you need further help productionizing ML/AI workflows, want to bring this class to your company, or just have ML/AI related questions, [Ardan Labs](https://www.ardanlabs.com/) is here to help! Reach out to the instructor using the links above or via the [Ardan Labs website](https://www.ardanlabs.com/). 16 | 17 | *Note: This material has been designed to be taught in a classroom environment. The code is well commented but missing some of the contextual concepts and ideas that will be covered in class.* 18 | 19 | ## Introduction to Python tooling and ML/AI workflows 20 | 21 | This material introduces some of the commonly used Python tooling for data science and ML/AI. It also introduces the ML/AI model development workflow. Once you are done with this material, you will understand what sets of tools are used in producing AI models, and how data scientists often interact with those tools. 22 | 23 | [Introduction to Python tooling and ML/AI workflows](introduction) 24 | 25 | ## Productionizing ML/AI 26 | 27 | This material introduces some pain points and pitfalls that people fall into when trying to productionize data science work. Once you are done with this material, you will understand what the common pain points are and the guiding principles that will help us overcome them. 28 | 29 | [Productionizing ML/AI](productionizing) 30 | 31 | ## Using frameworks that scale 32 | 33 | This material introduces some methods and frameworks that will help our workflow scale beyond local sample data. Once you are done with this material, you will be exposed to some of the more scalable Python frameworks in the ecosystem (e.g., PyTorch) and have some experience refactoring modeling code for production. 34 | 35 | [Using frameworks that scale](frameworks_that_scale) 36 | 37 | ## Breaking our workflow up into pipeline stages 38 | 39 | This material walks you through breaking up a workflow, contained in a Jupyter notebook, into separate, scalable pipeline stages. Once you are done with this material, you will understand which portions of an ML/AI pipeline might benefit from being managed in isolation. You will also get some experience writing code for specific stages of a data pipeline (pre-processing, training, inference). 40 | 41 | [Breaking our workflow up into pipeline stages](pipeline_stages) 42 | 43 | ## Deploying, scaling, and managing our pipeline 44 | 45 | This material introduces you to methods for orchestrating a multi-stage AI pipeline at scale. Once you are done with this material, you will understand various methods for deploying multi-stage pipelines along with their trade-offs. You will also get hands-on experience deploying a multi-stage AI pipeline on a remote cluster. A naive sketch of what such orchestration replaces appears below.
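Before diving in, it helps to see what that orchestration layer replaces. The following is a naive, single-machine sketch that chains pipeline stages together with `subprocess`; the stage script names and flags are hypothetical, and everything this approach lacks (data versioning, retries, scaling, provenance) is what the tooling covered in this section provides.

```
# run_pipeline.py - naive single-machine pipeline chaining (illustrative sketch only).
# The stage script names and flags below are hypothetical.
import subprocess

STAGES = [
    ["python", "pre_process.py", "--in-dir", "data/raw", "--out-dir", "data/processed"],
    ["python", "train.py", "--in-dir", "data/processed", "--out-dir", "model"],
    ["python", "infer.py", "--model-dir", "model", "--in-dir", "data/test", "--out-dir", "predictions"],
]

for stage in STAGES:
    print("running:", " ".join(stage))
    # Fail fast: if any stage errors out, stop the whole pipeline.
    subprocess.run(stage, check=True)
```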
46 | 47 | [Deploying, scaling, and managing our pipeline](deploying_managing) 48 | 49 | ___ 50 | All material is licensed under the [Apache License Version 2.0, January 2004](http://www.apache.org/licenses/LICENSE-2.0). 51 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_with_go/README.md: -------------------------------------------------------------------------------- 1 | # ML with Go 2 | 3 | This material introduces some Go packages and frameworks that will help us implement ML in Go. Once you are done with this material, you will know where to look to find ML-related packages for Go, have some hands-on experience working with these packages, and understand the various types of machine learning models. 4 | 5 | This section is meant to be a survey of Go packages and frameworks related to ML along with some hands-on exercises using those packages and frameworks. We will be referring back to the ML workflow introduced in [the last section](../ml_workflow) as necessary. This guide will walk you through: 6 | 7 | 1. [Gathering, Profiling, and Cleaning Data](#1-gathering-profiling-and-cleaning-data) 8 | 2. [Defining, Training, and Testing Models](#2-defining-training-and-testing-models) 9 | 3. [(Bonus) More sophisticated models](#3-bonus-more-sophisticated-models) 10 | 11 | It also includes a [list of resources](#resources) for those that want to dive in a little bit deeper. 12 | 13 | For each section, there is a notebook with the number of that section. For example, the notebook for section 1 is example1. 14 | 15 | 16 | ## 1. Gathering, Profiling, and Cleaning Data 17 | 18 | In this section, we will look at how we can import, parse, manipulate, and profile data with Go. Note that there are innumerable types and formats of data that you might have to deal with in an ML/AI workflow (CSV, JSON, Parquet, Avro, etc.), and we won't cover all of them. Rather, we will highlight a few of the main Go packages that you can utilize for data gathering, profiling, and cleaning. We will look at two different example data sets using Jupyter: (i) an [emoji data set](https://www.kaggle.com/sanjayaw/emosim508) in JSON format, and (ii) a [Game of Thrones data set](https://github.com/chrisalbon/war_of_the_five_kings_dataset) in CSV format. 19 | 20 | To run each cell in the notebook, use Ctrl+Enter. 21 | 22 | **Example**: [example1/example1.ipynb](example1/example1.ipynb) 23 | 24 | ## 2. Defining, Training, and Testing Models 25 | 26 | ### Regression 27 | 28 | To make correct predictions, we need to understand the relationships between variables in our data and model this using statistical methods. One of those methods is called regression. In this example notebook, we are going to create a regression model to predict the weights of baseball players based on their height. 29 | 30 | **Example**: [example2/example2.ipynb](example2/example2.ipynb) 31 | 32 | **Exercise** - Try using `github.com/sajari/regression` to train our regression model instead of `gonum`. *Hint* - look at the examples [here](https://github.com/sajari/regression). When you are ready, you can look at our solution notebook [here](solutions/solution1.ipynb). 33 | 34 | ### Classification 35 | 36 | Sometimes we don't need to predict a continuous value (like weight, stock price, or temperature). We might need to predict whether some observations belong to one or more discrete labels/classes (e.g., fraud or not fraud). 
In this example notebook, we will create a couple of classification models that predict flower species from physical measurements of flowers. 37 | 38 | **Example**: [example3/example3.ipynb](example3/example3.ipynb). 39 | 40 | **Exercise** - Test out our kNN model for multiple *k* values to determine what *k* value we should use. When you are ready, you can look at our solution notebook [here](solutions/solution2.ipynb). 41 | 42 | ### Clustering 43 | 44 | Both regression and classification are considered "supervised" learning techniques, where we are trying to predict something based on labeled examples of that thing. However, there are also "unsupervised" learning techniques to, for example, detect groupings in your data set when you don't know what groups exist. This is called clustering, and we will look at one clustering algorithm in the following notebook called k-means. 45 | 46 | **Example**: [example4/example4.ipynb](example4/example4.ipynb). 47 | 48 | ## 3. (Bonus) More sophisticated models 49 | 50 | Sometimes we may need a model that is more complicated than linear regression or kNN. Go has us covered here. We can interface with major frameworks like TensorFlow, utilize more Go-centric frameworks like Gorgonia, or utilize services like MachineBox. Moreover, in some cases (e.g., streaming ML analysis) we may want to leverage Go's built in concurrency primitives. 51 | 52 | [This bonus material](bonus), which we may or may not get to cover in the workshop, provides some more info about these methods and gives a couple of examples. 53 | 54 | ## Resources 55 | 56 | - [Machine Learning with Go](https://www.packtpub.com/big-data-and-business-intelligence/machine-learning-go) 57 | 58 | ___ 59 | All material is licensed under the [Apache License Version 2.0, January 2004](http://www.apache.org/licenses/LICENSE-2.0). 
60 | -------------------------------------------------------------------------------- /notebook-to-production/introduction/exercises/solution2/solution2_model_training.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "from sklearn.neighbors import KNeighborsClassifier\n", 11 | "from sklearn.model_selection import cross_val_score\n", 12 | "from sklearn import tree" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "# Data import and pre-processing" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "## Data Import" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "cols = ['f1', 'f2', 'f3', 'f4', 'species']\n", 36 | "data = pd.read_csv('../../data/iris.csv', names=cols)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "data.head()" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "# Model Training" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "neigh = KNeighborsClassifier(n_neighbors=3)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "neigh.fit(data[['f1', 'f2', 'f3', 'f4']], data['species']) " 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "# Inference" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "test_infer = pd.read_csv('../../data/test.csv', names=cols[0:-1])" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "labels = neigh.predict(test_infer)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "labels" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "# Exercise" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "Try implementing a classification model from scikit-learn other than kNN (maybe a decision tree). Train the model and use it for inference. What's different or the same?" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "clf = tree.DecisionTreeClassifier()\n", 128 | "clf.fit(data[['f1', 'f2', 'f3', 'f4']], data['species'])\n", 129 | "labels_new = clf.predict(test_infer)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "labels_new" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "How might we \"evaluate\" and/or \"validate\" our model? See if you can evaluate the kNN model and/or your new model to determine how they perform. 
Hint: look up cross validation or train/test splits in the scikit-learn docs." 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "scores = cross_val_score(clf, data[['f1', 'f2', 'f3', 'f4']], data['species'], cv=5)\n", 155 | "print(\"Accuracy: %0.2f (+/- %0.2f)\" % (scores.mean(), scores.std() * 2))" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [] 164 | } 165 | ], 166 | "metadata": { 167 | "kernelspec": { 168 | "display_name": "Python 3", 169 | "language": "python", 170 | "name": "python3" 171 | }, 172 | "language_info": { 173 | "codemirror_mode": { 174 | "name": "ipython", 175 | "version": 3 176 | }, 177 | "file_extension": ".py", 178 | "mimetype": "text/x-python", 179 | "name": "python", 180 | "nbconvert_exporter": "python", 181 | "pygments_lexer": "ipython3", 182 | "version": "3.6.4" 183 | } 184 | }, 185 | "nbformat": 4, 186 | "nbformat_minor": 2 187 | } 188 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_workflow/exercise2/solutions/solution3/solution3.go: -------------------------------------------------------------------------------- 1 | // All material is licensed under the Apache License Version 2.0, January 2004 2 | // http://www.apache.org/licenses/LICENSE-2.0 3 | 4 | // go build 5 | // ./example2 6 | 7 | // Sample program to predict based on a persisted regression model. 8 | package main 9 | 10 | import ( 11 | "encoding/json" 12 | "flag" 13 | "fmt" 14 | "io/ioutil" 15 | "log" 16 | "os" 17 | "path/filepath" 18 | ) 19 | 20 | // ModelInfo includes the information about the 21 | // model that is output from the training. 22 | type ModelInfo struct { 23 | RSquared float64 `json:"r_squared"` 24 | Intercept float64 `json:"intercept"` 25 | Coefficients []CoefficientInfo `json:"coefficients"` 26 | } 27 | 28 | // CoefficientInfo include information about a 29 | // particular model coefficient. 30 | type CoefficientInfo struct { 31 | Name string `json:"name"` 32 | Coefficient float64 `json:"coefficient"` 33 | } 34 | 35 | // PredictionData includes the data necessary to make 36 | // a prediction and encodes the output prediction. 37 | type PredictionData struct { 38 | Prediction float64 `json:"predicted_diabetes_progression"` 39 | IndependentVars []IndependentVar `json:"independent_variables"` 40 | } 41 | 42 | // IndependentVar include information about and a 43 | // value for an independent variable. 44 | type IndependentVar struct { 45 | Name string `json:"name"` 46 | Value float64 `json:"value"` 47 | } 48 | 49 | func main() { 50 | 51 | // Declare the input and output directory flags. 52 | inModelDirPtr := flag.String("inModelDir", "", "The directory containing the model.") 53 | inVarDirPtr := flag.String("inVarDir", "", "The directory containing the input attributes.") 54 | outDirPtr := flag.String("outDir", "", "The output directory") 55 | 56 | // Parse the command line flags. 57 | flag.Parse() 58 | 59 | // Load the model file. 60 | f, err := ioutil.ReadFile(filepath.Join(*inModelDirPtr, "model.json")) 61 | if err != nil { 62 | log.Fatal(err) 63 | } 64 | 65 | // Unmarshal the model information. 66 | var modelInfo ModelInfo 67 | if err := json.Unmarshal(f, &modelInfo); err != nil { 68 | log.Fatal(err) 69 | } 70 | 71 | // Walk over files in the input. 
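// Note (added for clarity): for every regular file under inVarDir, the walk
// below reads the file, unmarshals its JSON independent variables, applies the
// loaded model via Predict, and writes the marshalled prediction to outDir
// under the same file name. Returning a non-nil error from the callback stops
// the walk.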
72 | if err := filepath.Walk(*inVarDirPtr, func(path string, info os.FileInfo, err error) error { 73 | 74 | // Skip any directories. 75 | if info.IsDir() { 76 | return nil 77 | } 78 | 79 | // Open any files. 80 | f, err := ioutil.ReadFile(filepath.Join(*inVarDirPtr, info.Name())) 81 | if err != nil { 82 | return err 83 | } 84 | 85 | // Unmarshal the independent variables. 86 | var predictionData PredictionData 87 | if err := json.Unmarshal(f, &predictionData); err != nil { 88 | return err 89 | } 90 | 91 | // Make the prediction. 92 | if err := Predict(&modelInfo, &predictionData); err != nil { 93 | return err 94 | } 95 | 96 | // Marshal the prediction data. 97 | outputData, err := json.MarshalIndent(predictionData, "", " ") 98 | if err != nil { 99 | log.Fatal(err) 100 | } 101 | 102 | // Save the marshalled output to a file. 103 | if err := ioutil.WriteFile(filepath.Join(*outDirPtr, info.Name()), outputData, 0644); err != nil { 104 | log.Fatal(err) 105 | } 106 | 107 | return nil 108 | }); err != nil { 109 | log.Fatal(err) 110 | } 111 | } 112 | 113 | // Predict makes a prediction based on input JSON. 114 | func Predict(modelInfo *ModelInfo, predictionData *PredictionData) error { 115 | 116 | // Initialize the prediction value 117 | // to the intercept. 118 | prediction := modelInfo.Intercept 119 | 120 | // Create a map of independent variable coefficients. 121 | coeffs := make(map[string]float64) 122 | varNames := make([]string, len(modelInfo.Coefficients)) 123 | for idx, coeff := range modelInfo.Coefficients { 124 | coeffs[coeff.Name] = coeff.Coefficient 125 | varNames[idx] = coeff.Name 126 | } 127 | 128 | // Create a map of the independent variable values. 129 | varVals := make(map[string]float64) 130 | for _, indVar := range predictionData.IndependentVars { 131 | varVals[indVar.Name] = indVar.Value 132 | } 133 | 134 | // Loop over the independent variables. 135 | for _, varName := range varNames { 136 | 137 | // Get the coefficient. 138 | coeff, ok := coeffs[varName] 139 | if !ok { 140 | return fmt.Errorf("Could not find model coefficient %s", varName) 141 | } 142 | 143 | // Get the variable value. 144 | val, ok := varVals[varName] 145 | if !ok { 146 | return fmt.Errorf("Expected a value for variable %s", varName) 147 | } 148 | 149 | // Add to the prediction. 150 | prediction = prediction + coeff*val 151 | } 152 | 153 | // Add the prediction to the prediction data. 
154 | predictionData.Prediction = prediction 155 | 156 | return nil 157 | } 158 | -------------------------------------------------------------------------------- /notebook-to-production/introduction/exercises/solution1/solution1_data_munging.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%matplotlib inline\n", 10 | "import pandas as pd\n", 11 | "from sklearn.preprocessing import MinMaxScaler" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Data import and pre-processing" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "## Data Import" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "cols = ['f1', 'f2', 'f3', 'f4', 'species']\n", 35 | "data = pd.read_csv('../../data/iris.csv', names=cols)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "data.head()" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "## Example Pre-processing" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "X = data[cols[0:-1]]\n", 61 | "X = MinMaxScaler().fit_transform(X)\n", 62 | "y = pd.get_dummies(data['species'])" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "X[0:5]" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "y.head()" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "# Exercise" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "## Plotting Practice" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "Try plotting some of the iris data (either a histogram or bar plot). 
Hints:\n", 102 | "\n", 103 | "- You might need to modify your imports\n", 104 | "- You should look up how to make your plot \"inline\" in the Jupyter notebook\n", 105 | "- Pandas has built in plotting functionality" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "data.plot(kind='hist')" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "## Data Manipulation Practice" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "Try one or all of the following if you have time:\n", 129 | "\n", 130 | "- Use pandas to add all of the feature columns into a new column called \"sum\"\n", 131 | "- Create a new column with the iris species named, but where they are all lower cased and stripped of dashes\n", 132 | "- Join the Y data back to the X data in a single DataFrame" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "# add a column called sum\n", 142 | "data['sum'] = data[['f1', 'f2', 'f3', 'f4']].sum(axis=1)\n", 143 | "data.head()" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "# Create a new column with a modified version of species\n", 153 | "data['species_mod'] = data['species'].apply(lambda x: x.lower().replace('-',''))\n", 154 | "data.head()" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "data_rejoined = pd.DataFrame(X, columns=['f1', 'f2', 'f3', 'f4']).join(y)\n", 164 | "data_rejoined.head()" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [] 173 | } 174 | ], 175 | "metadata": { 176 | "kernelspec": { 177 | "display_name": "Python 3", 178 | "language": "python", 179 | "name": "python3" 180 | }, 181 | "language_info": { 182 | "codemirror_mode": { 183 | "name": "ipython", 184 | "version": 3 185 | }, 186 | "file_extension": ".py", 187 | "mimetype": "text/x-python", 188 | "name": "python", 189 | "nbconvert_exporter": "python", 190 | "pygments_lexer": "ipython3", 191 | "version": "3.6.4" 192 | } 193 | }, 194 | "nbformat": 4, 195 | "nbformat_minor": 2 196 | } 197 | -------------------------------------------------------------------------------- /notebook-to-production/introduction/data/iris.csv: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 5.0,3.4,1.5,0.2,Iris-setosa 4 | 4.4,2.9,1.4,0.2,Iris-setosa 5 | 4.9,3.1,1.5,0.1,Iris-setosa 6 | 5.4,3.7,1.5,0.2,Iris-setosa 7 | 4.8,3.4,1.6,0.2,Iris-setosa 8 | 4.8,3.0,1.4,0.1,Iris-setosa 9 | 4.3,3.0,1.1,0.1,Iris-setosa 10 | 5.8,4.0,1.2,0.2,Iris-setosa 11 | 5.7,4.4,1.5,0.4,Iris-setosa 12 | 5.4,3.9,1.3,0.4,Iris-setosa 13 | 5.1,3.5,1.4,0.3,Iris-setosa 14 | 5.7,3.8,1.7,0.3,Iris-setosa 15 | 5.1,3.8,1.5,0.3,Iris-setosa 16 | 5.4,3.4,1.7,0.2,Iris-setosa 17 | 5.1,3.7,1.5,0.4,Iris-setosa 18 | 4.6,3.6,1.0,0.2,Iris-setosa 19 | 5.1,3.3,1.7,0.5,Iris-setosa 20 | 4.8,3.4,1.9,0.2,Iris-setosa 21 | 5.0,3.0,1.6,0.2,Iris-setosa 22 | 5.0,3.4,1.6,0.4,Iris-setosa 23 | 5.2,3.5,1.5,0.2,Iris-setosa 24 | 5.2,3.4,1.4,0.2,Iris-setosa 25 | 4.7,3.2,1.6,0.2,Iris-setosa 26 | 
4.8,3.1,1.6,0.2,Iris-setosa 27 | 5.4,3.4,1.5,0.4,Iris-setosa 28 | 5.2,4.1,1.5,0.1,Iris-setosa 29 | 5.5,4.2,1.4,0.2,Iris-setosa 30 | 4.9,3.1,1.5,0.1,Iris-setosa 31 | 5.0,3.2,1.2,0.2,Iris-setosa 32 | 5.5,3.5,1.3,0.2,Iris-setosa 33 | 4.9,3.1,1.5,0.1,Iris-setosa 34 | 4.4,3.0,1.3,0.2,Iris-setosa 35 | 5.1,3.4,1.5,0.2,Iris-setosa 36 | 5.0,3.5,1.3,0.3,Iris-setosa 37 | 4.5,2.3,1.3,0.3,Iris-setosa 38 | 4.4,3.2,1.3,0.2,Iris-setosa 39 | 5.0,3.5,1.6,0.6,Iris-setosa 40 | 5.1,3.8,1.9,0.4,Iris-setosa 41 | 4.8,3.0,1.4,0.3,Iris-setosa 42 | 5.1,3.8,1.6,0.2,Iris-setosa 43 | 4.6,3.2,1.4,0.2,Iris-setosa 44 | 5.3,3.7,1.5,0.2,Iris-setosa 45 | 5.0,3.3,1.4,0.2,Iris-setosa 46 | 7.0,3.2,4.7,1.4,Iris-versicolor 47 | 6.4,3.2,4.5,1.5,Iris-versicolor 48 | 6.9,3.1,4.9,1.5,Iris-versicolor 49 | 5.5,2.3,4.0,1.3,Iris-versicolor 50 | 6.5,2.8,4.6,1.5,Iris-versicolor 51 | 5.7,2.8,4.5,1.3,Iris-versicolor 52 | 6.3,3.3,4.7,1.6,Iris-versicolor 53 | 4.9,2.4,3.3,1.0,Iris-versicolor 54 | 6.6,2.9,4.6,1.3,Iris-versicolor 55 | 5.2,2.7,3.9,1.4,Iris-versicolor 56 | 5.0,2.0,3.5,1.0,Iris-versicolor 57 | 5.9,3.0,4.2,1.5,Iris-versicolor 58 | 6.0,2.2,4.0,1.0,Iris-versicolor 59 | 6.1,2.9,4.7,1.4,Iris-versicolor 60 | 5.6,2.9,3.6,1.3,Iris-versicolor 61 | 6.7,3.1,4.4,1.4,Iris-versicolor 62 | 5.6,3.0,4.5,1.5,Iris-versicolor 63 | 5.8,2.7,4.1,1.0,Iris-versicolor 64 | 6.2,2.2,4.5,1.5,Iris-versicolor 65 | 5.6,2.5,3.9,1.1,Iris-versicolor 66 | 5.9,3.2,4.8,1.8,Iris-versicolor 67 | 6.1,2.8,4.0,1.3,Iris-versicolor 68 | 6.3,2.5,4.9,1.5,Iris-versicolor 69 | 6.1,2.8,4.7,1.2,Iris-versicolor 70 | 6.4,2.9,4.3,1.3,Iris-versicolor 71 | 6.6,3.0,4.4,1.4,Iris-versicolor 72 | 6.8,2.8,4.8,1.4,Iris-versicolor 73 | 6.7,3.0,5.0,1.7,Iris-versicolor 74 | 6.0,2.9,4.5,1.5,Iris-versicolor 75 | 5.7,2.6,3.5,1.0,Iris-versicolor 76 | 5.5,2.4,3.8,1.1,Iris-versicolor 77 | 5.5,2.4,3.7,1.0,Iris-versicolor 78 | 5.8,2.7,3.9,1.2,Iris-versicolor 79 | 6.0,2.7,5.1,1.6,Iris-versicolor 80 | 5.4,3.0,4.5,1.5,Iris-versicolor 81 | 6.0,3.4,4.5,1.6,Iris-versicolor 82 | 6.7,3.1,4.7,1.5,Iris-versicolor 83 | 6.3,2.3,4.4,1.3,Iris-versicolor 84 | 5.6,3.0,4.1,1.3,Iris-versicolor 85 | 5.5,2.5,4.0,1.3,Iris-versicolor 86 | 5.5,2.6,4.4,1.2,Iris-versicolor 87 | 6.1,3.0,4.6,1.4,Iris-versicolor 88 | 5.8,2.6,4.0,1.2,Iris-versicolor 89 | 5.0,2.3,3.3,1.0,Iris-versicolor 90 | 5.6,2.7,4.2,1.3,Iris-versicolor 91 | 5.7,3.0,4.2,1.2,Iris-versicolor 92 | 5.7,2.9,4.2,1.3,Iris-versicolor 93 | 6.2,2.9,4.3,1.3,Iris-versicolor 94 | 5.1,2.5,3.0,1.1,Iris-versicolor 95 | 5.7,2.8,4.1,1.3,Iris-versicolor 96 | 6.3,3.3,6.0,2.5,Iris-virginica 97 | 5.8,2.7,5.1,1.9,Iris-virginica 98 | 7.1,3.0,5.9,2.1,Iris-virginica 99 | 6.3,2.9,5.6,1.8,Iris-virginica 100 | 6.5,3.0,5.8,2.2,Iris-virginica 101 | 7.6,3.0,6.6,2.1,Iris-virginica 102 | 4.9,2.5,4.5,1.7,Iris-virginica 103 | 7.3,2.9,6.3,1.8,Iris-virginica 104 | 6.7,2.5,5.8,1.8,Iris-virginica 105 | 7.2,3.6,6.1,2.5,Iris-virginica 106 | 6.5,3.2,5.1,2.0,Iris-virginica 107 | 6.4,2.7,5.3,1.9,Iris-virginica 108 | 6.8,3.0,5.5,2.1,Iris-virginica 109 | 5.7,2.5,5.0,2.0,Iris-virginica 110 | 5.8,2.8,5.1,2.4,Iris-virginica 111 | 6.4,3.2,5.3,2.3,Iris-virginica 112 | 6.5,3.0,5.5,1.8,Iris-virginica 113 | 7.7,3.8,6.7,2.2,Iris-virginica 114 | 7.7,2.6,6.9,2.3,Iris-virginica 115 | 6.0,2.2,5.0,1.5,Iris-virginica 116 | 6.9,3.2,5.7,2.3,Iris-virginica 117 | 5.6,2.8,4.9,2.0,Iris-virginica 118 | 7.7,2.8,6.7,2.0,Iris-virginica 119 | 6.3,2.7,4.9,1.8,Iris-virginica 120 | 6.7,3.3,5.7,2.1,Iris-virginica 121 | 7.2,3.2,6.0,1.8,Iris-virginica 122 | 6.2,2.8,4.8,1.8,Iris-virginica 123 | 6.1,3.0,4.9,1.8,Iris-virginica 124 | 
6.4,2.8,5.6,2.1,Iris-virginica 125 | 7.2,3.0,5.8,1.6,Iris-virginica 126 | 7.4,2.8,6.1,1.9,Iris-virginica 127 | 7.9,3.8,6.4,2.0,Iris-virginica 128 | 6.4,2.8,5.6,2.2,Iris-virginica 129 | 6.3,2.8,5.1,1.5,Iris-virginica 130 | 6.1,2.6,5.6,1.4,Iris-virginica 131 | 7.7,3.0,6.1,2.3,Iris-virginica 132 | 6.3,3.4,5.6,2.4,Iris-virginica 133 | 6.4,3.1,5.5,1.8,Iris-virginica 134 | 6.0,3.0,4.8,1.8,Iris-virginica 135 | 6.9,3.1,5.4,2.1,Iris-virginica 136 | 6.7,3.1,5.6,2.4,Iris-virginica 137 | 6.9,3.1,5.1,2.3,Iris-virginica 138 | 5.8,2.7,5.1,1.9,Iris-virginica 139 | 6.8,3.2,5.9,2.3,Iris-virginica 140 | 6.7,3.3,5.7,2.5,Iris-virginica 141 | 6.7,3.0,5.2,2.3,Iris-virginica 142 | 6.3,2.5,5.0,1.9,Iris-virginica 143 | 6.5,3.0,5.2,2.0,Iris-virginica 144 | 6.2,3.4,5.4,2.3,Iris-virginica 145 | 5.9,3.0,5.1,1.8,Iris-virginica 146 | 147 | -------------------------------------------------------------------------------- /notebook-to-production/pipeline_stages/data/iris.csv: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 5.0,3.4,1.5,0.2,Iris-setosa 4 | 4.4,2.9,1.4,0.2,Iris-setosa 5 | 4.9,3.1,1.5,0.1,Iris-setosa 6 | 5.4,3.7,1.5,0.2,Iris-setosa 7 | 4.8,3.4,1.6,0.2,Iris-setosa 8 | 4.8,3.0,1.4,0.1,Iris-setosa 9 | 4.3,3.0,1.1,0.1,Iris-setosa 10 | 5.8,4.0,1.2,0.2,Iris-setosa 11 | 5.7,4.4,1.5,0.4,Iris-setosa 12 | 5.4,3.9,1.3,0.4,Iris-setosa 13 | 5.1,3.5,1.4,0.3,Iris-setosa 14 | 5.7,3.8,1.7,0.3,Iris-setosa 15 | 5.1,3.8,1.5,0.3,Iris-setosa 16 | 5.4,3.4,1.7,0.2,Iris-setosa 17 | 5.1,3.7,1.5,0.4,Iris-setosa 18 | 4.6,3.6,1.0,0.2,Iris-setosa 19 | 5.1,3.3,1.7,0.5,Iris-setosa 20 | 4.8,3.4,1.9,0.2,Iris-setosa 21 | 5.0,3.0,1.6,0.2,Iris-setosa 22 | 5.0,3.4,1.6,0.4,Iris-setosa 23 | 5.2,3.5,1.5,0.2,Iris-setosa 24 | 5.2,3.4,1.4,0.2,Iris-setosa 25 | 4.7,3.2,1.6,0.2,Iris-setosa 26 | 4.8,3.1,1.6,0.2,Iris-setosa 27 | 5.4,3.4,1.5,0.4,Iris-setosa 28 | 5.2,4.1,1.5,0.1,Iris-setosa 29 | 5.5,4.2,1.4,0.2,Iris-setosa 30 | 4.9,3.1,1.5,0.1,Iris-setosa 31 | 5.0,3.2,1.2,0.2,Iris-setosa 32 | 5.5,3.5,1.3,0.2,Iris-setosa 33 | 4.9,3.1,1.5,0.1,Iris-setosa 34 | 4.4,3.0,1.3,0.2,Iris-setosa 35 | 5.1,3.4,1.5,0.2,Iris-setosa 36 | 5.0,3.5,1.3,0.3,Iris-setosa 37 | 4.5,2.3,1.3,0.3,Iris-setosa 38 | 4.4,3.2,1.3,0.2,Iris-setosa 39 | 5.0,3.5,1.6,0.6,Iris-setosa 40 | 5.1,3.8,1.9,0.4,Iris-setosa 41 | 4.8,3.0,1.4,0.3,Iris-setosa 42 | 5.1,3.8,1.6,0.2,Iris-setosa 43 | 4.6,3.2,1.4,0.2,Iris-setosa 44 | 5.3,3.7,1.5,0.2,Iris-setosa 45 | 5.0,3.3,1.4,0.2,Iris-setosa 46 | 7.0,3.2,4.7,1.4,Iris-versicolor 47 | 6.4,3.2,4.5,1.5,Iris-versicolor 48 | 6.9,3.1,4.9,1.5,Iris-versicolor 49 | 5.5,2.3,4.0,1.3,Iris-versicolor 50 | 6.5,2.8,4.6,1.5,Iris-versicolor 51 | 5.7,2.8,4.5,1.3,Iris-versicolor 52 | 6.3,3.3,4.7,1.6,Iris-versicolor 53 | 4.9,2.4,3.3,1.0,Iris-versicolor 54 | 6.6,2.9,4.6,1.3,Iris-versicolor 55 | 5.2,2.7,3.9,1.4,Iris-versicolor 56 | 5.0,2.0,3.5,1.0,Iris-versicolor 57 | 5.9,3.0,4.2,1.5,Iris-versicolor 58 | 6.0,2.2,4.0,1.0,Iris-versicolor 59 | 6.1,2.9,4.7,1.4,Iris-versicolor 60 | 5.6,2.9,3.6,1.3,Iris-versicolor 61 | 6.7,3.1,4.4,1.4,Iris-versicolor 62 | 5.6,3.0,4.5,1.5,Iris-versicolor 63 | 5.8,2.7,4.1,1.0,Iris-versicolor 64 | 6.2,2.2,4.5,1.5,Iris-versicolor 65 | 5.6,2.5,3.9,1.1,Iris-versicolor 66 | 5.9,3.2,4.8,1.8,Iris-versicolor 67 | 6.1,2.8,4.0,1.3,Iris-versicolor 68 | 6.3,2.5,4.9,1.5,Iris-versicolor 69 | 6.1,2.8,4.7,1.2,Iris-versicolor 70 | 6.4,2.9,4.3,1.3,Iris-versicolor 71 | 6.6,3.0,4.4,1.4,Iris-versicolor 72 | 6.8,2.8,4.8,1.4,Iris-versicolor 73 | 6.7,3.0,5.0,1.7,Iris-versicolor 74 | 
6.0,2.9,4.5,1.5,Iris-versicolor 75 | 5.7,2.6,3.5,1.0,Iris-versicolor 76 | 5.5,2.4,3.8,1.1,Iris-versicolor 77 | 5.5,2.4,3.7,1.0,Iris-versicolor 78 | 5.8,2.7,3.9,1.2,Iris-versicolor 79 | 6.0,2.7,5.1,1.6,Iris-versicolor 80 | 5.4,3.0,4.5,1.5,Iris-versicolor 81 | 6.0,3.4,4.5,1.6,Iris-versicolor 82 | 6.7,3.1,4.7,1.5,Iris-versicolor 83 | 6.3,2.3,4.4,1.3,Iris-versicolor 84 | 5.6,3.0,4.1,1.3,Iris-versicolor 85 | 5.5,2.5,4.0,1.3,Iris-versicolor 86 | 5.5,2.6,4.4,1.2,Iris-versicolor 87 | 6.1,3.0,4.6,1.4,Iris-versicolor 88 | 5.8,2.6,4.0,1.2,Iris-versicolor 89 | 5.0,2.3,3.3,1.0,Iris-versicolor 90 | 5.6,2.7,4.2,1.3,Iris-versicolor 91 | 5.7,3.0,4.2,1.2,Iris-versicolor 92 | 5.7,2.9,4.2,1.3,Iris-versicolor 93 | 6.2,2.9,4.3,1.3,Iris-versicolor 94 | 5.1,2.5,3.0,1.1,Iris-versicolor 95 | 5.7,2.8,4.1,1.3,Iris-versicolor 96 | 6.3,3.3,6.0,2.5,Iris-virginica 97 | 5.8,2.7,5.1,1.9,Iris-virginica 98 | 7.1,3.0,5.9,2.1,Iris-virginica 99 | 6.3,2.9,5.6,1.8,Iris-virginica 100 | 6.5,3.0,5.8,2.2,Iris-virginica 101 | 7.6,3.0,6.6,2.1,Iris-virginica 102 | 4.9,2.5,4.5,1.7,Iris-virginica 103 | 7.3,2.9,6.3,1.8,Iris-virginica 104 | 6.7,2.5,5.8,1.8,Iris-virginica 105 | 7.2,3.6,6.1,2.5,Iris-virginica 106 | 6.5,3.2,5.1,2.0,Iris-virginica 107 | 6.4,2.7,5.3,1.9,Iris-virginica 108 | 6.8,3.0,5.5,2.1,Iris-virginica 109 | 5.7,2.5,5.0,2.0,Iris-virginica 110 | 5.8,2.8,5.1,2.4,Iris-virginica 111 | 6.4,3.2,5.3,2.3,Iris-virginica 112 | 6.5,3.0,5.5,1.8,Iris-virginica 113 | 7.7,3.8,6.7,2.2,Iris-virginica 114 | 7.7,2.6,6.9,2.3,Iris-virginica 115 | 6.0,2.2,5.0,1.5,Iris-virginica 116 | 6.9,3.2,5.7,2.3,Iris-virginica 117 | 5.6,2.8,4.9,2.0,Iris-virginica 118 | 7.7,2.8,6.7,2.0,Iris-virginica 119 | 6.3,2.7,4.9,1.8,Iris-virginica 120 | 6.7,3.3,5.7,2.1,Iris-virginica 121 | 7.2,3.2,6.0,1.8,Iris-virginica 122 | 6.2,2.8,4.8,1.8,Iris-virginica 123 | 6.1,3.0,4.9,1.8,Iris-virginica 124 | 6.4,2.8,5.6,2.1,Iris-virginica 125 | 7.2,3.0,5.8,1.6,Iris-virginica 126 | 7.4,2.8,6.1,1.9,Iris-virginica 127 | 7.9,3.8,6.4,2.0,Iris-virginica 128 | 6.4,2.8,5.6,2.2,Iris-virginica 129 | 6.3,2.8,5.1,1.5,Iris-virginica 130 | 6.1,2.6,5.6,1.4,Iris-virginica 131 | 7.7,3.0,6.1,2.3,Iris-virginica 132 | 6.3,3.4,5.6,2.4,Iris-virginica 133 | 6.4,3.1,5.5,1.8,Iris-virginica 134 | 6.0,3.0,4.8,1.8,Iris-virginica 135 | 6.9,3.1,5.4,2.1,Iris-virginica 136 | 6.7,3.1,5.6,2.4,Iris-virginica 137 | 6.9,3.1,5.1,2.3,Iris-virginica 138 | 5.8,2.7,5.1,1.9,Iris-virginica 139 | 6.8,3.2,5.9,2.3,Iris-virginica 140 | 6.7,3.3,5.7,2.5,Iris-virginica 141 | 6.7,3.0,5.2,2.3,Iris-virginica 142 | 6.3,2.5,5.0,1.9,Iris-virginica 143 | 6.5,3.0,5.2,2.0,Iris-virginica 144 | 6.2,3.4,5.4,2.3,Iris-virginica 145 | 5.9,3.0,5.1,1.8,Iris-virginica 146 | 147 | -------------------------------------------------------------------------------- /notebook-to-production/productionizing/data/iris.csv: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 5.0,3.4,1.5,0.2,Iris-setosa 4 | 4.4,2.9,1.4,0.2,Iris-setosa 5 | 4.9,3.1,1.5,0.1,Iris-setosa 6 | 5.4,3.7,1.5,0.2,Iris-setosa 7 | 4.8,3.4,1.6,0.2,Iris-setosa 8 | 4.8,3.0,1.4,0.1,Iris-setosa 9 | 4.3,3.0,1.1,0.1,Iris-setosa 10 | 5.8,4.0,1.2,0.2,Iris-setosa 11 | 5.7,4.4,1.5,0.4,Iris-setosa 12 | 5.4,3.9,1.3,0.4,Iris-setosa 13 | 5.1,3.5,1.4,0.3,Iris-setosa 14 | 5.7,3.8,1.7,0.3,Iris-setosa 15 | 5.1,3.8,1.5,0.3,Iris-setosa 16 | 5.4,3.4,1.7,0.2,Iris-setosa 17 | 5.1,3.7,1.5,0.4,Iris-setosa 18 | 4.6,3.6,1.0,0.2,Iris-setosa 19 | 5.1,3.3,1.7,0.5,Iris-setosa 20 | 4.8,3.4,1.9,0.2,Iris-setosa 21 | 
5.0,3.0,1.6,0.2,Iris-setosa 22 | 5.0,3.4,1.6,0.4,Iris-setosa 23 | 5.2,3.5,1.5,0.2,Iris-setosa 24 | 5.2,3.4,1.4,0.2,Iris-setosa 25 | 4.7,3.2,1.6,0.2,Iris-setosa 26 | 4.8,3.1,1.6,0.2,Iris-setosa 27 | 5.4,3.4,1.5,0.4,Iris-setosa 28 | 5.2,4.1,1.5,0.1,Iris-setosa 29 | 5.5,4.2,1.4,0.2,Iris-setosa 30 | 4.9,3.1,1.5,0.1,Iris-setosa 31 | 5.0,3.2,1.2,0.2,Iris-setosa 32 | 5.5,3.5,1.3,0.2,Iris-setosa 33 | 4.9,3.1,1.5,0.1,Iris-setosa 34 | 4.4,3.0,1.3,0.2,Iris-setosa 35 | 5.1,3.4,1.5,0.2,Iris-setosa 36 | 5.0,3.5,1.3,0.3,Iris-setosa 37 | 4.5,2.3,1.3,0.3,Iris-setosa 38 | 4.4,3.2,1.3,0.2,Iris-setosa 39 | 5.0,3.5,1.6,0.6,Iris-setosa 40 | 5.1,3.8,1.9,0.4,Iris-setosa 41 | 4.8,3.0,1.4,0.3,Iris-setosa 42 | 5.1,3.8,1.6,0.2,Iris-setosa 43 | 4.6,3.2,1.4,0.2,Iris-setosa 44 | 5.3,3.7,1.5,0.2,Iris-setosa 45 | 5.0,3.3,1.4,0.2,Iris-setosa 46 | 7.0,3.2,4.7,1.4,Iris-versicolor 47 | 6.4,3.2,4.5,1.5,Iris-versicolor 48 | 6.9,3.1,4.9,1.5,Iris-versicolor 49 | 5.5,2.3,4.0,1.3,Iris-versicolor 50 | 6.5,2.8,4.6,1.5,Iris-versicolor 51 | 5.7,2.8,4.5,1.3,Iris-versicolor 52 | 6.3,3.3,4.7,1.6,Iris-versicolor 53 | 4.9,2.4,3.3,1.0,Iris-versicolor 54 | 6.6,2.9,4.6,1.3,Iris-versicolor 55 | 5.2,2.7,3.9,1.4,Iris-versicolor 56 | 5.0,2.0,3.5,1.0,Iris-versicolor 57 | 5.9,3.0,4.2,1.5,Iris-versicolor 58 | 6.0,2.2,4.0,1.0,Iris-versicolor 59 | 6.1,2.9,4.7,1.4,Iris-versicolor 60 | 5.6,2.9,3.6,1.3,Iris-versicolor 61 | 6.7,3.1,4.4,1.4,Iris-versicolor 62 | 5.6,3.0,4.5,1.5,Iris-versicolor 63 | 5.8,2.7,4.1,1.0,Iris-versicolor 64 | 6.2,2.2,4.5,1.5,Iris-versicolor 65 | 5.6,2.5,3.9,1.1,Iris-versicolor 66 | 5.9,3.2,4.8,1.8,Iris-versicolor 67 | 6.1,2.8,4.0,1.3,Iris-versicolor 68 | 6.3,2.5,4.9,1.5,Iris-versicolor 69 | 6.1,2.8,4.7,1.2,Iris-versicolor 70 | 6.4,2.9,4.3,1.3,Iris-versicolor 71 | 6.6,3.0,4.4,1.4,Iris-versicolor 72 | 6.8,2.8,4.8,1.4,Iris-versicolor 73 | 6.7,3.0,5.0,1.7,Iris-versicolor 74 | 6.0,2.9,4.5,1.5,Iris-versicolor 75 | 5.7,2.6,3.5,1.0,Iris-versicolor 76 | 5.5,2.4,3.8,1.1,Iris-versicolor 77 | 5.5,2.4,3.7,1.0,Iris-versicolor 78 | 5.8,2.7,3.9,1.2,Iris-versicolor 79 | 6.0,2.7,5.1,1.6,Iris-versicolor 80 | 5.4,3.0,4.5,1.5,Iris-versicolor 81 | 6.0,3.4,4.5,1.6,Iris-versicolor 82 | 6.7,3.1,4.7,1.5,Iris-versicolor 83 | 6.3,2.3,4.4,1.3,Iris-versicolor 84 | 5.6,3.0,4.1,1.3,Iris-versicolor 85 | 5.5,2.5,4.0,1.3,Iris-versicolor 86 | 5.5,2.6,4.4,1.2,Iris-versicolor 87 | 6.1,3.0,4.6,1.4,Iris-versicolor 88 | 5.8,2.6,4.0,1.2,Iris-versicolor 89 | 5.0,2.3,3.3,1.0,Iris-versicolor 90 | 5.6,2.7,4.2,1.3,Iris-versicolor 91 | 5.7,3.0,4.2,1.2,Iris-versicolor 92 | 5.7,2.9,4.2,1.3,Iris-versicolor 93 | 6.2,2.9,4.3,1.3,Iris-versicolor 94 | 5.1,2.5,3.0,1.1,Iris-versicolor 95 | 5.7,2.8,4.1,1.3,Iris-versicolor 96 | 6.3,3.3,6.0,2.5,Iris-virginica 97 | 5.8,2.7,5.1,1.9,Iris-virginica 98 | 7.1,3.0,5.9,2.1,Iris-virginica 99 | 6.3,2.9,5.6,1.8,Iris-virginica 100 | 6.5,3.0,5.8,2.2,Iris-virginica 101 | 7.6,3.0,6.6,2.1,Iris-virginica 102 | 4.9,2.5,4.5,1.7,Iris-virginica 103 | 7.3,2.9,6.3,1.8,Iris-virginica 104 | 6.7,2.5,5.8,1.8,Iris-virginica 105 | 7.2,3.6,6.1,2.5,Iris-virginica 106 | 6.5,3.2,5.1,2.0,Iris-virginica 107 | 6.4,2.7,5.3,1.9,Iris-virginica 108 | 6.8,3.0,5.5,2.1,Iris-virginica 109 | 5.7,2.5,5.0,2.0,Iris-virginica 110 | 5.8,2.8,5.1,2.4,Iris-virginica 111 | 6.4,3.2,5.3,2.3,Iris-virginica 112 | 6.5,3.0,5.5,1.8,Iris-virginica 113 | 7.7,3.8,6.7,2.2,Iris-virginica 114 | 7.7,2.6,6.9,2.3,Iris-virginica 115 | 6.0,2.2,5.0,1.5,Iris-virginica 116 | 6.9,3.2,5.7,2.3,Iris-virginica 117 | 5.6,2.8,4.9,2.0,Iris-virginica 118 | 7.7,2.8,6.7,2.0,Iris-virginica 119 | 6.3,2.7,4.9,1.8,Iris-virginica 
120 | 6.7,3.3,5.7,2.1,Iris-virginica 121 | 7.2,3.2,6.0,1.8,Iris-virginica 122 | 6.2,2.8,4.8,1.8,Iris-virginica 123 | 6.1,3.0,4.9,1.8,Iris-virginica 124 | 6.4,2.8,5.6,2.1,Iris-virginica 125 | 7.2,3.0,5.8,1.6,Iris-virginica 126 | 7.4,2.8,6.1,1.9,Iris-virginica 127 | 7.9,3.8,6.4,2.0,Iris-virginica 128 | 6.4,2.8,5.6,2.2,Iris-virginica 129 | 6.3,2.8,5.1,1.5,Iris-virginica 130 | 6.1,2.6,5.6,1.4,Iris-virginica 131 | 7.7,3.0,6.1,2.3,Iris-virginica 132 | 6.3,3.4,5.6,2.4,Iris-virginica 133 | 6.4,3.1,5.5,1.8,Iris-virginica 134 | 6.0,3.0,4.8,1.8,Iris-virginica 135 | 6.9,3.1,5.4,2.1,Iris-virginica 136 | 6.7,3.1,5.6,2.4,Iris-virginica 137 | 6.9,3.1,5.1,2.3,Iris-virginica 138 | 5.8,2.7,5.1,1.9,Iris-virginica 139 | 6.8,3.2,5.9,2.3,Iris-virginica 140 | 6.7,3.3,5.7,2.5,Iris-virginica 141 | 6.7,3.0,5.2,2.3,Iris-virginica 142 | 6.3,2.5,5.0,1.9,Iris-virginica 143 | 6.5,3.0,5.2,2.0,Iris-virginica 144 | 6.2,3.4,5.4,2.3,Iris-virginica 145 | 5.9,3.0,5.1,1.8,Iris-virginica 146 | 147 | -------------------------------------------------------------------------------- /notebook-to-production/deploying_managing/data/iris.csv: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 5.0,3.4,1.5,0.2,Iris-setosa 4 | 4.4,2.9,1.4,0.2,Iris-setosa 5 | 4.9,3.1,1.5,0.1,Iris-setosa 6 | 5.4,3.7,1.5,0.2,Iris-setosa 7 | 4.8,3.4,1.6,0.2,Iris-setosa 8 | 4.8,3.0,1.4,0.1,Iris-setosa 9 | 4.3,3.0,1.1,0.1,Iris-setosa 10 | 5.8,4.0,1.2,0.2,Iris-setosa 11 | 5.7,4.4,1.5,0.4,Iris-setosa 12 | 5.4,3.9,1.3,0.4,Iris-setosa 13 | 5.1,3.5,1.4,0.3,Iris-setosa 14 | 5.7,3.8,1.7,0.3,Iris-setosa 15 | 5.1,3.8,1.5,0.3,Iris-setosa 16 | 5.4,3.4,1.7,0.2,Iris-setosa 17 | 5.1,3.7,1.5,0.4,Iris-setosa 18 | 4.6,3.6,1.0,0.2,Iris-setosa 19 | 5.1,3.3,1.7,0.5,Iris-setosa 20 | 4.8,3.4,1.9,0.2,Iris-setosa 21 | 5.0,3.0,1.6,0.2,Iris-setosa 22 | 5.0,3.4,1.6,0.4,Iris-setosa 23 | 5.2,3.5,1.5,0.2,Iris-setosa 24 | 5.2,3.4,1.4,0.2,Iris-setosa 25 | 4.7,3.2,1.6,0.2,Iris-setosa 26 | 4.8,3.1,1.6,0.2,Iris-setosa 27 | 5.4,3.4,1.5,0.4,Iris-setosa 28 | 5.2,4.1,1.5,0.1,Iris-setosa 29 | 5.5,4.2,1.4,0.2,Iris-setosa 30 | 4.9,3.1,1.5,0.1,Iris-setosa 31 | 5.0,3.2,1.2,0.2,Iris-setosa 32 | 5.5,3.5,1.3,0.2,Iris-setosa 33 | 4.9,3.1,1.5,0.1,Iris-setosa 34 | 4.4,3.0,1.3,0.2,Iris-setosa 35 | 5.1,3.4,1.5,0.2,Iris-setosa 36 | 5.0,3.5,1.3,0.3,Iris-setosa 37 | 4.5,2.3,1.3,0.3,Iris-setosa 38 | 4.4,3.2,1.3,0.2,Iris-setosa 39 | 5.0,3.5,1.6,0.6,Iris-setosa 40 | 5.1,3.8,1.9,0.4,Iris-setosa 41 | 4.8,3.0,1.4,0.3,Iris-setosa 42 | 5.1,3.8,1.6,0.2,Iris-setosa 43 | 4.6,3.2,1.4,0.2,Iris-setosa 44 | 5.3,3.7,1.5,0.2,Iris-setosa 45 | 5.0,3.3,1.4,0.2,Iris-setosa 46 | 7.0,3.2,4.7,1.4,Iris-versicolor 47 | 6.4,3.2,4.5,1.5,Iris-versicolor 48 | 6.9,3.1,4.9,1.5,Iris-versicolor 49 | 5.5,2.3,4.0,1.3,Iris-versicolor 50 | 6.5,2.8,4.6,1.5,Iris-versicolor 51 | 5.7,2.8,4.5,1.3,Iris-versicolor 52 | 6.3,3.3,4.7,1.6,Iris-versicolor 53 | 4.9,2.4,3.3,1.0,Iris-versicolor 54 | 6.6,2.9,4.6,1.3,Iris-versicolor 55 | 5.2,2.7,3.9,1.4,Iris-versicolor 56 | 5.0,2.0,3.5,1.0,Iris-versicolor 57 | 5.9,3.0,4.2,1.5,Iris-versicolor 58 | 6.0,2.2,4.0,1.0,Iris-versicolor 59 | 6.1,2.9,4.7,1.4,Iris-versicolor 60 | 5.6,2.9,3.6,1.3,Iris-versicolor 61 | 6.7,3.1,4.4,1.4,Iris-versicolor 62 | 5.6,3.0,4.5,1.5,Iris-versicolor 63 | 5.8,2.7,4.1,1.0,Iris-versicolor 64 | 6.2,2.2,4.5,1.5,Iris-versicolor 65 | 5.6,2.5,3.9,1.1,Iris-versicolor 66 | 5.9,3.2,4.8,1.8,Iris-versicolor 67 | 6.1,2.8,4.0,1.3,Iris-versicolor 68 | 6.3,2.5,4.9,1.5,Iris-versicolor 69 | 
6.1,2.8,4.7,1.2,Iris-versicolor 70 | 6.4,2.9,4.3,1.3,Iris-versicolor 71 | 6.6,3.0,4.4,1.4,Iris-versicolor 72 | 6.8,2.8,4.8,1.4,Iris-versicolor 73 | 6.7,3.0,5.0,1.7,Iris-versicolor 74 | 6.0,2.9,4.5,1.5,Iris-versicolor 75 | 5.7,2.6,3.5,1.0,Iris-versicolor 76 | 5.5,2.4,3.8,1.1,Iris-versicolor 77 | 5.5,2.4,3.7,1.0,Iris-versicolor 78 | 5.8,2.7,3.9,1.2,Iris-versicolor 79 | 6.0,2.7,5.1,1.6,Iris-versicolor 80 | 5.4,3.0,4.5,1.5,Iris-versicolor 81 | 6.0,3.4,4.5,1.6,Iris-versicolor 82 | 6.7,3.1,4.7,1.5,Iris-versicolor 83 | 6.3,2.3,4.4,1.3,Iris-versicolor 84 | 5.6,3.0,4.1,1.3,Iris-versicolor 85 | 5.5,2.5,4.0,1.3,Iris-versicolor 86 | 5.5,2.6,4.4,1.2,Iris-versicolor 87 | 6.1,3.0,4.6,1.4,Iris-versicolor 88 | 5.8,2.6,4.0,1.2,Iris-versicolor 89 | 5.0,2.3,3.3,1.0,Iris-versicolor 90 | 5.6,2.7,4.2,1.3,Iris-versicolor 91 | 5.7,3.0,4.2,1.2,Iris-versicolor 92 | 5.7,2.9,4.2,1.3,Iris-versicolor 93 | 6.2,2.9,4.3,1.3,Iris-versicolor 94 | 5.1,2.5,3.0,1.1,Iris-versicolor 95 | 5.7,2.8,4.1,1.3,Iris-versicolor 96 | 6.3,3.3,6.0,2.5,Iris-virginica 97 | 5.8,2.7,5.1,1.9,Iris-virginica 98 | 7.1,3.0,5.9,2.1,Iris-virginica 99 | 6.3,2.9,5.6,1.8,Iris-virginica 100 | 6.5,3.0,5.8,2.2,Iris-virginica 101 | 7.6,3.0,6.6,2.1,Iris-virginica 102 | 4.9,2.5,4.5,1.7,Iris-virginica 103 | 7.3,2.9,6.3,1.8,Iris-virginica 104 | 6.7,2.5,5.8,1.8,Iris-virginica 105 | 7.2,3.6,6.1,2.5,Iris-virginica 106 | 6.5,3.2,5.1,2.0,Iris-virginica 107 | 6.4,2.7,5.3,1.9,Iris-virginica 108 | 6.8,3.0,5.5,2.1,Iris-virginica 109 | 5.7,2.5,5.0,2.0,Iris-virginica 110 | 5.8,2.8,5.1,2.4,Iris-virginica 111 | 6.4,3.2,5.3,2.3,Iris-virginica 112 | 6.5,3.0,5.5,1.8,Iris-virginica 113 | 7.7,3.8,6.7,2.2,Iris-virginica 114 | 7.7,2.6,6.9,2.3,Iris-virginica 115 | 6.0,2.2,5.0,1.5,Iris-virginica 116 | 6.9,3.2,5.7,2.3,Iris-virginica 117 | 5.6,2.8,4.9,2.0,Iris-virginica 118 | 7.7,2.8,6.7,2.0,Iris-virginica 119 | 6.3,2.7,4.9,1.8,Iris-virginica 120 | 6.7,3.3,5.7,2.1,Iris-virginica 121 | 7.2,3.2,6.0,1.8,Iris-virginica 122 | 6.2,2.8,4.8,1.8,Iris-virginica 123 | 6.1,3.0,4.9,1.8,Iris-virginica 124 | 6.4,2.8,5.6,2.1,Iris-virginica 125 | 7.2,3.0,5.8,1.6,Iris-virginica 126 | 7.4,2.8,6.1,1.9,Iris-virginica 127 | 7.9,3.8,6.4,2.0,Iris-virginica 128 | 6.4,2.8,5.6,2.2,Iris-virginica 129 | 6.3,2.8,5.1,1.5,Iris-virginica 130 | 6.1,2.6,5.6,1.4,Iris-virginica 131 | 7.7,3.0,6.1,2.3,Iris-virginica 132 | 6.3,3.4,5.6,2.4,Iris-virginica 133 | 6.4,3.1,5.5,1.8,Iris-virginica 134 | 6.0,3.0,4.8,1.8,Iris-virginica 135 | 6.9,3.1,5.4,2.1,Iris-virginica 136 | 6.7,3.1,5.6,2.4,Iris-virginica 137 | 6.9,3.1,5.1,2.3,Iris-virginica 138 | 5.8,2.7,5.1,1.9,Iris-virginica 139 | 6.8,3.2,5.9,2.3,Iris-virginica 140 | 6.7,3.3,5.7,2.5,Iris-virginica 141 | 6.7,3.0,5.2,2.3,Iris-virginica 142 | 6.3,2.5,5.0,1.9,Iris-virginica 143 | 6.5,3.0,5.2,2.0,Iris-virginica 144 | 6.2,3.4,5.4,2.3,Iris-virginica 145 | 5.9,3.0,5.1,1.8,Iris-virginica 146 | 147 | -------------------------------------------------------------------------------- /notebook-to-production/frameworks_that_scale/data/iris.csv: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 5.0,3.4,1.5,0.2,Iris-setosa 4 | 4.4,2.9,1.4,0.2,Iris-setosa 5 | 4.9,3.1,1.5,0.1,Iris-setosa 6 | 5.4,3.7,1.5,0.2,Iris-setosa 7 | 4.8,3.4,1.6,0.2,Iris-setosa 8 | 4.8,3.0,1.4,0.1,Iris-setosa 9 | 4.3,3.0,1.1,0.1,Iris-setosa 10 | 5.8,4.0,1.2,0.2,Iris-setosa 11 | 5.7,4.4,1.5,0.4,Iris-setosa 12 | 5.4,3.9,1.3,0.4,Iris-setosa 13 | 5.1,3.5,1.4,0.3,Iris-setosa 14 | 5.7,3.8,1.7,0.3,Iris-setosa 15 | 
5.1,3.8,1.5,0.3,Iris-setosa 16 | 5.4,3.4,1.7,0.2,Iris-setosa 17 | 5.1,3.7,1.5,0.4,Iris-setosa 18 | 4.6,3.6,1.0,0.2,Iris-setosa 19 | 5.1,3.3,1.7,0.5,Iris-setosa 20 | 4.8,3.4,1.9,0.2,Iris-setosa 21 | 5.0,3.0,1.6,0.2,Iris-setosa 22 | 5.0,3.4,1.6,0.4,Iris-setosa 23 | 5.2,3.5,1.5,0.2,Iris-setosa 24 | 5.2,3.4,1.4,0.2,Iris-setosa 25 | 4.7,3.2,1.6,0.2,Iris-setosa 26 | 4.8,3.1,1.6,0.2,Iris-setosa 27 | 5.4,3.4,1.5,0.4,Iris-setosa 28 | 5.2,4.1,1.5,0.1,Iris-setosa 29 | 5.5,4.2,1.4,0.2,Iris-setosa 30 | 4.9,3.1,1.5,0.1,Iris-setosa 31 | 5.0,3.2,1.2,0.2,Iris-setosa 32 | 5.5,3.5,1.3,0.2,Iris-setosa 33 | 4.9,3.1,1.5,0.1,Iris-setosa 34 | 4.4,3.0,1.3,0.2,Iris-setosa 35 | 5.1,3.4,1.5,0.2,Iris-setosa 36 | 5.0,3.5,1.3,0.3,Iris-setosa 37 | 4.5,2.3,1.3,0.3,Iris-setosa 38 | 4.4,3.2,1.3,0.2,Iris-setosa 39 | 5.0,3.5,1.6,0.6,Iris-setosa 40 | 5.1,3.8,1.9,0.4,Iris-setosa 41 | 4.8,3.0,1.4,0.3,Iris-setosa 42 | 5.1,3.8,1.6,0.2,Iris-setosa 43 | 4.6,3.2,1.4,0.2,Iris-setosa 44 | 5.3,3.7,1.5,0.2,Iris-setosa 45 | 5.0,3.3,1.4,0.2,Iris-setosa 46 | 7.0,3.2,4.7,1.4,Iris-versicolor 47 | 6.4,3.2,4.5,1.5,Iris-versicolor 48 | 6.9,3.1,4.9,1.5,Iris-versicolor 49 | 5.5,2.3,4.0,1.3,Iris-versicolor 50 | 6.5,2.8,4.6,1.5,Iris-versicolor 51 | 5.7,2.8,4.5,1.3,Iris-versicolor 52 | 6.3,3.3,4.7,1.6,Iris-versicolor 53 | 4.9,2.4,3.3,1.0,Iris-versicolor 54 | 6.6,2.9,4.6,1.3,Iris-versicolor 55 | 5.2,2.7,3.9,1.4,Iris-versicolor 56 | 5.0,2.0,3.5,1.0,Iris-versicolor 57 | 5.9,3.0,4.2,1.5,Iris-versicolor 58 | 6.0,2.2,4.0,1.0,Iris-versicolor 59 | 6.1,2.9,4.7,1.4,Iris-versicolor 60 | 5.6,2.9,3.6,1.3,Iris-versicolor 61 | 6.7,3.1,4.4,1.4,Iris-versicolor 62 | 5.6,3.0,4.5,1.5,Iris-versicolor 63 | 5.8,2.7,4.1,1.0,Iris-versicolor 64 | 6.2,2.2,4.5,1.5,Iris-versicolor 65 | 5.6,2.5,3.9,1.1,Iris-versicolor 66 | 5.9,3.2,4.8,1.8,Iris-versicolor 67 | 6.1,2.8,4.0,1.3,Iris-versicolor 68 | 6.3,2.5,4.9,1.5,Iris-versicolor 69 | 6.1,2.8,4.7,1.2,Iris-versicolor 70 | 6.4,2.9,4.3,1.3,Iris-versicolor 71 | 6.6,3.0,4.4,1.4,Iris-versicolor 72 | 6.8,2.8,4.8,1.4,Iris-versicolor 73 | 6.7,3.0,5.0,1.7,Iris-versicolor 74 | 6.0,2.9,4.5,1.5,Iris-versicolor 75 | 5.7,2.6,3.5,1.0,Iris-versicolor 76 | 5.5,2.4,3.8,1.1,Iris-versicolor 77 | 5.5,2.4,3.7,1.0,Iris-versicolor 78 | 5.8,2.7,3.9,1.2,Iris-versicolor 79 | 6.0,2.7,5.1,1.6,Iris-versicolor 80 | 5.4,3.0,4.5,1.5,Iris-versicolor 81 | 6.0,3.4,4.5,1.6,Iris-versicolor 82 | 6.7,3.1,4.7,1.5,Iris-versicolor 83 | 6.3,2.3,4.4,1.3,Iris-versicolor 84 | 5.6,3.0,4.1,1.3,Iris-versicolor 85 | 5.5,2.5,4.0,1.3,Iris-versicolor 86 | 5.5,2.6,4.4,1.2,Iris-versicolor 87 | 6.1,3.0,4.6,1.4,Iris-versicolor 88 | 5.8,2.6,4.0,1.2,Iris-versicolor 89 | 5.0,2.3,3.3,1.0,Iris-versicolor 90 | 5.6,2.7,4.2,1.3,Iris-versicolor 91 | 5.7,3.0,4.2,1.2,Iris-versicolor 92 | 5.7,2.9,4.2,1.3,Iris-versicolor 93 | 6.2,2.9,4.3,1.3,Iris-versicolor 94 | 5.1,2.5,3.0,1.1,Iris-versicolor 95 | 5.7,2.8,4.1,1.3,Iris-versicolor 96 | 6.3,3.3,6.0,2.5,Iris-virginica 97 | 5.8,2.7,5.1,1.9,Iris-virginica 98 | 7.1,3.0,5.9,2.1,Iris-virginica 99 | 6.3,2.9,5.6,1.8,Iris-virginica 100 | 6.5,3.0,5.8,2.2,Iris-virginica 101 | 7.6,3.0,6.6,2.1,Iris-virginica 102 | 4.9,2.5,4.5,1.7,Iris-virginica 103 | 7.3,2.9,6.3,1.8,Iris-virginica 104 | 6.7,2.5,5.8,1.8,Iris-virginica 105 | 7.2,3.6,6.1,2.5,Iris-virginica 106 | 6.5,3.2,5.1,2.0,Iris-virginica 107 | 6.4,2.7,5.3,1.9,Iris-virginica 108 | 6.8,3.0,5.5,2.1,Iris-virginica 109 | 5.7,2.5,5.0,2.0,Iris-virginica 110 | 5.8,2.8,5.1,2.4,Iris-virginica 111 | 6.4,3.2,5.3,2.3,Iris-virginica 112 | 6.5,3.0,5.5,1.8,Iris-virginica 113 | 7.7,3.8,6.7,2.2,Iris-virginica 114 | 
7.7,2.6,6.9,2.3,Iris-virginica 115 | 6.0,2.2,5.0,1.5,Iris-virginica 116 | 6.9,3.2,5.7,2.3,Iris-virginica 117 | 5.6,2.8,4.9,2.0,Iris-virginica 118 | 7.7,2.8,6.7,2.0,Iris-virginica 119 | 6.3,2.7,4.9,1.8,Iris-virginica 120 | 6.7,3.3,5.7,2.1,Iris-virginica 121 | 7.2,3.2,6.0,1.8,Iris-virginica 122 | 6.2,2.8,4.8,1.8,Iris-virginica 123 | 6.1,3.0,4.9,1.8,Iris-virginica 124 | 6.4,2.8,5.6,2.1,Iris-virginica 125 | 7.2,3.0,5.8,1.6,Iris-virginica 126 | 7.4,2.8,6.1,1.9,Iris-virginica 127 | 7.9,3.8,6.4,2.0,Iris-virginica 128 | 6.4,2.8,5.6,2.2,Iris-virginica 129 | 6.3,2.8,5.1,1.5,Iris-virginica 130 | 6.1,2.6,5.6,1.4,Iris-virginica 131 | 7.7,3.0,6.1,2.3,Iris-virginica 132 | 6.3,3.4,5.6,2.4,Iris-virginica 133 | 6.4,3.1,5.5,1.8,Iris-virginica 134 | 6.0,3.0,4.8,1.8,Iris-virginica 135 | 6.9,3.1,5.4,2.1,Iris-virginica 136 | 6.7,3.1,5.6,2.4,Iris-virginica 137 | 6.9,3.1,5.1,2.3,Iris-virginica 138 | 5.8,2.7,5.1,1.9,Iris-virginica 139 | 6.8,3.2,5.9,2.3,Iris-virginica 140 | 6.7,3.3,5.7,2.5,Iris-virginica 141 | 6.7,3.0,5.2,2.3,Iris-virginica 142 | 6.3,2.5,5.0,1.9,Iris-virginica 143 | 6.5,3.0,5.2,2.0,Iris-virginica 144 | 6.2,3.4,5.4,2.3,Iris-virginica 145 | 5.9,3.0,5.1,1.8,Iris-virginica 146 | 147 | -------------------------------------------------------------------------------- /notebook-to-production/frameworks_that_scale/exercise1/template1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "from sklearn.preprocessing import MinMaxScaler\n", 11 | "import torch\n", 12 | "from torch.autograd import Variable\n", 13 | "import torch.nn as nn\n", 14 | "import torch.nn.functional as F\n", 15 | "torch.manual_seed(1234)" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "# Load and pre-process the training data" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "# load the data\n", 32 | "cols = ['f1', 'f2', 'f3', 'f4', 'species']\n", 33 | "data = pd.read_csv('../data/iris.csv', names=cols)\n", 34 | "\n", 35 | "# pre-process the data\n", 36 | "X = data[cols[0:-1]]\n", 37 | "X = MinMaxScaler().fit_transform(X)\n", 38 | "y = pd.get_dummies(data['species'])" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "# Define our network" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# Define our neural net\n", 55 | "class Net(nn.Module):\n", 56 | " def __init__(self, input_size, hidden_size, num_classes):\n", 57 | " super(Net, self).__init__()\n", 58 | " self.fc1 = nn.Linear(input_size, hidden_size) \n", 59 | " self.relu = nn.ReLU()\n", 60 | " self.fc2 = nn.Linear(hidden_size, num_classes) \n", 61 | " \n", 62 | " def forward(self, x):\n", 63 | " out = self.fc1(x)\n", 64 | " out = self.relu(out)\n", 65 | " out = self.fc2(out)\n", 66 | " return out" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "# Train our network" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "input_size = 4\n", 83 | "num_classes = 3\n", 84 | "hidden_size = 5\n", 85 | "learning_rate = 0.1\n", 86 | "num_epoch = 1000\n", 87 | "\n", 88 | "# initialize our 
network \n", 89 | "net = Net(input_size, hidden_size, num_classes)\n", 90 | "\n", 91 | "#choose optimizer and loss function\n", 92 | "criterion = nn.BCEWithLogitsLoss() \n", 93 | "optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate)\n", 94 | "\n", 95 | "# Create variables for our X and y\n", 96 | "X_tensor = Variable(torch.from_numpy(X).float())\n", 97 | "Y_tensor = Variable(torch.from_numpy(y.as_matrix()).float())\n", 98 | "\n", 99 | "#train our network\n", 100 | "for epoch in range(num_epoch):\n", 101 | "\n", 102 | " #feedforward\n", 103 | " optimizer.zero_grad()\n", 104 | " out = net(X_tensor)\n", 105 | " \n", 106 | " # backpropagation\n", 107 | " loss = criterion(out, Y_tensor)\n", 108 | " loss.backward()\n", 109 | " optimizer.step()\n", 110 | " \n", 111 | " if epoch % 100 == 0:\n", 112 | " print ('Epoch [%d/%d], Loss: %.4f' \n", 113 | " %(epoch+1, num_epoch, loss.data[0]))" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "# Use our trained network for inference" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "# Read in and pre-process our test data\n", 130 | "test_infer = pd.read_csv('../data/test.csv', names=cols[0:-1])\n", 131 | "test_infer = MinMaxScaler().fit_transform(test_infer)\n", 132 | "\n", 133 | "# Create a PyTorch Variable with our test data\n", 134 | "\n", 135 | "# Run our test data through the network\n", 136 | "\n", 137 | "# Get our label indices using torch.max and put them in a \"labels\" variable\n", 138 | "\n", 139 | "# Parse the results\n", 140 | "species = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']\n", 141 | "predictions = []\n", 142 | "for label in labels:\n", 143 | " predictions.append(species[label])" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "predictions" 153 | ] 154 | } 155 | ], 156 | "metadata": { 157 | "kernelspec": { 158 | "display_name": "Python 3", 159 | "language": "python", 160 | "name": "python3" 161 | }, 162 | "language_info": { 163 | "codemirror_mode": { 164 | "name": "ipython", 165 | "version": 3 166 | }, 167 | "file_extension": ".py", 168 | "mimetype": "text/x-python", 169 | "name": "python", 170 | "nbconvert_exporter": "python", 171 | "pygments_lexer": "ipython3", 172 | "version": "3.6.4" 173 | } 174 | }, 175 | "nbformat": 4, 176 | "nbformat_minor": 2 177 | } 178 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_with_go/data/iris.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,species 2 | 5.1,3.5,1.4,0.2,Iris-setosa 3 | 4.9,3.0,1.4,0.2,Iris-setosa 4 | 4.7,3.2,1.3,0.2,Iris-setosa 5 | 4.6,3.1,1.5,0.2,Iris-setosa 6 | 5.0,3.6,1.4,0.2,Iris-setosa 7 | 5.4,3.9,1.7,0.4,Iris-setosa 8 | 4.6,3.4,1.4,0.3,Iris-setosa 9 | 5.0,3.4,1.5,0.2,Iris-setosa 10 | 4.4,2.9,1.4,0.2,Iris-setosa 11 | 4.9,3.1,1.5,0.1,Iris-setosa 12 | 5.4,3.7,1.5,0.2,Iris-setosa 13 | 4.8,3.4,1.6,0.2,Iris-setosa 14 | 4.8,3.0,1.4,0.1,Iris-setosa 15 | 4.3,3.0,1.1,0.1,Iris-setosa 16 | 5.8,4.0,1.2,0.2,Iris-setosa 17 | 5.7,4.4,1.5,0.4,Iris-setosa 18 | 5.4,3.9,1.3,0.4,Iris-setosa 19 | 5.1,3.5,1.4,0.3,Iris-setosa 20 | 5.7,3.8,1.7,0.3,Iris-setosa 21 | 5.1,3.8,1.5,0.3,Iris-setosa 22 | 5.4,3.4,1.7,0.2,Iris-setosa 23 | 5.1,3.7,1.5,0.4,Iris-setosa 24 | 
4.6,3.6,1.0,0.2,Iris-setosa 25 | 5.1,3.3,1.7,0.5,Iris-setosa 26 | 4.8,3.4,1.9,0.2,Iris-setosa 27 | 5.0,3.0,1.6,0.2,Iris-setosa 28 | 5.0,3.4,1.6,0.4,Iris-setosa 29 | 5.2,3.5,1.5,0.2,Iris-setosa 30 | 5.2,3.4,1.4,0.2,Iris-setosa 31 | 4.7,3.2,1.6,0.2,Iris-setosa 32 | 4.8,3.1,1.6,0.2,Iris-setosa 33 | 5.4,3.4,1.5,0.4,Iris-setosa 34 | 5.2,4.1,1.5,0.1,Iris-setosa 35 | 5.5,4.2,1.4,0.2,Iris-setosa 36 | 4.9,3.1,1.5,0.1,Iris-setosa 37 | 5.0,3.2,1.2,0.2,Iris-setosa 38 | 5.5,3.5,1.3,0.2,Iris-setosa 39 | 4.9,3.1,1.5,0.1,Iris-setosa 40 | 4.4,3.0,1.3,0.2,Iris-setosa 41 | 5.1,3.4,1.5,0.2,Iris-setosa 42 | 5.0,3.5,1.3,0.3,Iris-setosa 43 | 4.5,2.3,1.3,0.3,Iris-setosa 44 | 4.4,3.2,1.3,0.2,Iris-setosa 45 | 5.0,3.5,1.6,0.6,Iris-setosa 46 | 5.1,3.8,1.9,0.4,Iris-setosa 47 | 4.8,3.0,1.4,0.3,Iris-setosa 48 | 5.1,3.8,1.6,0.2,Iris-setosa 49 | 4.6,3.2,1.4,0.2,Iris-setosa 50 | 5.3,3.7,1.5,0.2,Iris-setosa 51 | 5.0,3.3,1.4,0.2,Iris-setosa 52 | 7.0,3.2,4.7,1.4,Iris-versicolor 53 | 6.4,3.2,4.5,1.5,Iris-versicolor 54 | 6.9,3.1,4.9,1.5,Iris-versicolor 55 | 5.5,2.3,4.0,1.3,Iris-versicolor 56 | 6.5,2.8,4.6,1.5,Iris-versicolor 57 | 5.7,2.8,4.5,1.3,Iris-versicolor 58 | 6.3,3.3,4.7,1.6,Iris-versicolor 59 | 4.9,2.4,3.3,1.0,Iris-versicolor 60 | 6.6,2.9,4.6,1.3,Iris-versicolor 61 | 5.2,2.7,3.9,1.4,Iris-versicolor 62 | 5.0,2.0,3.5,1.0,Iris-versicolor 63 | 5.9,3.0,4.2,1.5,Iris-versicolor 64 | 6.0,2.2,4.0,1.0,Iris-versicolor 65 | 6.1,2.9,4.7,1.4,Iris-versicolor 66 | 5.6,2.9,3.6,1.3,Iris-versicolor 67 | 6.7,3.1,4.4,1.4,Iris-versicolor 68 | 5.6,3.0,4.5,1.5,Iris-versicolor 69 | 5.8,2.7,4.1,1.0,Iris-versicolor 70 | 6.2,2.2,4.5,1.5,Iris-versicolor 71 | 5.6,2.5,3.9,1.1,Iris-versicolor 72 | 5.9,3.2,4.8,1.8,Iris-versicolor 73 | 6.1,2.8,4.0,1.3,Iris-versicolor 74 | 6.3,2.5,4.9,1.5,Iris-versicolor 75 | 6.1,2.8,4.7,1.2,Iris-versicolor 76 | 6.4,2.9,4.3,1.3,Iris-versicolor 77 | 6.6,3.0,4.4,1.4,Iris-versicolor 78 | 6.8,2.8,4.8,1.4,Iris-versicolor 79 | 6.7,3.0,5.0,1.7,Iris-versicolor 80 | 6.0,2.9,4.5,1.5,Iris-versicolor 81 | 5.7,2.6,3.5,1.0,Iris-versicolor 82 | 5.5,2.4,3.8,1.1,Iris-versicolor 83 | 5.5,2.4,3.7,1.0,Iris-versicolor 84 | 5.8,2.7,3.9,1.2,Iris-versicolor 85 | 6.0,2.7,5.1,1.6,Iris-versicolor 86 | 5.4,3.0,4.5,1.5,Iris-versicolor 87 | 6.0,3.4,4.5,1.6,Iris-versicolor 88 | 6.7,3.1,4.7,1.5,Iris-versicolor 89 | 6.3,2.3,4.4,1.3,Iris-versicolor 90 | 5.6,3.0,4.1,1.3,Iris-versicolor 91 | 5.5,2.5,4.0,1.3,Iris-versicolor 92 | 5.5,2.6,4.4,1.2,Iris-versicolor 93 | 6.1,3.0,4.6,1.4,Iris-versicolor 94 | 5.8,2.6,4.0,1.2,Iris-versicolor 95 | 5.0,2.3,3.3,1.0,Iris-versicolor 96 | 5.6,2.7,4.2,1.3,Iris-versicolor 97 | 5.7,3.0,4.2,1.2,Iris-versicolor 98 | 5.7,2.9,4.2,1.3,Iris-versicolor 99 | 6.2,2.9,4.3,1.3,Iris-versicolor 100 | 5.1,2.5,3.0,1.1,Iris-versicolor 101 | 5.7,2.8,4.1,1.3,Iris-versicolor 102 | 6.3,3.3,6.0,2.5,Iris-virginica 103 | 5.8,2.7,5.1,1.9,Iris-virginica 104 | 7.1,3.0,5.9,2.1,Iris-virginica 105 | 6.3,2.9,5.6,1.8,Iris-virginica 106 | 6.5,3.0,5.8,2.2,Iris-virginica 107 | 7.6,3.0,6.6,2.1,Iris-virginica 108 | 4.9,2.5,4.5,1.7,Iris-virginica 109 | 7.3,2.9,6.3,1.8,Iris-virginica 110 | 6.7,2.5,5.8,1.8,Iris-virginica 111 | 7.2,3.6,6.1,2.5,Iris-virginica 112 | 6.5,3.2,5.1,2.0,Iris-virginica 113 | 6.4,2.7,5.3,1.9,Iris-virginica 114 | 6.8,3.0,5.5,2.1,Iris-virginica 115 | 5.7,2.5,5.0,2.0,Iris-virginica 116 | 5.8,2.8,5.1,2.4,Iris-virginica 117 | 6.4,3.2,5.3,2.3,Iris-virginica 118 | 6.5,3.0,5.5,1.8,Iris-virginica 119 | 7.7,3.8,6.7,2.2,Iris-virginica 120 | 7.7,2.6,6.9,2.3,Iris-virginica 121 | 6.0,2.2,5.0,1.5,Iris-virginica 122 | 6.9,3.2,5.7,2.3,Iris-virginica 123 | 
5.6,2.8,4.9,2.0,Iris-virginica 124 | 7.7,2.8,6.7,2.0,Iris-virginica 125 | 6.3,2.7,4.9,1.8,Iris-virginica 126 | 6.7,3.3,5.7,2.1,Iris-virginica 127 | 7.2,3.2,6.0,1.8,Iris-virginica 128 | 6.2,2.8,4.8,1.8,Iris-virginica 129 | 6.1,3.0,4.9,1.8,Iris-virginica 130 | 6.4,2.8,5.6,2.1,Iris-virginica 131 | 7.2,3.0,5.8,1.6,Iris-virginica 132 | 7.4,2.8,6.1,1.9,Iris-virginica 133 | 7.9,3.8,6.4,2.0,Iris-virginica 134 | 6.4,2.8,5.6,2.2,Iris-virginica 135 | 6.3,2.8,5.1,1.5,Iris-virginica 136 | 6.1,2.6,5.6,1.4,Iris-virginica 137 | 7.7,3.0,6.1,2.3,Iris-virginica 138 | 6.3,3.4,5.6,2.4,Iris-virginica 139 | 6.4,3.1,5.5,1.8,Iris-virginica 140 | 6.0,3.0,4.8,1.8,Iris-virginica 141 | 6.9,3.1,5.4,2.1,Iris-virginica 142 | 6.7,3.1,5.6,2.4,Iris-virginica 143 | 6.9,3.1,5.1,2.3,Iris-virginica 144 | 5.8,2.7,5.1,1.9,Iris-virginica 145 | 6.8,3.2,5.9,2.3,Iris-virginica 146 | 6.7,3.3,5.7,2.5,Iris-virginica 147 | 6.7,3.0,5.2,2.3,Iris-virginica 148 | 6.3,2.5,5.0,1.9,Iris-virginica 149 | 6.5,3.0,5.2,2.0,Iris-virginica 150 | 6.2,3.4,5.4,2.3,Iris-virginica 151 | 5.9,3.0,5.1,1.8,Iris-virginica 152 | 153 | -------------------------------------------------------------------------------- /notebook-to-production/frameworks_that_scale/exercise1/solution1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "from sklearn.preprocessing import MinMaxScaler\n", 11 | "import torch\n", 12 | "from torch.autograd import Variable\n", 13 | "import torch.nn as nn\n", 14 | "import torch.nn.functional as F\n", 15 | "torch.manual_seed(1234)" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "# Load and pre-process the training data" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "# load the data\n", 32 | "cols = ['f1', 'f2', 'f3', 'f4', 'species']\n", 33 | "data = pd.read_csv('../data/iris.csv', names=cols)\n", 34 | "\n", 35 | "# pre-process the data\n", 36 | "X = data[cols[0:-1]]\n", 37 | "X = MinMaxScaler().fit_transform(X)\n", 38 | "y = pd.get_dummies(data['species'])" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "# Define our network" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# Define our neural net\n", 55 | "class Net(nn.Module):\n", 56 | " def __init__(self, input_size, hidden_size, num_classes):\n", 57 | " super(Net, self).__init__()\n", 58 | " self.fc1 = nn.Linear(input_size, hidden_size) \n", 59 | " self.relu = nn.ReLU()\n", 60 | " self.fc2 = nn.Linear(hidden_size, num_classes) \n", 61 | " \n", 62 | " def forward(self, x):\n", 63 | " out = self.fc1(x)\n", 64 | " out = self.relu(out)\n", 65 | " out = self.fc2(out)\n", 66 | " return out" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "# Train our network" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "input_size = 4\n", 83 | "num_classes = 3\n", 84 | "hidden_size = 5\n", 85 | "learning_rate = 0.1\n", 86 | "num_epoch = 1000\n", 87 | "\n", 88 | "# initialize our network \n", 89 | "net = Net(input_size, hidden_size, num_classes)\n", 90 | "\n", 91 | "#choose optimizer and 
loss function\n", 92 | "criterion = nn.BCEWithLogitsLoss() \n", 93 | "optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate)\n", 94 | "\n", 95 | "# Create variables for our X and y\n", 96 | "X_tensor = Variable(torch.from_numpy(X).float())\n", 97 | "Y_tensor = Variable(torch.from_numpy(y.as_matrix()).float())\n", 98 | "\n", 99 | "#train our network\n", 100 | "for epoch in range(num_epoch):\n", 101 | "\n", 102 | " #feedforward\n", 103 | " optimizer.zero_grad()\n", 104 | " out = net(X_tensor)\n", 105 | " \n", 106 | " # backpropagation\n", 107 | " loss = criterion(out, Y_tensor)\n", 108 | " loss.backward()\n", 109 | " optimizer.step()\n", 110 | " \n", 111 | " if epoch % 100 == 0:\n", 112 | " print ('Epoch [%d/%d], Loss: %.4f' \n", 113 | " %(epoch+1, num_epoch, loss.data[0]))" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "# Use our trained network for inference" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "# Read in and pre-process our test data\n", 130 | "test_infer = pd.read_csv('../data/test.csv', names=cols[0:-1])\n", 131 | "test_infer = MinMaxScaler().fit_transform(test_infer)\n", 132 | "\n", 133 | "# Create a PyTorch Variable with our test data\n", 134 | "X_test = Variable(torch.from_numpy(test_infer).float())\n", 135 | "\n", 136 | "# Run our test data through the network \n", 137 | "out = net(X_test)\n", 138 | "\n", 139 | "# Get our label indices using torch.max and put them in a \"labels\" variable\n", 140 | "_, labels = torch.max(out.data, 1)\n", 141 | "\n", 142 | "# Parse the results\n", 143 | "species = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']\n", 144 | "predictions = []\n", 145 | "for label in labels:\n", 146 | " predictions.append(species[label])" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "predictions" 156 | ] 157 | } 158 | ], 159 | "metadata": { 160 | "kernelspec": { 161 | "display_name": "Python 3", 162 | "language": "python", 163 | "name": "python3" 164 | }, 165 | "language_info": { 166 | "codemirror_mode": { 167 | "name": "ipython", 168 | "version": 3 169 | }, 170 | "file_extension": ".py", 171 | "mimetype": "text/x-python", 172 | "name": "python", 173 | "nbconvert_exporter": "python", 174 | "pygments_lexer": "ipython3", 175 | "version": "3.6.4" 176 | } 177 | }, 178 | "nbformat": 4, 179 | "nbformat_minor": 2 180 | } 181 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_workflow/exercise2/templates/template2/template2b/template2b.go: -------------------------------------------------------------------------------- 1 | // All material is licensed under the Apache License Version 2.0, January 2004 2 | // http://www.apache.org/licenses/LICENSE-2.0 3 | 4 | // go build 5 | // ./example2 6 | 7 | // Sample program to quality control a persisted regression model. 8 | package main 9 | 10 | import ( 11 | "encoding/json" 12 | "flag" 13 | "fmt" 14 | "io/ioutil" 15 | "log" 16 | "os" 17 | "path/filepath" 18 | ) 19 | 20 | // ModelInfo includes the information about the 21 | // model that is output from the training. 
22 | type ModelInfo struct { 23 | 	RMSE         float64           `json:"rmse"` 24 | 	Intercept    float64           `json:"intercept"` 25 | 	Coefficients []CoefficientInfo `json:"coefficients"` 26 | } 27 | 28 | // CoefficientInfo includes information about a 29 | // particular model coefficient. 30 | type CoefficientInfo struct { 31 | 	Name        string  `json:"name"` 32 | 	Coefficient float64 `json:"coefficient"` 33 | } 34 | 35 | // PredictionData includes the data necessary to make 36 | // a prediction and encodes the output prediction. 37 | type PredictionData struct { 38 | 	Prediction      float64          `json:"predicted_diabetes_progression"` 39 | 	IndependentVars []IndependentVar `json:"independent_variables"` 40 | 	DependentVar    float64          `json:"dependent_variable"` 41 | } 42 | 43 | // IndependentVar includes information about and a 44 | // value for an independent variable. 45 | type IndependentVar struct { 46 | 	Name  string  `json:"name"` 47 | 	Value float64 `json:"value"` 48 | } 49 | 50 | func main() { 51 | 52 | 	// Declare the input and output directory flags. 53 | 	inModelFilePtr := flag.String("inModelFile", "", "The file containing the model.") 54 | 	inTestDirPtr := flag.String("inTestDir", "", "The directory containing files to be used in qc evaluation.") 55 | 	threshold := flag.Float64("threshold", 0.9, "The threshold for allowing a model to pass QC") 56 | 	outDirPtr := flag.String("outDir", "", "The output directory") 57 | 58 | 	// Parse the command line flags. 59 | 	flag.Parse() 60 | 61 | 	// Load the model file. 62 | 	f, err := ioutil.ReadFile(*inModelFilePtr) 63 | 	if err != nil { 64 | 		log.Fatal(err) 65 | 	} 66 | 67 | 	// Unmarshal the model information. 68 | 	var modelInfo ModelInfo 69 | 	if err := json.Unmarshal(f, &modelInfo); err != nil { 70 | 		log.Fatal(err) 71 | 	} 72 | 73 | 	// Create observed and predicted slices for evaluation. 74 | 	var observed []float64 75 | 	var predicted []float64 76 | 77 | 	// Walk over files in the input. 78 | 	if err := filepath.Walk(*inTestDirPtr, func(path string, info os.FileInfo, err error) error { 79 | 80 | 		// Skip any directories. 81 | 		if info.IsDir() { 82 | 			return nil 83 | 		} 84 | 85 | 		// Open any files. 86 | 		f, err := ioutil.ReadFile(filepath.Join(*inTestDirPtr, info.Name())) 87 | 		if err != nil { 88 | 			return err 89 | 		} 90 | 91 | 		// Unmarshal the independent variables. 92 | 		var predictionData PredictionData 93 | 		if err := json.Unmarshal(f, &predictionData); err != nil { 94 | 			return err 95 | 		} 96 | 97 | 		// Make the prediction. 98 | 		prediction, err := Predict(&modelInfo, &predictionData) 99 | 		if err != nil { 100 | 			return err 101 | 		} 102 | 103 | 		// Add the values to our slices. 104 | 		predicted = append(predicted, prediction) 105 | 		observed = append(observed, predictionData.DependentVar) 106 | 107 | 		return nil 108 | 	}); err != nil { 109 | 		log.Fatal(err) 110 | 	} 111 | 112 | 	// Calculate the MSE. (Exercise: compute the mean squared error over the observed/predicted slices here, and take its square root to obtain the rMSE used below; see solution2b for one implementation.) 113 | 114 | 	// If the RMSE is less than the threshold, pass along the 115 | 	// quality controlled model. 116 | 	if rMSE < *threshold { 117 | 118 | 		// Add the RMSE value to the model data. 119 | 		modelInfo.RMSE = rMSE 120 | 121 | 		// Marshal the model information. 122 | 		outputData, err := json.MarshalIndent(modelInfo, "", "    ") 123 | 		if err != nil { 124 | 			log.Fatal(err) 125 | 		} 126 | 127 | 		// Save the marshalled output to a file. 128 | 		if err := ioutil.WriteFile(filepath.Join(*outDirPtr, "model.json"), outputData, 0644); err != nil { 129 | 			log.Fatal(err) 130 | 		} 131 | 	} 132 | } 133 | 134 | // Predict makes a prediction based on input JSON. 
135 | func Predict(modelInfo *ModelInfo, predictionData *PredictionData) (float64, error) { 136 | 137 | 	// Initialize the prediction value 138 | 	// to the intercept. 139 | 	prediction := modelInfo.Intercept 140 | 141 | 	// Create a map of independent variable coefficients. 142 | 	coeffs := make(map[string]float64) 143 | 	varNames := make([]string, len(modelInfo.Coefficients)) 144 | 	for idx, coeff := range modelInfo.Coefficients { 145 | 		coeffs[coeff.Name] = coeff.Coefficient 146 | 		varNames[idx] = coeff.Name 147 | 	} 148 | 149 | 	// Create a map of the independent variable values. 150 | 	varVals := make(map[string]float64) 151 | 	for _, indVar := range predictionData.IndependentVars { 152 | 		varVals[indVar.Name] = indVar.Value 153 | 	} 154 | 155 | 	// Loop over the independent variables. 156 | 	for _, varName := range varNames { 157 | 158 | 		// Get the coefficient. 159 | 		coeff, ok := coeffs[varName] 160 | 		if !ok { 161 | 			return 0.0, fmt.Errorf("Could not find model coefficient %s", varName) 162 | 		} 163 | 164 | 		// Get the variable value. 165 | 		val, ok := varVals[varName] 166 | 		if !ok { 167 | 			return 0.0, fmt.Errorf("Expected a value for variable %s", varName) 168 | 		} 169 | 170 | 		// Add to the prediction. 171 | 		prediction = prediction + coeff*val 172 | 	} 173 | 174 | 	return prediction, nil 175 | } 176 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_workflow/exercise2/solutions/solution2/solution2b/solution2b.go: -------------------------------------------------------------------------------- 1 | // All material is licensed under the Apache License Version 2.0, January 2004 2 | // http://www.apache.org/licenses/LICENSE-2.0 3 | 4 | // go build 5 | // ./example2 6 | 7 | // Sample program to quality control a persisted regression model. 8 | package main 9 | 10 | import ( 11 | 	"encoding/json" 12 | 	"flag" 13 | 	"fmt" 14 | 	"io/ioutil" 15 | 	"log" 16 | 	"math" 17 | 	"os" 18 | 	"path/filepath" 19 | ) 20 | 21 | // ModelInfo includes the information about the 22 | // model that is output from the training. 23 | type ModelInfo struct { 24 | 	RMSE         float64           `json:"rmse"` 25 | 	Intercept    float64           `json:"intercept"` 26 | 	Coefficients []CoefficientInfo `json:"coefficients"` 27 | } 28 | 29 | // CoefficientInfo includes information about a 30 | // particular model coefficient. 31 | type CoefficientInfo struct { 32 | 	Name        string  `json:"name"` 33 | 	Coefficient float64 `json:"coefficient"` 34 | } 35 | 36 | // PredictionData includes the data necessary to make 37 | // a prediction and encodes the output prediction. 38 | type PredictionData struct { 39 | 	Prediction      float64          `json:"predicted_diabetes_progression"` 40 | 	IndependentVars []IndependentVar `json:"independent_variables"` 41 | 	DependentVar    float64          `json:"dependent_variable"` 42 | } 43 | 44 | // IndependentVar includes information about and a 45 | // value for an independent variable. 46 | type IndependentVar struct { 47 | 	Name  string  `json:"name"` 48 | 	Value float64 `json:"value"` 49 | } 50 | 51 | func main() { 52 | 53 | 	// Declare the input and output directory flags. 54 | 	inModelFilePtr := flag.String("inModelFile", "", "The file containing the model.") 55 | 	inTestDirPtr := flag.String("inTestDir", "", "The directory containing files to be used in qc evaluation.") 56 | 	threshold := flag.Float64("threshold", 0.9, "The threshold for allowing a model to pass QC") 57 | 	outDirPtr := flag.String("outDir", "", "The output directory") 58 | 59 | 	// Parse the command line flags. 60 | 	flag.Parse() 61 | 62 | 	// Load the model file. 
63 | 	f, err := ioutil.ReadFile(*inModelFilePtr) 64 | 	if err != nil { 65 | 		log.Fatal(err) 66 | 	} 67 | 68 | 	// Unmarshal the model information. 69 | 	var modelInfo ModelInfo 70 | 	if err := json.Unmarshal(f, &modelInfo); err != nil { 71 | 		log.Fatal(err) 72 | 	} 73 | 74 | 	// Create observed and predicted slices for evaluation. 75 | 	var observed []float64 76 | 	var predicted []float64 77 | 78 | 	// Walk over files in the input. 79 | 	if err := filepath.Walk(*inTestDirPtr, func(path string, info os.FileInfo, err error) error { 80 | 81 | 		// Skip any directories. 82 | 		if info.IsDir() { 83 | 			return nil 84 | 		} 85 | 86 | 		// Open any files. 87 | 		f, err := ioutil.ReadFile(filepath.Join(*inTestDirPtr, info.Name())) 88 | 		if err != nil { 89 | 			return err 90 | 		} 91 | 92 | 		// Unmarshal the independent variables. 93 | 		var predictionData PredictionData 94 | 		if err := json.Unmarshal(f, &predictionData); err != nil { 95 | 			return err 96 | 		} 97 | 98 | 		// Make the prediction. 99 | 		prediction, err := Predict(&modelInfo, &predictionData) 100 | 		if err != nil { 101 | 			return err 102 | 		} 103 | 104 | 		// Add the values to our slices. 105 | 		predicted = append(predicted, prediction) 106 | 		observed = append(observed, predictionData.DependentVar) 107 | 108 | 		return nil 109 | 	}); err != nil { 110 | 		log.Fatal(err) 111 | 	} 112 | 113 | 	// Calculate the MSE. 114 | 	var mSE float64 115 | 	for i, yObserved := range observed { 116 | 117 | 		// Add to the mean squared error. 118 | 		mSE += math.Pow(yObserved-predicted[i], 2) / float64(len(predicted)) 119 | 	} 120 | 121 | 	// Calculate the RMSE. 122 | 	rMSE := math.Sqrt(mSE) 123 | 124 | 	// If the RMSE is less than the threshold, pass along the 125 | 	// quality controlled model. 126 | 	if rMSE < *threshold { 127 | 128 | 		// Add the RMSE value to the model data. 129 | 		modelInfo.RMSE = rMSE 130 | 131 | 		// Marshal the model information. 132 | 		outputData, err := json.MarshalIndent(modelInfo, "", "    ") 133 | 		if err != nil { 134 | 			log.Fatal(err) 135 | 		} 136 | 137 | 		// Save the marshalled output to a file. 138 | 		if err := ioutil.WriteFile(filepath.Join(*outDirPtr, "model.json"), outputData, 0644); err != nil { 139 | 			log.Fatal(err) 140 | 		} 141 | 	} 142 | } 143 | 144 | // Predict makes a prediction based on input JSON. 145 | func Predict(modelInfo *ModelInfo, predictionData *PredictionData) (float64, error) { 146 | 147 | 	// Initialize the prediction value 148 | 	// to the intercept. 149 | 	prediction := modelInfo.Intercept 150 | 151 | 	// Create a map of independent variable coefficients. 152 | 	coeffs := make(map[string]float64) 153 | 	varNames := make([]string, len(modelInfo.Coefficients)) 154 | 	for idx, coeff := range modelInfo.Coefficients { 155 | 		coeffs[coeff.Name] = coeff.Coefficient 156 | 		varNames[idx] = coeff.Name 157 | 	} 158 | 159 | 	// Create a map of the independent variable values. 160 | 	varVals := make(map[string]float64) 161 | 	for _, indVar := range predictionData.IndependentVars { 162 | 		varVals[indVar.Name] = indVar.Value 163 | 	} 164 | 165 | 	// Loop over the independent variables. 166 | 	for _, varName := range varNames { 167 | 168 | 		// Get the coefficient. 169 | 		coeff, ok := coeffs[varName] 170 | 		if !ok { 171 | 			return 0.0, fmt.Errorf("Could not find model coefficient %s", varName) 172 | 		} 173 | 174 | 		// Get the variable value. 175 | 		val, ok := varVals[varName] 176 | 		if !ok { 177 | 			return 0.0, fmt.Errorf("Expected a value for variable %s", varName) 178 | 		} 179 | 180 | 		// Add to the prediction. 
181 | 		prediction = prediction + coeff*val 182 | 	} 183 | 184 | 	return prediction, nil 185 | } 186 | -------------------------------------------------------------------------------- /notebook-to-production/frameworks_that_scale/README.md: -------------------------------------------------------------------------------- 1 | # Using Frameworks that Scale 2 | 3 | This material introduces some methods and frameworks that will help our workflow scale beyond local sample data. Once you are done with this material, you will be exposed to some of the more scalable Python frameworks in the ecosystem (e.g., PyTorch) and have some experience refactoring modeling code for production. 4 | 5 | This guide will walk you through: 6 | 7 | 1. [Discussing scalable Python frameworks for ML/AI](#1-discussing-scalable-python-frameworks) 8 | 2. [Refactoring model training](#2-refactoring-model-training) 9 | 3. [Refactoring model inference](#3-refactoring-model-inference) 10 | 11 | It also includes a [list of resources](#resources) for those that want to dive in a little bit deeper. 12 | 13 | ## 1. Discussing scalable Python frameworks 14 | 15 | *Disclaimer* - "Scalable" is such a subjective term without a well-defined metric, and I would even recommend using "non-scalable" frameworks in many cases. However, in this discussion we will highlight some computational/development advantages of certain frameworks in the Python ecosystem. By no means is this a session for scikit-learn bashing, because scikit-learn is awesome! 16 | 17 | When moving to production-size data, we should ask ourselves some questions about how our workflow will "scale." But what does that mean? Do we mean: 18 | 19 | - We should be able to run our workflow to completion on a **certain amount of data**. 20 | - We should be able to run our workflow to completion on **a certain amount of data in a certain amount of time**. 21 | - We should be able to run our workflow to completion on **a certain amount of data in a certain amount of time with certain performance characteristics**. 22 | - Something else? 23 | 24 | Depending on the answers to the above questions, the current form of our workflow (using scikit-learn) might not be appropriate. 25 | 26 | - What other frameworks are out there? 27 | - What are their advantages/disadvantages? 28 | - When are they appropriate? 29 | 30 | ## 2. Refactoring model training 31 | 32 | Let's say that we have decided we need to train our model on a ton of Iris flower data, which will involve massive matrix multiplications, etc. We would love to use a framework built to handle such operations (and utilize accelerator hardware), and, thus, we have decided to use PyTorch. 33 | 34 | This will require us to refactor our training and inference code at a minimum, so let's start out with training. [Here]() is a great starting point for learning how we might train a model in PyTorch that is similar to our scikit-learn model. We will use the `nn` package within PyTorch to build our neural network. 
35 | 36 | Generally, we need to define a Python class that will specify our network architecture and feedforward method: 37 | 38 | ``` 39 | class Net(nn.Module): 40 |     def __init__(self): 41 |         super(Net, self).__init__() 42 |         #---------------------------# 43 |         # Initialize the network 44 |         #---------------------------# 45 | 46 |     def forward(self, x): 47 |         #--------------------------# 48 |         # Define a feedforward pass 49 |         #--------------------------# 50 | ``` 51 | 52 | We will then create "Variables" for our X and y feature and label tensors, respectively. By creating these as variables, we will instantly be able to utilize PyTorch's auto-differentiation during back propagation. 53 | 54 | *Note* - If these words (auto-differentiation, back propagation, etc.) are confusing to you, don't worry! They aren't magic. They are just a set of well-defined methods that are commonly paired with neural nets. We can discuss these in class as needed and as time allows, but there are also some great resources listed below to help familiarize yourself with these methods. In any event, you won't necessarily have to learn these methods or their underpinning mathematics to continue with the subsequent class topics. You can still learn how to deploy a pipeline at scale and return to these topics at your convenience. 55 | 56 | Let's see how this works with PyTorch in practice. Navigate to [this directory](.) on your workshop instance and spin up Jupyter again: 57 | 58 | ``` 59 | $ jupyter notebook --no-browser --port 8888 --ip=* --NotebookApp.token='' 60 | ``` 61 | 62 | Under the [example1](example1) directory you will see an `example1` notebook. Open up that notebook, and let's see how it works! 63 | 64 | ## 3. Refactoring model inference 65 | 66 | Now we just need to figure out how to use our trained network to make inferences. To do this, we need to: 67 | 68 | - Read in the features we want to feed through our network, 69 | - Feed them forward through the trained network, and 70 | - Parse the results. 71 | 72 | **Exercise** - This brings us to our first official "exercise" in the course. Instead of looking at a pre-baked solution that implements the above steps, try it out on your own! To do this: 73 | 74 | - Start Jupyter again (if it's not still running), 75 | - Navigate to the [exercise1](exercise1) directory, and 76 | - Open up the [template1](exercise1/template1.ipynb) notebook. 77 | 78 | This template notebook has some comments near the bottom where you need to fill in the missing pieces. Try to fill in these pieces without looking at the [solution1](exercise1/solution1.ipynb) notebook, but don't feel bad if you can't get it. When you are ready, look at the [solution1](exercise1/solution1.ipynb) notebook to see how I implemented these steps (which is not the only, or necessarily the best, solution). Once everyone has a chance to work on this, we will go over the solution together. A minimal sketch of these inference steps is also included at the end of this README for reference. 79 | 80 | *Note* - PRs welcome and expected. 81 | 82 | ## Resources 83 | 84 | - [Tensorflow](https://www.tensorflow.org/) 85 | - [PyTorch](http://pytorch.org/) 86 | - [H2O.ai](https://www.h2o.ai/) 87 | - [Spark](https://spark.apache.org/mllib/) 88 | 89 | ___ 90 | All material is licensed under the [Apache License Version 2.0, January 2004](http://www.apache.org/licenses/LICENSE-2.0). 
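*Appendix* - For reference, here is a minimal sketch of the inference steps described in section 3. It mirrors the solution notebook, and it assumes that `net` is the trained network from section 2 and that the test features live in `../data/test.csv` (as in the example notebooks):

```
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import torch
from torch.autograd import Variable

# Read in and pre-process the test features.
cols = ['f1', 'f2', 'f3', 'f4']
test_infer = pd.read_csv('../data/test.csv', names=cols)
test_infer = MinMaxScaler().fit_transform(test_infer)

# Wrap the features in a Variable and feed them forward
# through the trained network (net, from the training step).
X_test = Variable(torch.from_numpy(test_infer).float())
out = net(X_test)

# torch.max gives us the index of the largest output for each
# row, which we then map back to the species names.
_, labels = torch.max(out.data, 1)
species = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
predictions = []
for label in labels:
    predictions.append(species[label])
```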
91 | -------------------------------------------------------------------------------- /notebook-to-production/pipeline_stages/README.md: -------------------------------------------------------------------------------- 1 | # Breaking our workflow up into pipeline stages 2 | 3 | This material walks you through breaking up a workflow, contained in a Jupyter notebook, into separate, scalable pipeline stages. Once you are done with this material, you will understand which portions of a ML/AI pipeline might benefit from being managed in isolation. You will also get some experience writing code for specific stages of a data pipeline (pre-processing, training, inference). 4 | 5 | This guide will walk you through: 6 | 7 | 1. [Discussing why we might want to split code into pipeline stages](#1-discussing-why-we-might-want-to-split-code) 8 | 2. [Creating our pre-processing stage](#2-creating-our-pre-processing-stage) 9 | 3. [Creating our training stage](#3-creating-our-training-stage) 10 | 4. [Creating our inference stage](#4-creating-our-inference-stage) 5. [Docker-izing our pipeline stages](#5-docker-izing-our-pipeline-stages) 11 | 12 | It also includes a [list of resources](#resources) for those that want to dive in a little bit deeper. 13 | 14 | ## 1. Discussing why we might want to split code 15 | 16 | Sure, you could keep all your code together and even run it in a notebook on another machine. However, keeping all the steps of your Python workflow together may not make sense. Let's think about why: why would splitting up our code into separate pieces (or "pipeline stages") be useful in terms of: 17 | 18 | - development 19 | - deployment 20 | - scaling 21 | - workflow management 22 | - integrations 23 | 24 | By the end of this discussion we should decide: 25 | 26 | - how we want to split our example workflow, and 27 | - why. 28 | 29 | The "why" is always important here. There are trade-offs in splitting up your code, and we need to justify the extra complication. 30 | 31 | ## 2. Creating our pre-processing stage 32 | 33 | Let's start at the beginning of the pipeline (pre-processing), and take our first leap out of the notebook. We need to do this because: 34 | 35 | - We need to run this code non-interactively in the pipeline. 36 | - We should (especially before production use) augment this code with unit/integration tests and automated CI/CD processes. (Not covered here) 37 | 38 | The code will resemble the code from our notebook, with some key differences: 39 | 40 | - We won't include unnecessary imports. 41 | - We will add some command line arguments to make our life easier. 42 | - We will need to think about how to get data out of each pipeline stage (such that it can be read as input by the next pipeline stage). 43 | 44 | Let's take a look. Our first example pipeline stage can be found [here](example1/example1.py). 45 | 46 | ## 3. Creating our training stage 47 | 48 | Ok, we are off to the races. Now it's your turn! 49 | 50 | **Exercise 1** - Create a similar Python script that will: 51 | 52 | - read in the `x_train.csv` and `y_train.csv` from our pre-processing stage 53 | - initialize our PyTorch model 54 | - train our model 55 | - save our model to a file 56 | 57 | You can start with [template1.py](exercise1/template1.py) and fill in the missing pieces. Check [solution1.py](exercise1/solution1.py) when you are ready. To modify `template1.py`, you can use an editor built into the terminal directly on your workshop instance (e.g., `vim` or `nano`), or you could copy over the contents of `template1.py` to your local machine and use your editor of choice. 
58 | 59 | If you do the latter, you could copy your modified version back over to your workshop instance via `scp`, or you could copy the contents into the template file or a new file. If you need help with this, please ask the instructor. You won't need to do this sort of editing in subsequent sections of the course, so you can rest easy. 60 | 61 | *Hint* - You can save the model to a file using `torch.save()`. 62 | 63 | ## 4. Creating our inference stage 64 | 65 | We're almost there! We just need our last inference stage. 66 | 67 | **Exercise 2** - Create a Python script that will: 68 | 69 | - read in the saved model from exercise 1 above 70 | - read in a file containing features, on which we need to perform inferences 71 | - use the model to perform the inferences 72 | - save the results 73 | 74 | You can start with [template2.py](exercise2/template2.py) and check [solution2.py](exercise2/solution2.py) when you are ready. 75 | 76 | ## 5. Docker-izing our pipeline stages 77 | 78 | Ok, we have our code for model training, inference, and pre-processing and we need to: 79 | 80 | - scale this code up to larger data sets, 81 | - run it automatically at certain times or based on certain events, 82 | - share it with teammates so they can generate their own results, or 83 | - connect it to other code running in our company's infrastructure. 84 | 85 | How can we do this with a high degree of reproducibility and operational/computational efficiency? And how can we ensure that our engineering team doesn't hate the data science team because they always have to deploy data science things in a "special" way with "special" data science tools? 86 | 87 | Docker solves many of these issues and even has additional benefits. Data scientists and AI researchers are also latching on to containers because they can: 88 | 89 | - Docker-ize an application quickly, hand it off to an engineering organization, and have them run it in a manner similar to any other application. 90 | - Experiment with a huge number of tools (Tensorflow, PyTorch, Spark, etc.) without having to install anything other than Docker. 91 | - Manage a diverse set of data pipeline stages in a unified way. 92 | - Leverage the huge number of excellent infrastructure projects for containers (e.g., those powering Google-scale work) to create applications that auto-scale, self-heal, are fault tolerant, etc. 93 | - Easily define and reproduce environments for experimentation. 94 | 95 | We will be using Docker to make our pipeline stages portable. If you are interested in the specifics of the Docker images we are using, check out the [docker](docker) directory here. 96 | 97 | ## Resources 98 | 99 | - [Python command line arguments](https://docs.python.org/3.3/library/argparse.html) 100 | - [Saving/loading PyTorch models](http://pytorch.org/docs/0.3.1/notes/serialization.html#recommend-saving-models) 101 | 102 | ___ 103 | All material is licensed under the [Apache License Version 2.0, January 2004](http://www.apache.org/licenses/LICENSE-2.0). 104 | -------------------------------------------------------------------------------- /notebook-to-production/introduction/README.md: -------------------------------------------------------------------------------- 1 | # Introduction to Python tooling and ML/AI workflows 2 | 3 | This material introduces some of the commonly used Python tooling for data science and ML/AI. It also introduces the ML/AI model development workflow. 
Once you are done with this material, you will understand what sets of tools are used in producing AI models, and how data scientists often interact with those tools. 4 | 5 | You can follow this guide as we work through the material in class. Most of the commands/instructions that will be given in class are repeated here, so you can follow along and/or catch up when needed. Specifically, this guide will walk you through: 6 | 7 | 1. [Connecting to your workshop instance](#1-connecting-to-your-workshop-instance) 8 | 2. [Cloning the workshop material](#2-cloning-the-workshop-material) 9 | 3. [Starting Jupyter](#3-starting-jupyter) 10 | 4. [Dealing with data](#4-dealing-with-our-data) 11 | 5. [Developing a ML/AI model in Python](#5-developing-a-mlai-model-in-python) 12 | 6. [The model development/utilization workflow](#6-the-model-developmentutilization-workflow) 13 | 14 | It also includes a [list of resources](#resources) for those that want to dive in a little bit deeper. 15 | 16 | ## 1. Connecting to your workshop instance 17 | 18 | You should have been given an IP for a remote machine at the beginning of the course. The remote machine already has Jupyter, scikit-learn, PyTorch, Docker, etc. installed along with all of the command line tools we will be needing throughout the course. To log into the remote machine on Linux or Mac, open a terminal and: 19 | 20 | ``` 21 | $ ssh pachrat@<your-instance-ip> 22 | ``` 23 | 24 | On Windows you can use PuTTY or another ssh client. You will be asked for a password, which you should also be given during the workshop. To verify that everything is running correctly on the machine, you should be able to open a Python terminal by running 25 | 26 | ``` 27 | $ python3 28 | ``` 29 | 30 | and then run the following with a similar response: 31 | 32 | ``` 33 | >>> import pandas as pd 34 | >>> pd.DataFrame([[0,1],[1,0]], columns=['a', 'b']) 35 |    a  b 36 | 0  0  1 37 | 1  1  0 38 | >>> 39 | ``` 40 | 41 | ## 2. Cloning the workshop material 42 | 43 | Once you are logged into your workshop instance, you will need to retrieve the workshop materials from this git repo. That way, we will all be working off of the same code templates and notebooks. To clone the repo, you can run: 44 | 45 | ``` 46 | $ git clone https://github.com/ardanlabs/training-ai.git 47 | ``` 48 | 49 | This will pull down the workshop materials to your instance. To confirm that the materials are there, you can navigate to the `training-ai/notebook-to-production` directory and list the contents. You should see: 50 | 51 | ``` 52 | $ cd training-ai/notebook-to-production/ 53 | $ ls 54 | README.md  deploying_managing  frameworks_that_scale  introduction  pipeline_stages  productionizing 55 | ``` 56 | 57 | ## 3. Starting Jupyter 58 | 59 | Now, we are going to start our journey to production with some familiar Python tools. The first of those (which isn't necessarily specific to Python, but is Python focused) is [Jupyter](http://jupyter.org/). Navigate to the `introduction` directory and then start Jupyter as follows: 60 | 61 | ``` 62 | $ cd introduction 63 | $ jupyter notebook --no-browser --port 8888 --ip=* --NotebookApp.token='' 64 | ``` 65 | 66 | You will now be able to visit `<your-instance-ip>:8888` in a browser to use Jupyter. When you are ready to stop using Jupyter, you can type `CTRL+c` in the terminal. 67 | 68 | ## 4. Dealing with our data 69 | 70 | While you have Jupyter up and running, click on the `example1_data_munging.ipynb` notebook (which you should see in the `example1` directory). 
This will bring up our example notebook for parsing and manipulating data. If you are new to Jupyter notebooks, you can: 71 | 72 | - click in any code block/cell to modify or run that code 73 | - type `shift+enter` to execute a code block (or use the widgets at the top of the UI) 74 | - if you get in a pickle, you might try selecting the "Kernel" menu at the top of the UI and then selecting "Restart Kernel" or similar. 75 | 76 | We will run through and discuss this notebook interactively in class. 77 | 78 | **Exercise** - This brings us to our first official "exercise" in the course. Instead of looking at a pre-baked solution that implements the above steps, try it out on your own! To do this: 79 | 80 | - Start Jupyter again (if it's not still running), 81 | - Navigate to the [template1](exercises/template1) directory, and 82 | - Open up the `template1_` notebook. 83 | 84 | This template notebook has some comments near the bottom where you need to fill in the missing pieces. Try to fill in these pieces without looking at the `solution1_` notebook under `exercises/solution1`, but don't feel bad if you can't get it. When you are ready, look at the solution1 notebook to see how I implemented these steps (which is not the only, or necessarily the best, solution). Once everyone has a chance to work on this, we will go over the solution together. 85 | 86 | ## 5. Developing a ML/AI model in Python 87 | 88 | Our example problem for the day will be the [Iris flower classification problem](https://en.wikipedia.org/wiki/Iris_flower_data_set), and we will start by solving that problem using scikit-learn. Scikit-learn is the first ML/AI framework that many people use, and so we will use it as our jumping-off point. Restart Jupyter (if you don't have it running), and open `example2_model_training.ipynb` from the `example2` directory. 89 | 90 | **Exercise** - Try implementing a model other than kNN with scikit-learn (a sketch of one possible alternative appears at the end of this README): 91 | 92 | - [template](exercises/template2) 93 | - [solution](exercises/solution2) 94 | 95 | ## 6. The model development/utilization workflow 96 | 97 | Now that we have gotten our hands dirty building some models in Python, let's take a step back and think about the model development workflow in general. We will do this interactively in class via Q&A and via the class slides. 98 | 99 | ## Resources 100 | 101 | Technical resources: 102 | 103 | - [Jupyter](http://jupyter.org/) 104 | - [Pandas](https://pandas.pydata.org/) 105 | - [scikit-learn](http://scikit-learn.org/stable/) 106 | 107 | ___ 108 | All material is licensed under the [Apache License Version 2.0, January 2004](http://www.apache.org/licenses/LICENSE-2.0). 
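*Appendix* - As a reference for the section 5 exercise, here is a minimal sketch of one possible model other than kNN (a decision tree). The column names and the `data/iris.csv` path follow the example notebooks; the particular train/test split parameters are illustrative assumptions:

```
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Load the iris data, as in the example notebooks.
cols = ['f1', 'f2', 'f3', 'f4', 'species']
data = pd.read_csv('data/iris.csv', names=cols)

# Hold out 20% of the data for evaluating the model.
X_train, X_test, y_train, y_test = train_test_split(
    data[cols[0:-1]], data['species'], test_size=0.2, random_state=42)

# Train a decision tree instead of kNN and evaluate its accuracy.
model = DecisionTreeClassifier()
model.fit(X_train, y_train)
print(accuracy_score(y_test, model.predict(X_test)))
```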
109 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_with_go/solutions/solution1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 |  "cells": [ 3 |   { 4 |    "cell_type": "markdown", 5 |    "metadata": {}, 6 |    "source": [ 7 |     "# Exercise - Regression" 8 |    ] 9 |   }, 10 |   { 11 |    "cell_type": "markdown", 12 |    "metadata": {}, 13 |    "source": [ 14 |     "## Imports" 15 |    ] 16 |   }, 17 |   { 18 |    "cell_type": "code", 19 |    "execution_count": null, 20 |    "metadata": {}, 21 |    "outputs": [], 22 |    "source": [ 23 |     "import (\n", 24 |     "    \"io/ioutil\"\n", 25 |     "    \"fmt\"\n", 26 |     "    \"os\"\n", 27 |     "    \"image/color\"\n", 28 |     "    \"math\"\n", 29 |     "    \n", 30 |     "    \"github.com/kniren/gota/dataframe\"\n", 31 |     "    \"gonum.org/v1/plot\"\n", 32 |     "    \"gonum.org/v1/plot/plotter\"\n", 33 |     "    \"gonum.org/v1/plot/plotutil\"\n", 34 |     "    \"gonum.org/v1/plot/vg\"\n", 35 |     "    \"github.com/sajari/regression\"\n", 36 |     ")" 37 |    ] 38 |   }, 39 |   { 40 |    "cell_type": "markdown", 41 |    "metadata": {}, 42 |    "source": [ 43 |     "## Import the Data" 44 |    ] 45 |   }, 46 |   { 47 |    "cell_type": "code", 48 |    "execution_count": null, 49 |    "metadata": {}, 50 |    "outputs": [], 51 |    "source": [ 52 |     "// Open the data file.\n", 53 |     "f, err := os.Open(\"../data/basketball.heigh.and.weigh.csv\")\n", 54 |     "if err != nil {\n", 55 |     "    fmt.Println(err)\n", 56 |     "}\n", 57 |     "\n", 58 |     "// Read in the contents to a dataframe.\n", 59 |     "dataset := dataframe.ReadCSV(f)\n", 60 |     "\n", 61 |     "// Close the file.\n", 62 |     "f.Close()" 63 |    ] 64 |   }, 65 |   { 66 |    "cell_type": "code", 67 |    "execution_count": null, 68 |    "metadata": {}, 69 |    "outputs": [], 70 |    "source": [ 71 |     "// Output a summary of the dataset to stdout.\n", 72 |     "fmt.Println(dataset)" 73 |    ] 74 |   }, 75 |   { 76 |    "cell_type": "markdown", 77 |    "metadata": {}, 78 |    "source": [ 79 |     "## Split our data into training and test data" 80 |    ] 81 |   }, 82 |   { 83 |    "cell_type": "code", 84 |    "execution_count": null, 85 |    "metadata": {}, 86 |    "outputs": [], 87 |    "source": [ 88 |     "// Calculate the number of elements in each set.\n", 89 |     "// We will utilize an 80/20 split in this case.\n", 90 |     "trainingNum := (4 * dataset.Nrow()) / 5\n", 91 |     "testNum := dataset.Nrow() / 5\n", 92 |     "if trainingNum+testNum < dataset.Nrow() {\n", 93 |     "    trainingNum++\n", 94 |     "}\n", 95 |     "\n", 96 |     "// Create the subset indices.\n", 97 |     "trainingIdx := make([]int, trainingNum)\n", 98 |     "testIdx := make([]int, testNum)\n", 99 |     "\n", 100 |     "// Enumerate the training indices.\n", 101 |     "for i := 0; i < trainingNum; i++ {\n", 102 |     "    trainingIdx[i] = i\n", 103 |     "}\n", 104 |     "\n", 105 |     "// Enumerate the test indices.\n", 106 |     "for i := 0; i < testNum; i++ {\n", 107 |     "    testIdx[i] = trainingNum + i\n", 108 |     "}\n", 109 |     "\n", 110 |     "// Create the subset dataframes.\n", 111 |     "trainingDF := dataset.Subset(trainingIdx)\n", 112 |     "testDF := dataset.Subset(testIdx)" 113 |    ] 114 |   }, 115 |   { 116 |    "cell_type": "code", 117 |    "execution_count": null, 118 |    "metadata": {}, 119 |    "outputs": [], 120 |    "source": [ 121 |     "// Output the sizes of our training and test sets.\n", 122 |     "fmt.Println(trainingDF.Nrow())\n", 123 |     "fmt.Println(testDF.Nrow())" 124 |    ] 125 |   }, 126 |   { 127 |    "cell_type": "markdown", 128 |    "metadata": {}, 129 |    "source": [ 130 |     "## Training our model on the training data" 131 |    ] 132 |   }, 133 |   { 134 |    "cell_type": "code", 135 |    "execution_count": null, 136 |    "metadata": {}, 137 |    "outputs": [], 138 |    "source": [ 139 |     "// In this
case we are going to try and model our weight\n", 140 |     "// by height using github.com/sajari/regression.\n", 141 |     "var r regression.Regression\n", 142 |     "r.SetObserved(\"weight\")\n", 143 |     "r.SetVar(0, \"height\")\n", 144 |     "\n", 145 |     "// Extract our y values.\n", 146 |     "y := trainingDF.Col(\"Weight(pounds)\").Float()\n", 147 |     "\n", 148 |     "// Loop over records in the CSV, adding the training data to the regression value.\n", 149 |     "for i, x := range trainingDF.Col(\"Height(inches)\").Float() {\n", 150 |     "\n", 151 |     "    // Add these points to the regression value.\n", 152 |     "    r.Train(regression.DataPoint(y[i], []float64{x}))\n", 153 |     "}\n", 154 |     "\n", 155 |     "// Train/fit the regression model.\n", 156 |     "r.Run()\n", 157 |     "\n", 158 |     "// Output the trained model parameters.\n", 159 |     "fmt.Printf(\"\\nRegression Formula:\\n%v\\n\\n\", r.Formula)" 160 |    ] 161 |   }, 162 |   { 163 |    "cell_type": "markdown", 164 |    "metadata": {}, 165 |    "source": [ 166 |     "## Evaluating our model on the test data" 167 |    ] 168 |   }, 169 |   { 170 |    "cell_type": "code", 171 |    "execution_count": null, 172 |    "metadata": {}, 173 |    "outputs": [], 174 |    "source": [ 175 |     "// Extract the observed weights and test height values.\n", 176 |     "observed := testDF.Col(\"Weight(pounds)\").Float()\n", 177 |     "testHeights := testDF.Col(\"Height(inches)\").Float()\n", 178 |     "\n", 179 |     "// Calculate the mean squared error.\n", 180 |     "var mSE float64\n", 181 |     "for idx, oVal := range observed {\n", 182 |     "    \n", 183 |     "    // Make our prediction.\n", 184 |     "    prediction, err := r.Predict([]float64{testHeights[idx]})\n", 185 |     "    if err != nil {\n", 186 |     "        fmt.Println(err)\n", 187 |     "    }\n", 188 |     "    \n", 189 |     "    mSE += math.Pow(oVal-prediction, 2) / float64(len(observed))\n", 190 |     "}\n", 191 |     "\n", 192 |     "// Calculate the RMSE.\n", 193 |     "rMSE := math.Sqrt(mSE)\n", 194 |     "\n", 195 |     "// Output the result to stdout.\n", 196 |     "fmt.Printf(\"RMSE: %f\", rMSE)" 197 |    ] 198 |   }, 199 |   { 200 |    "cell_type": "code", 201 |    "execution_count": null, 202 |    "metadata": {}, 203 |    "outputs": [], 204 |    "source": [] 205 |   } 206 |  ], 207 |  "metadata": { 208 |   "kernelspec": { 209 |    "display_name": "Go", 210 |    "language": "go", 211 |    "name": "gophernotes" 212 |   }, 213 |   "language_info": { 214 |    "codemirror_mode": "", 215 |    "file_extension": ".go", 216 |    "mimetype": "", 217 |    "name": "go", 218 |    "nbconvert_exporter": "", 219 |    "pygments_lexer": "", 220 |    "version": "go1.10.3" 221 |   } 222 |  }, 223 |  "nbformat": 4, 224 |  "nbformat_minor": 2 225 | } 226 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_with_go/bonus/bonus1/bonus1.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | 	"encoding/json" 5 | 	"fmt" 6 | 	"image" 7 | 	"image/png" 8 | 	"io" 9 | 	"io/ioutil" 10 | 	"log" 11 | 	"os" 12 | 	"reflect" 13 | 14 | 	tf "github.com/tensorflow/tensorflow/tensorflow/go" 15 | ) 16 | 17 | const ( 18 | 	batchSize = 1 19 | 	channels  = 3 20 | ) 21 | 22 | // labelItem contains information about labels. 23 | type labelItem struct { 24 | 	Name        string `json:"name"` 25 | 	ID          int    `json:"id"` 26 | 	DisplayName string `json:"display_name"` 27 | } 28 | 29 | // rgbaToPixel converts 16-bit RGBA channel values to an 8-bit RGB pixel. 
30 | func rgbaToPixel(r uint32, g uint32, b uint32, a uint32) []uint8 { 31 | 	return []uint8{uint8(r / 256), uint8(g / 256), uint8(b / 256)} 32 | } 33 | 34 | func loadImageAsTensor(filePath string) (*tf.Tensor, error) { 35 | 36 | 	// open path for image 37 | 	file, err := os.Open(filePath) 38 | 	if err != nil { 39 | 		log.Fatal("an error opening file: ", err.Error()) 40 | 	} 41 | 42 | 	// it is a PNG image; we decode it to read its information 43 | 	img, err := png.Decode(file) 44 | 	if err != nil { 45 | 		log.Fatal("error decoding png image", err) 46 | 	} 47 | 48 | 	// read bounds of image (we'll need that later) 49 | 	bounds := img.Bounds() 50 | 	width, height := bounds.Max.X, bounds.Max.Y 51 | 	file.Close() 52 | 	file, _ = os.Open(filePath) 53 | 54 | 	// define the variable that will hold a batch of images to transform in tensor. It has 4 dimensions: N R G B 55 | 	// where N is the quantity of images in the batch (1) and R G B is the color intensity of a pixel in the corresponding channel 56 | 	var imageData [][][][]uint8 57 | 	imageData = append(imageData, loadOneImageFromFile(filePath)) 58 | 59 | 	lshape := []int64{int64(batchSize), int64(width), int64(height), int64(channels)} 60 | 	tf.ReadTensor(tf.Uint8, lshape, file) // NOTE: this result is discarded; the tensor actually returned is built from imageData below 61 | 62 | 	// return tensor with image data 63 | 	return tf.NewTensor(imageData) 64 | } 65 | 66 | // loadOneImageFromFile loads an image from a file and transforms it 67 | // to [][][]uint8 (rows x columns x RGB channels). 68 | func loadOneImageFromFile(imageName string) [][][]uint8 { 69 | 	fmt.Println("opening", imageName) 70 | 	existingImageFile, err := os.Open(imageName) 71 | 	if err != nil { 72 | 		// Handle error 73 | 		log.Fatal("error loading image", err) 74 | 	} 75 | 	defer existingImageFile.Close() 76 | 	return getPixels(existingImageFile) 77 | } 78 | 79 | func getPixels(file io.Reader) [][][]uint8 { 80 | 	img, _, err := image.Decode(file) 81 | 82 | 	if err != nil { 83 | 		return nil 84 | 	} 85 | 86 | 	bounds := img.Bounds() 87 | 	width, height := bounds.Max.X, bounds.Max.Y 88 | 89 | 	var pixels [][][]uint8 90 | 	for y := 0; y < height; y++ { 91 | 		var row [][]uint8 92 | 		for x := 0; x < width; x++ { 93 | 			row = append(row, rgbaToPixel(img.At(x, y).RGBA())) 94 | 		} 95 | 		pixels = append(pixels, row) 96 | 	} 97 | 98 | 	return pixels 99 | } 100 | 101 | // Load saved model from folder 102 | func loadModel(modeldir *string) (*tf.SavedModel, error) { 103 | 	if modeldir == nil { 104 | 		log.Println("error loading. Model must exist") 105 | 		return nil, nil 106 | 	} 107 | 	return tf.LoadSavedModel(*modeldir, []string{"serve"}, nil) 108 | } 109 | 110 | // Prediction predicts the objects found in an image .png. 111 | func Prediction(modelPath, imagePath, labelsPath string) { 112 | 	// load labels; we will use them later 113 | 	labels := readLabels(labelsPath) 114 | 115 | 	// load saved model 116 | 	model, err := loadModel(&modelPath) 117 | 	if err != nil { 118 | 		log.Println(err) 119 | 	} 120 | 	defer model.Session.Close() 121 | 122 | 	// create input tensor 123 | 	tensor, err := loadImageAsTensor(imagePath) 124 | 	if err != nil { 125 | 		log.Println(err) 126 | 	} 127 | 	tensors := map[tf.Output]*tf.Tensor{ 128 | 		model.Graph.Operation("image_tensor").Output(0): tensor, 129 | 	} 130 | 131 | 	// create output tensors: call the operations where we'll get the answer of the model 132 | 	// this model returns: 1. a score in % of the detected element. 2. A class (label). It is a number. See the labels file for more detail. 133 | 	// 3. 
the coordinates of the bounding box of the detected object 134 | 	outputs := []tf.Output{ 135 | 		model.Graph.Operation("detection_scores").Output(0), 136 | 		model.Graph.Operation("detection_classes").Output(0), 137 | 		model.Graph.Operation("detection_boxes").Output(0), 138 | 	} 139 | 140 | 	// run the model in a session, and get the result 141 | 	result, runErr := model.Session.Run( 142 | 		tensors, 143 | 		outputs, 144 | 		nil, 145 | 	) 146 | 	if runErr != nil { 147 | 		log.Fatal("error running the session with input, err:", runErr.Error()) 148 | 		return 149 | 	} 150 | 	// parse the result to be human readable 151 | 	parseResult(result, labels) 152 | } 153 | 154 | /** 155 |  * Parse the result. result is a vector where each position contains the output of tensors we requested. 156 |  * in this case we have 3 outputs: 157 |  * result[0]= detection_scores 158 |  * result[1]= detection_classes 159 |  * result[2]= detection_boxes 160 |  */ 161 | func parseResult(result []*tf.Tensor, labels []labelItem) { 162 | 	// each position of the result vector is of type interface{}; we know it is an array, 163 | 	// so we must read values using reflection 164 | 	scores := reflect.ValueOf(result[0].Value()).Index(0) 165 | 	fmt.Println("scores", scores) // scores 166 | 167 | 	classes := reflect.ValueOf(result[1].Value()).Index(0) 168 | 	fmt.Println("classes", classes) // classes 169 | 170 | 	boxes := reflect.ValueOf(result[2].Value()).Index(0) 171 | 	fmt.Println("boxes", boxes) // bounding boxes 172 | 173 | 	for i := 0; i < 10; i++ { 174 | 		value := scores.Index(i) 175 | 		if value.Float() > 0.5 { // we only show things where the model is "sure", it means score > 50% 176 | 			item := findLabel(labels, int(classes.Index(i).Float())) 177 | 			fmt.Println("Detected", item.DisplayName, "with probability", value.Float()) 178 | 		} 179 | 	} 180 | } 181 | 182 | // utility to find a labelItem from an id 183 | func findLabel(labels []labelItem, id int) labelItem { 184 | 	for _, v := range labels { 185 | 		if int(v.ID) == id { 186 | 			return v 187 | 		} 188 | 	} 189 | 	return labelItem{} 190 | } 191 | 192 | // load Labels from file 193 | func readLabels(labelsPath string) []labelItem { 194 | 	var items []labelItem 195 | 	jsonFile, err := os.Open(labelsPath) 196 | 	if err != nil { 197 | 		log.Fatal("labels error ", err) 198 | 		return items 199 | 	} 200 | 	byteValue, _ := ioutil.ReadAll(jsonFile) 201 | 	err = json.Unmarshal(byteValue, &items) 202 | 	if err != nil { 203 | 		log.Fatal("unmarshalling error", err) 204 | 		return items 205 | 	} 206 | 	return items 207 | } 208 | 209 | func main() { 210 | 	// WARNING!!! 
change to the path of your model, labels and image 211 | modelPath := "/go/src/machine-learning-with-go/data/ssd_mobilenet_v1_coco_2018_01_28/saved_model" 212 | labelsPath := "/go/src/machine-learning-with-go/data/ssd_mobilenet_labels/labels.json" 213 | imagePath := "/go/src/machine-learning-with-go/data/office.png" 214 | Prediction(modelPath, imagePath, labelsPath) 215 | } 216 | -------------------------------------------------------------------------------- /machine-learning-with-go/ml_with_go/data/ssd_mobilenet_labels/labels.json: -------------------------------------------------------------------------------- 1 | [ { 2 | "name": "/m/01g317", 3 | "id": 1, 4 | "display_name": "person" 5 | }, 6 | { 7 | "name": "/m/0199g", 8 | "id": 2, 9 | "display_name": "bicycle" 10 | }, 11 | { 12 | "name": "/m/0k4j", 13 | "id": 3, 14 | "display_name": "car" 15 | }, 16 | { 17 | "name": "/m/04_sv", 18 | "id": 4, 19 | "display_name": "motorcycle" 20 | }, 21 | { 22 | "name": "/m/05czz6l", 23 | "id": 5, 24 | "display_name": "airplane" 25 | }, 26 | { 27 | "name": "/m/01bjv", 28 | "id": 6, 29 | "display_name": "bus" 30 | }, 31 | { 32 | "name": "/m/07jdr", 33 | "id": 7, 34 | "display_name": "train" 35 | }, 36 | { 37 | "name": "/m/07r04", 38 | "id": 8, 39 | "display_name": "truck" 40 | }, 41 | { 42 | "name": "/m/019jd", 43 | "id": 9, 44 | "display_name": "boat" 45 | }, 46 | { 47 | "name": "/m/015qff", 48 | "id": 10, 49 | "display_name": "traffic light" 50 | }, 51 | { 52 | "name": "/m/01pns0", 53 | "id": 11, 54 | "display_name": "fire hydrant" 55 | }, 56 | { 57 | "name": "/m/02pv19", 58 | "id": 13, 59 | "display_name": "stop sign" 60 | }, 61 | { 62 | "name": "/m/015qbp", 63 | "id": 14, 64 | "display_name": "parking meter" 65 | }, 66 | { 67 | "name": "/m/0cvnqh", 68 | "id": 15, 69 | "display_name": "bench" 70 | }, 71 | { 72 | "name": "/m/015p6", 73 | "id": 16, 74 | "display_name": "bird" 75 | }, 76 | { 77 | "name": "/m/01yrx", 78 | "id": 17, 79 | "display_name": "cat" 80 | }, 81 | { 82 | "name": "/m/0bt9lr", 83 | "id": 18, 84 | "display_name": "dog" 85 | }, 86 | { 87 | "name": "/m/03k3r", 88 | "id": 19, 89 | "display_name": "horse" 90 | }, 91 | { 92 | "name": "/m/07bgp", 93 | "id": 20, 94 | "display_name": "sheep" 95 | }, 96 | { 97 | "name": "/m/01xq0k1", 98 | "id": 21, 99 | "display_name": "cow" 100 | }, 101 | { 102 | "name": "/m/0bwd_0j", 103 | "id": 22, 104 | "display_name": "elephant" 105 | }, 106 | { 107 | "name": "/m/01dws", 108 | "id": 23, 109 | "display_name": "bear" 110 | }, 111 | { 112 | "name": "/m/0898b", 113 | "id": 24, 114 | "display_name": "zebra" 115 | }, 116 | { 117 | "name": "/m/03bk1", 118 | "id": 25, 119 | "display_name": "giraffe" 120 | }, 121 | { 122 | "name": "/m/01940j", 123 | "id": 27, 124 | "display_name": "backpack" 125 | }, 126 | { 127 | "name": "/m/0hnnb", 128 | "id": 28, 129 | "display_name": "umbrella" 130 | }, 131 | { 132 | "name": "/m/080hkjn", 133 | "id": 31, 134 | "display_name": "handbag" 135 | }, 136 | { 137 | "name": "/m/01rkbr", 138 | "id": 32, 139 | "display_name": "tie" 140 | }, 141 | { 142 | "name": "/m/01s55n", 143 | "id": 33, 144 | "display_name": "suitcase" 145 | }, 146 | { 147 | "name": "/m/02wmf", 148 | "id": 34, 149 | "display_name": "frisbee" 150 | }, 151 | { 152 | "name": "/m/071p9", 153 | "id": 35, 154 | "display_name": "skis" 155 | }, 156 | { 157 | "name": "/m/06__v", 158 | "id": 36, 159 | "display_name": "snowboard" 160 | }, 161 | { 162 | "name": "/m/018xm", 163 | "id": 37, 164 | "display_name": "sports ball" 165 | }, 166 | { 167 | "name": "/m/02zt3", 168 | 
"id": 38, 169 | "display_name": "kite" 170 | }, 171 | { 172 | "name": "/m/03g8mr", 173 | "id": 39, 174 | "display_name": "baseball bat" 175 | }, 176 | { 177 | "name": "/m/03grzl", 178 | "id": 40, 179 | "display_name": "baseball glove" 180 | }, 181 | { 182 | "name": "/m/06_fw", 183 | "id": 41, 184 | "display_name": "skateboard" 185 | }, 186 | { 187 | "name": "/m/019w40", 188 | "id": 42, 189 | "display_name": "surfboard" 190 | }, 191 | { 192 | "name": "/m/0dv9c", 193 | "id": 43, 194 | "display_name": "tennis racket" 195 | }, 196 | { 197 | "name": "/m/04dr76w", 198 | "id": 44, 199 | "display_name": "bottle" 200 | }, 201 | { 202 | "name": "/m/09tvcd", 203 | "id": 46, 204 | "display_name": "wine glass" 205 | }, 206 | { 207 | "name": "/m/08gqpm", 208 | "id": 47, 209 | "display_name": "cup" 210 | }, 211 | { 212 | "name": "/m/0dt3t", 213 | "id": 48, 214 | "display_name": "fork" 215 | }, 216 | { 217 | "name": "/m/04ctx", 218 | "id": 49, 219 | "display_name": "knife" 220 | }, 221 | { 222 | "name": "/m/0cmx8", 223 | "id": 50, 224 | "display_name": "spoon" 225 | }, 226 | { 227 | "name": "/m/04kkgm", 228 | "id": 51, 229 | "display_name": "bowl" 230 | }, 231 | { 232 | "name": "/m/09qck", 233 | "id": 52, 234 | "display_name": "banana" 235 | }, 236 | { 237 | "name": "/m/014j1m", 238 | "id": 53, 239 | "display_name": "apple" 240 | }, 241 | { 242 | "name": "/m/0l515", 243 | "id": 54, 244 | "display_name": "sandwich" 245 | }, 246 | { 247 | "name": "/m/0cyhj_", 248 | "id": 55, 249 | "display_name": "orange" 250 | }, 251 | { 252 | "name": "/m/0hkxq", 253 | "id": 56, 254 | "display_name": "broccoli" 255 | }, 256 | { 257 | "name": "/m/0fj52s", 258 | "id": 57, 259 | "display_name": "carrot" 260 | }, 261 | { 262 | "name": "/m/01b9xk", 263 | "id": 58, 264 | "display_name": "hot dog" 265 | }, 266 | { 267 | "name": "/m/0663v", 268 | "id": 59, 269 | "display_name": "pizza" 270 | }, 271 | { 272 | "name": "/m/0jy4k", 273 | "id": 60, 274 | "display_name": "donut" 275 | }, 276 | { 277 | "name": "/m/0fszt", 278 | "id": 61, 279 | "display_name": "cake" 280 | }, 281 | { 282 | "name": "/m/01mzpv", 283 | "id": 62, 284 | "display_name": "chair" 285 | }, 286 | { 287 | "name": "/m/02crq1", 288 | "id": 63, 289 | "display_name": "couch" 290 | }, 291 | { 292 | "name": "/m/03fp41", 293 | "id": 64, 294 | "display_name": "potted plant" 295 | }, 296 | { 297 | "name": "/m/03ssj5", 298 | "id": 65, 299 | "display_name": "bed" 300 | }, 301 | { 302 | "name": "/m/04bcr3", 303 | "id": 67, 304 | "display_name": "dining table" 305 | }, 306 | { 307 | "name": "/m/09g1w", 308 | "id": 70, 309 | "display_name": "toilet" 310 | }, 311 | { 312 | "name": "/m/07c52", 313 | "id": 72, 314 | "display_name": "tv" 315 | }, 316 | { 317 | "name": "/m/01c648", 318 | "id": 73, 319 | "display_name": "laptop" 320 | }, 321 | { 322 | "name": "/m/020lf", 323 | "id": 74, 324 | "display_name": "mouse" 325 | }, 326 | { 327 | "name": "/m/0qjjc", 328 | "id": 75, 329 | "display_name": "remote" 330 | }, 331 | { 332 | "name": "/m/01m2v", 333 | "id": 76, 334 | "display_name": "keyboard" 335 | }, 336 | { 337 | "name": "/m/050k8", 338 | "id": 77, 339 | "display_name": "cell phone" 340 | }, 341 | { 342 | "name": "/m/0fx9l", 343 | "id": 78, 344 | "display_name": "microwave" 345 | }, 346 | { 347 | "name": "/m/029bxz", 348 | "id": 79, 349 | "display_name": "oven" 350 | }, 351 | { 352 | "name": "/m/01k6s3", 353 | "id": 80, 354 | "display_name": "toaster" 355 | }, 356 | { 357 | "name": "/m/0130jx", 358 | "id": 81, 359 | "display_name": "sink" 360 | }, 361 | { 362 | "name": "/m/040b_t", 
363 | "id": 82, 364 | "display_name": "refrigerator" 365 | }, 366 | { 367 | "name": "/m/0bt_c3", 368 | "id": 84, 369 | "display_name": "book" 370 | }, 371 | { 372 | "name": "/m/01x3z", 373 | "id": 85, 374 | "display_name": "clock" 375 | }, 376 | { 377 | "name": "/m/02s195", 378 | "id": 86, 379 | "display_name": "vase" 380 | }, 381 | { 382 | "name": "/m/01lsmm", 383 | "id": 87, 384 | "display_name": "scissors" 385 | }, 386 | { 387 | "name": "/m/0kmg4", 388 | "id": 88, 389 | "display_name": "teddy bear" 390 | }, 391 | { 392 | "name": "/m/03wvsk", 393 | "id": 89, 394 | "display_name": "hair drier" 395 | }, 396 | { 397 | "name": "/m/012xff", 398 | "id": 90, 399 | "display_name": "toothbrush" 400 | } 401 | ] -------------------------------------------------------------------------------- /machine-learning-with-go/ml_with_go/data/5kings_battles_v1.csv: -------------------------------------------------------------------------------- 1 | name,year,battle_number,attacker_king,defender_king,attacker_1,attacker_2,attacker_3,attacker_4,defender_1,defender_2,attacker_outcome,battle_type,major_death,major_capture,attacker_size,defender_size,attacker_commander,defender_commander,summer,location,region,note 2 | Battle of the Golden Tooth,298,1,Joffrey/Tommen Baratheon,Robb Stark,Lannister,,,,Tully,,win,pitched battle,1,0,15000,4000,Jaime Lannister,"Clement Piper, Vance",1,Golden Tooth,The Westerlands, 3 | Battle at the Mummer's Ford,298,2,Joffrey/Tommen Baratheon,Robb Stark,Lannister,,,,Baratheon,,win,ambush,1,0,,120,Gregor Clegane,Beric Dondarrion,1,Mummer's Ford,The Riverlands, 4 | Battle of Riverrun,298,3,Joffrey/Tommen Baratheon,Robb Stark,Lannister,,,,Tully,,win,pitched battle,0,1,15000,10000,"Jaime Lannister, Andros Brax","Edmure Tully, Tytos Blackwood",1,Riverrun,The Riverlands, 5 | Battle of the Green Fork,298,4,Robb Stark,Joffrey/Tommen Baratheon,Stark,,,,Lannister,,loss,pitched battle,1,1,18000,20000,"Roose Bolton, Wylis Manderly, Medger Cerwyn, Harrion Karstark, Halys Hornwood","Tywin Lannister, Gregor Clegane, Kevan Lannister, Addam Marbrand",1,Green Fork,The Riverlands, 6 | Battle of the Whispering Wood,298,5,Robb Stark,Joffrey/Tommen Baratheon,Stark,Tully,,,Lannister,,win,ambush,1,1,1875,6000,"Robb Stark, Brynden Tully",Jaime Lannister,1,Whispering Wood,The Riverlands, 7 | Battle of the Camps,298,6,Robb Stark,Joffrey/Tommen Baratheon,Stark,Tully,,,Lannister,,win,ambush,0,0,6000,12625,"Robb Stark, Tytos Blackwood, Brynden Tully","Lord Andros Brax, Forley Prester",1,Riverrun,The Riverlands, 8 | Sack of Darry,298,7,Joffrey/Tommen Baratheon,Robb Stark,Lannister,,,,Darry,,win,pitched battle,0,0,,,Gregor Clegane,Lyman Darry,1,Darry,The Riverlands, 9 | Battle of Moat Cailin,299,8,Balon/Euron Greyjoy,Robb Stark,Greyjoy,,,,Stark,,win,pitched battle,0,0,,,Victarion Greyjoy,,1,Moat Cailin,The North, 10 | Battle of Deepwood Motte,299,9,Balon/Euron Greyjoy,Robb Stark,Greyjoy,,,,Stark,,win,siege,0,0,1000,,Asha Greyjoy,,1,Deepwood Motte,The North, 11 | Battle of the Stony Shore,299,10,Balon/Euron Greyjoy,Robb Stark,Greyjoy,,,,Stark,,win,ambush,0,0,264,,Theon Greyjoy,,1,Stony Shore,The North,"Greyjoy's troop number based on the Battle of Deepwood Motte, in which Asha had 1000 soldier on 30 longships. That comes out to ~33 per longship. In the Battle of the Stony Shore, Theon has 8 longships, and just we can estimate that he has 8*33 =265 troops." 
12 | Battle of Torrhen's Square,299,11,Robb Stark,Balon/Euron Greyjoy,Stark,,,,Greyjoy,,win,pitched battle,0,0,244,900,"Rodrik Cassel, Cley Cerwyn",Dagmer Cleftjaw,1,Torrhen's Square,The North,Greyjoy's troop number comes from the 264 estimate to have arrived on the stony shore minus the 20 Theon takes to attack Winterfell. Thus 264-20=244 13 | Battle of Winterfell,299,12,Balon/Euron Greyjoy,Robb Stark,Greyjoy,,,,Stark,,win,ambush,0,1,20,,Theon Greyjoy,Bran Stark,1,Winterfell,The North,"It isn't mentioned how many Stark men are left in Winterfell, other than ""very few""." 14 | Sack of Torrhen's Square,299,13,Balon/Euron Greyjoy,Balon/Euron Greyjoy,Greyjoy,,,,Stark,,win,siege,0,1,,,Dagmer Cleftjaw,,1,Torrhen's Square,The North, 15 | Sack of Winterfell,299,14,Joffrey/Tommen Baratheon,Robb Stark,Bolton,Greyjoy,,,Stark,,win,ambush,1,0,618,2000,"Ramsay Snow, Theon Greyjoy ","Rodrik Cassel, Cley Cerwyn, Leobald Tallhart",1,Winterfell,The North,"Since House Bolton betrays the Starks for House Lannister, we code this battle as between these two houses. Greyjoy men, numbering only 20, don't play a major part in the fighting and end up dying anyway." 16 | Battle of Oxcross,299,15,Robb Stark,Joffrey/Tommen Baratheon,Stark,Tully,,,Lannister,,win,ambush,1,1,6000,10000,"Robb Stark, Brynden Tully","Stafford Lannister, Roland Crakehall, Antario Jast",1,Oxcross,The Westerlands, 17 | Siege of Storm's End,299,16,Stannis Baratheon,Renly Baratheon,Baratheon,,,,Baratheon,,win,siege,1,0,5000,20000,"Stannis Baratheon, Davos Seaworth","Renly Baratheon, Cortnay Penrose, Loras Tyrell, Randyll Tarly, Mathis Rowan",1,Storm's End,The Stormlands, 18 | Battle of the Fords,299,17,Joffrey/Tommen Baratheon,Robb Stark,Lannister,,,,Tully,,loss,pitched battle,0,0,20000,10000,"Tywin Lannister, Flement Brax, Gregor Clegane, Addam Marbrand, Lyle Crakehall, Leo Lefford","Edmure Tully, Jason Mallister, Karyl Vance",1,Red Fork,The Riverlands, 19 | Sack of Harrenhal,299,18,Robb Stark,Joffrey/Tommen Baratheon,Stark,,,,Lannister,,win,ambush,1,0,100,100,"Roose Bolton, Vargo Hoat, Robett Glover",Amory Lorch,1,Harrenhal,The Riverlands, 20 | Battle of the Crag,299,19,Robb Stark,Joffrey/Tommen Baratheon,Stark,,,,Lannister,,win,ambush,0,0,6000,,"Robb Stark, Smalljon Umber, Black Walder Frey",Rolph Spicer,1,Crag,The Westerlands, 21 | Battle of the Blackwater,299,20,Stannis Baratheon,Joffrey/Tommen Baratheon,Baratheon,,,,Lannister,,loss,pitched battle,1,1,21000,7250,"Stannis Baratheon, Imry Florent, Guyard Morrigen, Rolland Storm, Salladhor Saan, Davos Seaworth","Tyrion Lannister, Jacelyn Bywater, Sandor Clegane, Tywin Lannister, Garlan Tyrell, Mace Tyrell, Randyll Tarly",1,King's Landing,The Crownlands, 22 | Siege of Darry,299,21,Robb Stark,Joffrey/Tommen Baratheon,Darry,,,,Lannister,,win,siege,0,0,,,Helman Tallhart,,1,Darry,The Riverlands, 23 | Battle of Duskendale,299,22,Robb Stark,Joffrey/Tommen Baratheon,Stark,,,,Lannister,,loss,pitched battle,1,0,3000,,"Robertt Glover, Helman Tallhart","Randyll Tarly, Gregor Clegane",1,Duskendale,The Crownlands, 24 | Battle of the Burning Septry,299,23,,,Brotherhood without Banners,,,,Brave Companions,,win,pitched battle,0,0,,,,,1,,The Riverlands, 25 | Battle of the Ruby Ford,299,24,Joffrey/Tommen Baratheon,Robb Stark,Lannister,,,,Stark,,win,pitched battle,0,0,,6000,Gregor Clegane,"Roose Bolton, Wylis Manderly",,Ruby Ford,The Riverlands, 26 | Retaking of Harrenhal,299,25,Joffrey/Tommen Baratheon,,Lannister,,,,Brave Companions,,win,pitched battle,1,0,,,Gregor Clegane,Vargo Hoat,1,Harrenhal,The 
Riverlands, 27 | The Red Wedding,299,26,Joffrey/Tommen Baratheon,Robb Stark,Frey,Bolton,,,Stark,,win,ambush,1,1,3500,3500,"Walder Frey, Roose Bolton, Walder Rivers",Robb Stark,1,The Twins,The Riverlands,"This observation refers to the battle against the Stark men, not the attack on the wedding" 28 | Siege of Seagard,299,27,Robb Stark,Joffrey/Tommen Baratheon,Frey,,,,Mallister,,win,siege,0,1,,,Walder Frey,Jason Mallister,1,Seagard,The Riverlands, 29 | Battle of Castle Black,300,28,Stannis Baratheon,Mance Rayder,Free folk,Thenns,Giants,,Night's Watch,Baratheon,loss,siege,1,1,100000,1240,"Mance Rayder, Tormund Giantsbane, Harma Dogshead, Magnar Styr, Varamyr","Stannis Baratheon, Jon Snow, Donal Noye, Cotter Pyke",0,Castle Black,Beyond the Wall, 30 | Fall of Moat Cailin,300,29,Joffrey/Tommen Baratheon,Balon/Euron Greyjoy,Bolton,,,,Greyjoy,,win,siege,0,0,,,Ramsey Bolton,,0,Moat Cailin,The North, 31 | Sack of Saltpans,300,30,,,Brave Companions,,,,,,win,razing,0,0,,,Rorge,,0,Saltpans,The Riverlands, 32 | Retaking of Deepwood Motte,300,31,Stannis Baratheon,Balon/Euron Greyjoy,Baratheon,Karstark,Mormont,Glover,Greyjoy,,win,pitched battle,0,0,4500,200,"Stannis Baratheon, Alysane Mormot",Asha Greyjoy,0,Deepwood Motte,The North, 33 | Battle of the Shield Islands,300,32,Balon/Euron Greyjoy,Joffrey/Tommen Baratheon,Greyjoy,,,,Tyrell,,win,pitched battle,0,0,,,"Euron Greyjoy, Victarion Greyjoy",,0,Shield Islands,The Reach, 34 | "Invasion of Ryamsport, Vinetown, and Starfish Harbor",300,33,Balon/Euron Greyjoy,Joffrey/Tommen Baratheon,Greyjoy,,,,Tyrell,,win,razing,0,0,,,"Euron Greyjoy, Victarion Greyjoy",,0,"Ryamsport, Vinetown, Starfish Harbor",The Reach, 35 | Second Seige of Storm's End,300,34,Joffrey/Tommen Baratheon,Stannis Baratheon,Baratheon,,,,Baratheon,,win,siege,0,0,,200,"Mace Tyrell, Mathis Rowan",Gilbert Farring,0,Storm's End,The Stormlands, 36 | Siege of Dragonstone,300,35,Joffrey/Tommen Baratheon,Stannis Baratheon,Baratheon,,,,Baratheon,,win,siege,0,0,2000,,"Loras Tyrell, Raxter Redwyne",Rolland Storm,0,Dragonstone,The Stormlands, 37 | Siege of Riverrun,300,36,Joffrey/Tommen Baratheon,Robb Stark,Lannister,Frey,,,Tully,,win,siege,0,0,3000,,"Daven Lannister, Ryman Fey, Jaime Lannister",Brynden Tully,0,Riverrun,The Riverlands, 38 | Siege of Raventree,300,37,Joffrey/Tommen Baratheon,Robb Stark,Bracken,Lannister,,,Blackwood,,win,siege,0,1,1500,,"Jonos Bracken, Jaime Lannister",Tytos Blackwood,0,Raventree,The Riverlands, 39 | Siege of Winterfell,300,38,Stannis Baratheon,Joffrey/Tommen Baratheon,Baratheon,Karstark,Mormont,Glover,Bolton,Frey,,,,,5000,8000,Stannis Baratheon,Roose Bolton,0,Winterfell,The North, --------------------------------------------------------------------------------