├── Chapter02 ├── Height Histogram.png ├── Index Histogram.png ├── Most Common Words.ipynb ├── Pre-processing BMI Data.ipynb ├── Visualising BMI Data.ipynb ├── Weight Histogram.png └── height_hist.png ├── Chapter03 ├── Create a Linear Regression 2.ipynb ├── Create a Linear Regression.ipynb ├── Train a Deep Learning Classifier.ipynb ├── Train a Random Forest.ipynb ├── Train a logistic classifier.ipynb ├── Train an SVM classifier.ipynb ├── download-fashion-mnist.sh └── download-housing.sh ├── Chapter04 ├── Clustering Scatter.jpg ├── PCA Scatter.jpg ├── Perform Clustering Using K-Means.ipynb ├── Use PCA For Dimensionality Reduction.ipynb └── download-iris.sh ├── Chapter05 ├── Dockerfile ├── Using HTTP to Run a Python Model.ipynb ├── Using os exec to Run a Python Model.ipynb ├── install-python-dependencies.sh ├── main.go ├── model.pickle ├── model.py ├── model_http.py ├── requirements.txt ├── run.sh └── saved_model │ ├── saved_model.pb │ └── variables │ ├── variables.data-00000-of-00001 │ └── variables.index ├── LICENSE ├── README.md └── datasets ├── LICENSE ├── README.md ├── bmi ├── 500_Person_Gender_Height_Weight_Index.csv └── SOCR_Data_MLB_HeightsWeights.csv ├── download-datasets.sh └── start-gophernotes.sh /Chapter02/Height Histogram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Machine-Learning-with-Go-Quick-Start-Guide/f42e51ab1141e7df39cd01a8a4ee81728156b9c7/Chapter02/Height Histogram.png -------------------------------------------------------------------------------- /Chapter02/Index Histogram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Machine-Learning-with-Go-Quick-Start-Guide/f42e51ab1141e7df39cd01a8a4ee81728156b9c7/Chapter02/Index Histogram.png -------------------------------------------------------------------------------- /Chapter02/Most Common Words.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import (\n", 17 | " \"fmt\"\n", 18 | " \"io/ioutil\"\n", 19 | " \"github.com/kniren/gota/dataframe\"\n", 20 | " \"github.com/kniren/gota/series\"\n", 21 | " \"strings\"\n", 22 | " \"strconv\"\n", 23 | ")" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 58, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "var kitchenReviews = \"../datasets/words/processed_acl/kitchen\"" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "Load the data" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 59, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "positives, err := ioutil.ReadFile(kitchenReviews + \"/positive.review\")\n", 49 | "negatives, err2 := ioutil.ReadFile(kitchenReviews + \"/negative.review\")\n", 50 | "if err != nil || err2 != nil {\n", 51 | " fmt.Println(\"Error(s)\", err, err2)\n", 52 | "}" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "The data consists of word(s):frequency pairs separated by spaces:" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 60, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "data": { 69 | "text/plain": [ 70 | "them_it:1 hovering:1 and_occasional:1 cousin_the:2 fictional_baudelaire:1 their_struggles:1 unfortun" 71 | ] 72 | }, 73 | "execution_count": 60, 74 | "metadata": {}, 75 | "output_type": "execute_result" 76 | } 77 | ], 78 | "source": [ 79 | "string(positives)[0:100]" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "Create a struct to hold the word/frequency pair and use gota's 
LoadStructs() to convert this to a dataframe" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 61, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "pairsPositive := strings.Split(strings.Replace(string(positives), \"\\n\", \" \", -1), \" \")\n", 96 | "pairsNegative := strings.Split(strings.Replace(string(negatives), \"\\n\", \" \", -1), \" \")" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 62, 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "name": "stdout", 106 | "output_type": "stream", 107 | "text": [ 108 | "Positive pairs 132222\n", 109 | "Negative Pairs 132222\n", 110 | "Example pair: `them_it:1`" 111 | ] 112 | }, 113 | { 114 | "data": { 115 | "text/plain": [ 116 | "25 " 117 | ] 118 | }, 119 | "execution_count": 62, 120 | "metadata": {}, 121 | "output_type": "execute_result" 122 | } 123 | ], 124 | "source": [ 125 | "fmt.Println(\"Positive pairs\", len(pairsPositive))\n", 126 | "fmt.Println(\"Negative Pairs\", len(pairsPositive))\n", 127 | "fmt.Printf(\"Example pair: `%s`\", pairsPositive[0])" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 63, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "type Pair struct {\n", 137 | " Phrase string \n", 138 | " Frequency int\n", 139 | "}\n", 140 | "\n", 141 | "// pairsAndFilters returns a slice of Pair, split by : to obtain the phrase and frequency,\n", 142 | "// as well as a map of the phrases that can be used as a lookup table later.\n", 143 | "func pairsAndFilters(splitPairs []string) ([]Pair, map[string]bool) {\n", 144 | " var (\n", 145 | " pairs []Pair\n", 146 | " m map[string]bool\n", 147 | " )\n", 148 | " m = make(map[string]bool)\n", 149 | " for _, pair := range splitPairs {\n", 150 | " p := strings.Split(pair, \":\")\n", 151 | " phrase := p[0]\n", 152 | " m[phrase] = true\n", 153 | " if len(p) < 2 {\n", 154 | " continue\n", 155 | " }\n", 156 | " freq, err := strconv.Atoi(p[1])\n", 157 | " if err 
!= nil {\n", 158 | " continue\n", 159 | " }\n", 160 | " pairs = append(pairs, Pair{\n", 161 | " Phrase: phrase,\n", 162 | " Frequency: freq,\n", 163 | " })\n", 164 | " }\n", 165 | " return pairs, m\n", 166 | "}\n", 167 | "\n", 168 | "// exclude returns a slice of Pair that does not contain the phrases in the exclusion map\n", 169 | "func exclude(pairs []Pair, exclusions map[string]bool) []Pair{\n", 170 | " var ret []Pair \n", 171 | " for i := range pairs{\n", 172 | " if !exclusions[pairs[i].Phrase]{\n", 173 | " ret = append(ret, pairs[i])\n", 174 | " }\n", 175 | " }\n", 176 | " return ret\n", 177 | "}\n" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": 64, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "parsedPositives, posPhrases := pairsAndFilters(pairsPositive)\n", 187 | "parsedNegatives, negPhrases := pairsAndFilters(pairsNegative)\n", 188 | "parsedPositives = exclude(parsedPositives, negPhrases)\n", 189 | "parsedNegatives = exclude(parsedNegatives, posPhrases)" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 65, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "dfPos := dataframe.LoadStructs(parsedPositives)\n", 199 | "dfNeg := dataframe.LoadStructs(parsedNegatives)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 66, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "dfPos = dfPos.Arrange(dataframe.RevSort(\"Frequency\"))\n", 209 | "dfNeg = dfNeg.Arrange(dataframe.RevSort(\"Frequency\"))" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 67, 215 | "metadata": {}, 216 | "outputs": [ 217 | { 218 | "name": "stdout", 219 | "output_type": "stream", 220 | "text": [ 221 | "[46383x2] DataFrame\n", 222 | "\n", 223 | " Phrase Frequency\n", 224 | " 0: tic-tac-toe 10 \n", 225 | " 1: wusthoff 7 \n", 226 | " 2: emperor 7 \n", 227 | " 3: shot_glasses 6 \n", 228 | " 4: pulp 6 \n", 229 | " 5: games 6 \n", 230 | 
" 6: sentry 6 \n", 231 | " 7: gravel 6 \n", 232 | " 8: the_emperor 5 \n", 233 | " 9: aebleskivers 5 \n", 234 | " ... ... \n", 235 | " \n", 236 | "\n" 237 | ] 238 | }, 239 | { 240 | "data": { 241 | "text/plain": [ 242 | "373 " 243 | ] 244 | }, 245 | "execution_count": 67, 246 | "metadata": {}, 247 | "output_type": "execute_result" 248 | } 249 | ], 250 | "source": [ 251 | "//most common words in positive reviews\n", 252 | "fmt.Println(dfPos)" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 68, 258 | "metadata": {}, 259 | "outputs": [ 260 | { 261 | "name": "stdout", 262 | "output_type": "stream", 263 | "text": [ 264 | "[45760x2] DataFrame\n", 265 | "\n", 266 | " Phrase Frequency\n", 267 | " 0: seeds 9 \n", 268 | " 1: perculator 7 \n", 269 | " 2: probes 7 \n", 270 | " 3: cork 7 \n", 271 | " 4: coffee_tank 5 \n", 272 | " 5: brookstone 5 \n", 273 | " 6: convection_oven 5 \n", 274 | " 7: black_goo 5 \n", 275 | " 8: waring_pro 5 \n", 276 | " 9: packs 5 \n", 277 | " ... ... 
\n", 278 | " \n", 279 | "\n" 280 | ] 281 | }, 282 | { 283 | "data": { 284 | "text/plain": [ 285 | "412 " 286 | ] 287 | }, 288 | "execution_count": 68, 289 | "metadata": {}, 290 | "output_type": "execute_result" 291 | } 292 | ], 293 | "source": [ 294 | "// most common words in negative reviews\n", 295 | "fmt.Println(dfNeg)" 296 | ] 297 | } 298 | ], 299 | "metadata": { 300 | "kernelspec": { 301 | "display_name": "Go", 302 | "language": "go", 303 | "name": "gophernotes" 304 | }, 305 | "language_info": { 306 | "codemirror_mode": "", 307 | "file_extension": ".go", 308 | "mimetype": "", 309 | "name": "go", 310 | "nbconvert_exporter": "", 311 | "pygments_lexer": "", 312 | "version": "go1.11.4" 313 | } 314 | }, 315 | "nbformat": 4, 316 | "nbformat_minor": 2 317 | } 318 | -------------------------------------------------------------------------------- /Chapter02/Pre-processing BMI Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Pre-processing BMI Data\n", 8 | "\n", 9 | "In this example, we will process height/weight data for MLB players using Gota, demonstrating how little code this takes. We will remove unnecessary columns, normalise columns, and convert data types in a few lines of code." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 3, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import (\n", 19 | " \"fmt\"\n", 20 | " \"github.com/kniren/gota/dataframe\"\n", 21 | " \"github.com/kniren/gota/series\"\n", 22 | " \"io/ioutil\"\n", 23 | " \"bytes\"\n", 24 | " \"math/rand\"\n", 25 | ")" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 4, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "const path = \"../datasets/bmi/SOCR_Data_MLB_HeightsWeights.csv\"" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 5, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "b, err := ioutil.ReadFile(path)\n", 44 | "if err != nil {\n", 45 | " fmt.Println(\"Error!\", err)\n", 46 | "}\n", 47 | "df := dataframe.ReadCSV(bytes.NewReader(b))" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 6, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "data": { 57 | "text/plain": [ 58 | "[1034x6] DataFrame\n", 59 | "\n", 60 | " Name Team Position Height(inches) Weight(pounds) ...\n", 61 | " 0: Adam_Donachie BAL Catcher 74 180 ...\n", 62 | " 1: Paul_Bako BAL Catcher 74 215 ...\n", 63 | " 2: Ramon_Hernandez BAL Catcher 72 210 ...\n", 64 | " 3: Kevin_Millar BAL First_Baseman 72 210 ...\n", 65 | " 4: Chris_Gomez BAL First_Baseman 73 188 ...\n", 66 | " 5: Brian_Roberts BAL Second_Baseman 69 176 ...\n", 67 | " 6: Miguel_Tejada BAL Shortstop 69 209 ...\n", 68 | " 7: Melvin_Mora BAL Third_Baseman 71 200 ...\n", 69 | " 8: Aubrey_Huff BAL Third_Baseman 76 231 ...\n", 70 | " 9: Adam_Stern BAL Outfielder 71 180 ...\n", 71 | " ... ... ... ... ... 
...\n", 72 | " ...\n", 73 | "\n", 74 | "Not Showing: Age \n" 75 | ] 76 | }, 77 | "execution_count": 6, 78 | "metadata": {}, 79 | "output_type": "execute_result" 80 | } 81 | ], 82 | "source": [ 83 | "df" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 7, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "df = df.Select([]string{\"Position\", \"Height(inches)\", \"Weight(pounds)\", \"Age\"})\n", 93 | "df = df.Rename(\"Height\",\"Height(inches)\")\n", 94 | "df = df.Rename(\"Weight\",\"Weight(pounds)\")" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 8, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "data": { 104 | "text/plain": [ 105 | "[1034x4] DataFrame\n", 106 | "\n", 107 | " Position Height Weight Age \n", 108 | " 0: Catcher 74 180 22.990000\n", 109 | " 1: Catcher 74 215 34.690000\n", 110 | " 2: Catcher 72 210 30.780000\n", 111 | " 3: First_Baseman 72 210 35.430000\n", 112 | " 4: First_Baseman 73 188 35.710000\n", 113 | " 5: Second_Baseman 69 176 29.390000\n", 114 | " 6: Shortstop 69 209 30.770000\n", 115 | " 7: Third_Baseman 71 200 35.070000\n", 116 | " 8: Third_Baseman 76 231 30.190000\n", 117 | " 9: Outfielder 71 180 27.050000\n", 118 | " ... ... ... ... 
\n", 119 | " \n" 120 | ] 121 | }, 122 | "execution_count": 8, 123 | "metadata": {}, 124 | "output_type": "execute_result" 125 | } 126 | ], 127 | "source": [ 128 | "df" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 9, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "ename": "ERROR", 138 | "evalue": "reflect.Value.Convert: value of type reflect.Value cannot be converted to type series.Type", 139 | "output_type": "error", 140 | "traceback": [ 141 | "reflect.Value.Convert: value of type reflect.Value cannot be converted to type series.Type" 142 | ] 143 | } 144 | ], 145 | "source": [ 146 | "df = df.Mutate(series.New(df.Col(\"Height\"), series.Float, \"Height\"))\n", 147 | "df = df.Mutate(series.New(df.Col(\"Weight\"), series.Float, \"Weight\"))" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 10, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "ename": "ERROR", 157 | "evalue": "reflect.Value.Convert: value of type reflect.Value cannot be converted to type series.Comparator", 158 | "output_type": "error", 159 | "traceback": [ 160 | "reflect.Value.Convert: value of type reflect.Value cannot be converted to type series.Comparator" 161 | ] 162 | } 163 | ], 164 | "source": [ 165 | "df = df.Filter(dataframe.F{\"Weight\", \"<\", 260})" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 11, 171 | "metadata": {}, 172 | "outputs": [ 173 | { 174 | "data": { 175 | "text/plain": [ 176 | "67" 177 | ] 178 | }, 179 | "execution_count": 11, 180 | "metadata": {}, 181 | "output_type": "execute_result" 182 | } 183 | ], 184 | "source": [ 185 | "df.Col(\"Height\").Min()" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 12, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "// rescale maps the given column values onto the range [0,1]\n", 195 | "func rescale(df dataframe.DataFrame, col string) dataframe.DataFrame {\n", 196 | " s := df.Col(col)\n", 197 
| " min := s.Min()\n", 198 | " max := s.Max()\n", 199 | " v := make([]float64, s.Len(), s.Len())\n", 200 | " for i := 0; i < s.Len(); i++ {\n", 201 | " v[i] = (s.Elem(i).Float()-min)/(max-min)\n", 202 | " }\n", 203 | " rs := series.Floats(v)\n", 204 | " rs.Name = col\n", 205 | " return df.Mutate(rs)\n", 206 | "}\n", 207 | "\n", 208 | "// meanNormalise maps the given column values onto the range [-1,1] by subtracting mean and dividing by max - min\n", 209 | "func meanNormalise(df dataframe.DataFrame, col string) dataframe.DataFrame {\n", 210 | " s := df.Col(col)\n", 211 | " min := s.Min()\n", 212 | " max := s.Max()\n", 213 | " mean := s.Mean()\n", 214 | " v := make([]float64, s.Len(), s.Len())\n", 215 | " for i := 0; i < s.Len(); i++ {\n", 216 | " v[i] = (s.Elem(i).Float()-mean)/(max-min)\n", 217 | " }\n", 218 | " rs := series.Floats(v)\n", 219 | " rs.Name = col\n", 220 | " return df.Mutate(rs)\n", 221 | "}\n", 222 | "\n", 223 | "// standardise centres the given column values on zero by subtracting mean and scales them by dividing by standard deviation\n", 224 | "func standardise(df dataframe.DataFrame, col string) dataframe.DataFrame {\n", 225 | " s := df.Col(col)\n", 226 | " std := s.StdDev()\n", 227 | " mean := s.Mean()\n", 228 | " v := make([]float64, s.Len(), s.Len())\n", 229 | " for i := 0; i < s.Len(); i++ {\n", 230 | " v[i] = (s.Elem(i).Float()-mean)/std\n", 231 | " }\n", 232 | " rs := series.Floats(v)\n", 233 | " rs.Name = col\n", 234 | " return df.Mutate(rs)\n", 235 | "}" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 13, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "df = rescale(df, \"Height\")\n", 245 | "df = rescale(df, \"Weight\")" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 14, 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "data": { 255 | "text/plain": [ 256 | "[1034x4] DataFrame\n", 257 | "\n", 258 | " Position Height Weight Age \n", 259 | " 0: Catcher 0.437500 
0.214286 22.990000\n", 260 | " 1: Catcher 0.437500 0.464286 34.690000\n", 261 | " 2: Catcher 0.312500 0.428571 30.780000\n", 262 | " 3: First_Baseman 0.312500 0.428571 35.430000\n", 263 | " 4: First_Baseman 0.375000 0.271429 35.710000\n", 264 | " 5: Second_Baseman 0.125000 0.185714 29.390000\n", 265 | " 6: Shortstop 0.125000 0.421429 30.770000\n", 266 | " 7: Third_Baseman 0.250000 0.357143 35.070000\n", 267 | " 8: Third_Baseman 0.562500 0.578571 30.190000\n", 268 | " 9: Outfielder 0.250000 0.214286 27.050000\n", 269 | " ... ... ... ... \n", 270 | " \n" 271 | ] 272 | }, 273 | "execution_count": 14, 274 | "metadata": {}, 275 | "output_type": "execute_result" 276 | } 277 | ], 278 | "source": [ 279 | "df" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 15, 285 | "metadata": {}, 286 | "outputs": [], 287 | "source": [ 288 | "perm := rand.Perm(df.Nrow())" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 16, 294 | "metadata": {}, 295 | "outputs": [ 296 | { 297 | "ename": "ERROR", 298 | "evalue": "untyped constant {float64 7/10} overflows ", 299 | "output_type": "error", 300 | "traceback": [ 301 | "untyped constant {float64 7/10} overflows " 302 | ] 303 | } 304 | ], 305 | "source": [ 306 | "df.Subset(perm[0:0.7*len(perm)])" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 17, 312 | "metadata": {}, 313 | "outputs": [ 314 | { 315 | "data": { 316 | "text/plain": [ 317 | "723" 318 | ] 319 | }, 320 | "execution_count": 17, 321 | "metadata": {}, 322 | "output_type": "execute_result" 323 | } 324 | ], 325 | "source": [ 326 | "int(0.7*float64(len(perm)))" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": 20, 332 | "metadata": {}, 333 | "outputs": [], 334 | "source": [ 335 | "// split splits the dataframe into training and validation subsets. valFraction (0 <= valFraction <= 1) of the samples\n", 336 | "// are used for training (despite the parameter's name) and the rest are reserved for validation. 
\n", 337 | "func split(df dataframe.DataFrame, valFraction float64) (training dataframe.DataFrame, validation dataframe.DataFrame){\n", 338 | " perm := rand.Perm(df.Nrow())\n", 339 | " cutoff := int(valFraction*float64(len(perm)))\n", 340 | " training = df.Subset(perm[:cutoff])\n", 341 | " validation = df.Subset(perm[cutoff:len(perm)])\n", 342 | " return training, validation\n", 343 | "}" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 21, 349 | "metadata": {}, 350 | "outputs": [ 351 | { 352 | "data": { 353 | "text/plain": [ 354 | "[723x4] DataFrame\n", 355 | "\n", 356 | " Position Height Weight Age \n", 357 | " 0: Third_Baseman 0.250000 0.500000 27.900000\n", 358 | " 1: Relief_Pitcher 0.375000 0.214286 26.560000\n", 359 | " 2: Relief_Pitcher 0.437500 0.428571 24.490000\n", 360 | " 3: Relief_Pitcher 0.250000 0.142857 26.430000\n", 361 | " 4: Outfielder 0.500000 0.428571 26.670000\n", 362 | " 5: Catcher 0.375000 0.357143 34.070000\n", 363 | " 6: Relief_Pitcher 0.375000 0.221429 29.040000\n", 364 | " 7: First_Baseman 0.500000 0.528571 28.450000\n", 365 | " 8: Catcher 0.375000 0.192857 30.420000\n", 366 | " 9: Relief_Pitcher 0.375000 0.392857 25.650000\n", 367 | " ... ... ... ... \n", 368 | " \n", 369 | " [311x4] DataFrame\n", 370 | "\n", 371 | " Position Height Weight Age \n", 372 | " 0: Relief_Pitcher 0.562500 0.428571 25.890000\n", 373 | " 1: Outfielder 0.125000 0.214286 27.550000\n", 374 | " 2: First_Baseman 0.500000 0.500000 26.890000\n", 375 | " 3: Relief_Pitcher 0.625000 0.535714 29.710000\n", 376 | " 4: Starting_Pitcher 0.562500 0.450000 31.440000\n", 377 | " 5: Outfielder 0.250000 0.357143 24.770000\n", 378 | " 6: Relief_Pitcher 0.625000 0.500000 28.540000\n", 379 | " 7: Relief_Pitcher 0.562500 0.464286 33.900000\n", 380 | " 8: Relief_Pitcher 0.437500 0.285714 25.140000\n", 381 | " 9: Starting_Pitcher 0.437500 0.428571 24.340000\n", 382 | " ... ... ... ... 
\n", 383 | " \n" 384 | ] 385 | }, 386 | "execution_count": 21, 387 | "metadata": {}, 388 | "output_type": "execute_result" 389 | } 390 | ], 391 | "source": [ 392 | "split(df, 0.7)" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 76, 398 | "metadata": {}, 399 | "outputs": [ 400 | { 401 | "data": { 402 | "text/plain": [ 403 | "[Catcher Catcher Catcher First_Baseman First_Baseman Second_Baseman Shortstop Third_Baseman Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Designated_Hitter Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher First_Baseman Second_Baseman Shortstop Shortstop Third_Baseman Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Designated_Hitter Designated_Hitter Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher Catcher First_Baseman First_Baseman First_Baseman First_Baseman First_Baseman Shortstop Shortstop Third_Baseman Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher Catcher First_Baseman Second_Baseman Shortstop Shortstop Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Designated_Hitter Starting_Pitcher Starting_Pitcher Starting_Pitcher 
Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher First_Baseman Second_Baseman Second_Baseman Second_Baseman Shortstop Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Designated_Hitter Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher Catcher First_Baseman First_Baseman Second_Baseman Second_Baseman Shortstop Shortstop Shortstop Third_Baseman Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Designated_Hitter Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher First_Baseman First_Baseman First_Baseman Second_Baseman Second_Baseman Shortstop Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Designated_Hitter Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher First_Baseman First_Baseman Second_Baseman Second_Baseman Second_Baseman Shortstop Shortstop Shortstop Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Designated_Hitter Starting_Pitcher Starting_Pitcher Starting_Pitcher 
Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher First_Baseman First_Baseman Second_Baseman Second_Baseman Shortstop Shortstop Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher Catcher First_Baseman Second_Baseman Second_Baseman Shortstop Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Designated_Hitter Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher First_Baseman First_Baseman Second_Baseman Second_Baseman Second_Baseman Shortstop Shortstop Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Designated_Hitter Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher Catcher Catcher First_Baseman Second_Baseman Shortstop Shortstop Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Designated_Hitter Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher 
Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher First_Baseman Second_Baseman Second_Baseman Second_Baseman Shortstop Shortstop Third_Baseman Third_Baseman Outfielder Outfielder Outfielder Designated_Hitter Designated_Hitter Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher Catcher First_Baseman Second_Baseman Second_Baseman Second_Baseman Shortstop Third_Baseman Third_Baseman Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Designated_Hitter Designated_Hitter Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher First_Baseman Second_Baseman Second_Baseman Second_Baseman Shortstop Shortstop Third_Baseman Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher Catcher First_Baseman First_Baseman Second_Baseman Shortstop Third_Baseman Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher 
Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher First_Baseman First_Baseman First_Baseman Second_Baseman Shortstop Shortstop Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher First_Baseman Second_Baseman Shortstop Shortstop Third_Baseman Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher Catcher First_Baseman First_Baseman Second_Baseman Shortstop Shortstop Third_Baseman Third_Baseman Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher Catcher First_Baseman Second_Baseman Second_Baseman Shortstop Shortstop Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Designated_Hitter Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher First_Baseman First_Baseman Second_Baseman 
Second_Baseman Shortstop Shortstop Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher Catcher First_Baseman First_Baseman Second_Baseman Second_Baseman Second_Baseman Second_Baseman Second_Baseman Shortstop Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher First_Baseman First_Baseman First_Baseman Second_Baseman Second_Baseman Second_Baseman Shortstop Third_Baseman Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher Catcher Catcher First_Baseman First_Baseman Second_Baseman Second_Baseman Shortstop Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Designated_Hitter Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher Catcher Catcher First_Baseman Second_Baseman Second_Baseman Shortstop 
Shortstop Shortstop Third_Baseman Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher First_Baseman Second_Baseman Second_Baseman Shortstop Shortstop Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Designated_Hitter Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher First_Baseman First_Baseman Second_Baseman Shortstop Shortstop Shortstop Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher First_Baseman First_Baseman First_Baseman Second_Baseman Shortstop Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher First_Baseman First_Baseman Second_Baseman Second_Baseman Shortstop Third_Baseman Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder 
Outfielder Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Catcher Catcher First_Baseman First_Baseman Second_Baseman Second_Baseman Shortstop Third_Baseman Third_Baseman Outfielder Outfielder Outfielder Outfielder Outfielder Outfielder Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Starting_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher Relief_Pitcher]" 404 | ] 405 | }, 406 | "execution_count": 76, 407 | "metadata": {}, 408 | "output_type": "execute_result" 409 | } 410 | ], 411 | "source": [ 412 | "df.Col(\"Position\")" 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": 79, 418 | "metadata": {}, 419 | "outputs": [], 420 | "source": [ 421 | "func UniqueValues(df dataframe.DataFrame, col string) []string {\n", 422 | " var ret []string\n", 423 | " m := make(map[string]bool)\n", 424 | " for _, val := range df.Col(col).Records() {\n", 425 | " m[val] = true\n", 426 | " }\n", 427 | " for key := range m {\n", 428 | " ret = append(ret, key)\n", 429 | " }\n", 430 | " return ret\n", 431 | "}" 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": 80, 437 | "metadata": {}, 438 | "outputs": [ 439 | { 440 | "data": { 441 | "text/plain": [ 442 | "[Shortstop Outfielder Starting_Pitcher Relief_Pitcher Second_Baseman First_Baseman Third_Baseman Designated_Hitter Catcher]" 443 | ] 444 | }, 445 | "execution_count": 80, 446 | "metadata": {}, 447 | "output_type": "execute_result" 448 | } 449 | ], 450 | "source": [ 451 | "UniqueValues(df, \"Position\")" 452 | ] 453 | }, 454 | { 455 | "cell_type": "code", 456 | "execution_count": 111, 457 | "metadata": {}, 458 | 
"outputs": [], 459 | "source": [ 460 | "func OneHotSeries(df dataframe.DataFrame, col string, vals []string) ([]series.Series){\n", 461 | " m := make(map[string]int)\n", 462 | " s := make([]series.Series, len(vals), len(vals))\n", 463 | " //cache the mapping for performance reasons\n", 464 | " for i := range vals {\n", 465 | " m[vals[i]] = i\n", 466 | " }\n", 467 | " for i := range s {\n", 468 | " vals := make([]int, df.Col(col).Len(),df.Col(col).Len())\n", 469 | " for j, val := range df.Col(col).Records() {\n", 470 | " if i == m[val] {\n", 471 | " vals[j] = 1\n", 472 | " }\n", 473 | " }\n", 474 | " s[i] = series.Ints(vals)\n", 475 | " }\n", 476 | " for i := range vals {\n", 477 | " s[i].Name = vals[i]\n", 478 | " }\n", 479 | " return s\n", 480 | "}" 481 | ] 482 | }, 483 | { 484 | "cell_type": "code", 485 | "execution_count": 112, 486 | "metadata": {}, 487 | "outputs": [], 488 | "source": [ 489 | "ohSeries := OneHotSeries(df, \"Position\", UniqueValues(df, \"Position\"))\n", 490 | "dfEncoded := df.Mutate(ohSeries[0])\n", 491 | "for i := 1; i < len(ohSeries); i++ {\n", 492 | " dfEncoded = dfEncoded.Mutate(ohSeries[i])\n", 493 | "}" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": 114, 499 | "metadata": {}, 500 | "outputs": [], 501 | "source": [] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "execution_count": 115, 506 | "metadata": {}, 507 | "outputs": [ 508 | { 509 | "data": { 510 | "text/plain": [ 511 | "[1034x13] DataFrame\n", 512 | "\n", 513 | " Position Height Weight Age Shortstop Catcher ...\n", 514 | " 0: Catcher 0.437500 0.214286 22.990000 0 1 ...\n", 515 | " 1: Catcher 0.437500 0.464286 34.690000 0 1 ...\n", 516 | " 2: Catcher 0.312500 0.428571 30.780000 0 1 ...\n", 517 | " 3: First_Baseman 0.312500 0.428571 35.430000 0 0 ...\n", 518 | " 4: First_Baseman 0.375000 0.271429 35.710000 0 0 ...\n", 519 | " 5: Second_Baseman 0.125000 0.185714 29.390000 0 0 ...\n", 520 | " 6: Shortstop 0.125000 0.421429 30.770000 1 0 ...\n", 521 | " 
7: Third_Baseman 0.250000 0.357143 35.070000 0 0 ...\n", 522 | " 8: Third_Baseman 0.562500 0.578571 30.190000 0 0 ...\n", 523 | " 9: Outfielder 0.250000 0.214286 27.050000 0 0 ...\n", 524 | " ... ... ... ... ... ... ...\n", 525 | " ...\n", 526 | "\n", 527 | "Not Showing: Second_Baseman , Outfielder , Designated_Hitter ,\n", 528 | "Starting_Pitcher , Relief_Pitcher , First_Baseman , Third_Baseman \n" 529 | ] 530 | }, 531 | "execution_count": 115, 532 | "metadata": {}, 533 | "output_type": "execute_result" 534 | } 535 | ], 536 | "source": [ 537 | "dfEncoded" 538 | ] 539 | }, 540 | { 541 | "cell_type": "code", 542 | "execution_count": 118, 543 | "metadata": {}, 544 | "outputs": [], 545 | "source": [ 546 | "dfEncoded = dfEncoded.Drop(\"Position\")" 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": 119, 552 | "metadata": {}, 553 | "outputs": [ 554 | { 555 | "data": { 556 | "text/plain": [ 557 | "[1034x12] DataFrame\n", 558 | "\n", 559 | " Height Weight Age Shortstop Catcher Second_Baseman Outfielder ...\n", 560 | " 0: 0.437500 0.214286 22.990000 0 1 0 0 ...\n", 561 | " 1: 0.437500 0.464286 34.690000 0 1 0 0 ...\n", 562 | " 2: 0.312500 0.428571 30.780000 0 1 0 0 ...\n", 563 | " 3: 0.312500 0.428571 35.430000 0 0 0 0 ...\n", 564 | " 4: 0.375000 0.271429 35.710000 0 0 0 0 ...\n", 565 | " 5: 0.125000 0.185714 29.390000 0 0 1 0 ...\n", 566 | " 6: 0.125000 0.421429 30.770000 1 0 0 0 ...\n", 567 | " 7: 0.250000 0.357143 35.070000 0 0 0 0 ...\n", 568 | " 8: 0.562500 0.578571 30.190000 0 0 0 0 ...\n", 569 | " 9: 0.250000 0.214286 27.050000 0 0 0 1 ...\n", 570 | " ... ... ... ... ... ... ... 
...\n", 571 | " ...\n", 572 | "\n", 573 | "Not Showing: Designated_Hitter , Starting_Pitcher , Relief_Pitcher ,\n", 574 | "First_Baseman , Third_Baseman \n" 575 | ] 576 | }, 577 | "execution_count": 119, 578 | "metadata": {}, 579 | "output_type": "execute_result" 580 | } 581 | ], 582 | "source": [ 583 | "dfEncoded" 584 | ] 585 | }, 586 | { 587 | "cell_type": "code", 588 | "execution_count": null, 589 | "metadata": {}, 590 | "outputs": [], 591 | "source": [] 592 | } 593 | ], 594 | "metadata": { 595 | "kernelspec": { 596 | "display_name": "Go", 597 | "language": "go", 598 | "name": "gophernotes" 599 | }, 600 | "language_info": { 601 | "codemirror_mode": "", 602 | "file_extension": ".go", 603 | "mimetype": "", 604 | "name": "go", 605 | "nbconvert_exporter": "", 606 | "pygments_lexer": "", 607 | "version": "go1.11.4" 608 | } 609 | }, 610 | "nbformat": 4, 611 | "nbformat_minor": 2 612 | } 613 | -------------------------------------------------------------------------------- /Chapter02/Weight Histogram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Machine-Learning-with-Go-Quick-Start-Guide/f42e51ab1141e7df39cd01a8a4ee81728156b9c7/Chapter02/Weight Histogram.png -------------------------------------------------------------------------------- /Chapter02/height_hist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Machine-Learning-with-Go-Quick-Start-Guide/f42e51ab1141e7df39cd01a8a4ee81728156b9c7/Chapter02/height_hist.png -------------------------------------------------------------------------------- /Chapter03/Create a Linear Regression 2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Create a Linear Regression\n", 8 | "\n", 9 | "In this example we will load 
California house price data into a gota dataframe and perform a linear regression to predict the median house price based on other variables such as the median income. " 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import (\n", 19 | " \"fmt\"\n", 20 | " \"github.com/kniren/gota/dataframe\"\n", 21 | " \"github.com/kniren/gota/series\"\n", 22 | " \"math/rand\"\n", 23 | " \"image\"\n", 24 | " \"bytes\"\n", 25 | " \"math\"\n", 26 | " \"github.com/gonum/stat\"\n", 27 | " \"github.com/gonum/integrate\"\n", 28 | " \"github.com/sajari/regression\"\n", 29 | " \"io/ioutil\"\n", 30 | ")" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "const path = \"../datasets/housing/CaliforniaHousing/cal_housing.data\"" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 4, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "columns := []string{\"longitude\", \"latitude\", \"housingMedianAge\", \"totalRooms\", \"totalBedrooms\", \"population\", \"households\", \"medianIncome\", \"medianHouseValue\"}\n", 49 | "b, err := ioutil.ReadFile(path)\n", 50 | "if err != nil {\n", 51 | " fmt.Println(\"Error!\", err)\n", 52 | "}\n", 53 | "df := dataframe.ReadCSV(bytes.NewReader(b), dataframe.Names(columns...))" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 5, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "// Divide divides two series and returns a series with the given name. 
The series must have the same length.\n", 63 | "func Divide(s1 series.Series, s2 series.Series, name string) series.Series {\n", 64 | " if s1.Len() != s2.Len() {\n", 65 | " panic(\"Series must have the same length!\")\n", 66 | " }\n", 67 | " \n", 68 | " ret := make([]interface{}, s1.Len(), s1.Len())\n", 69 | " for i := 0; i < s1.Len(); i ++ {\n", 70 | " ret[i] = s1.Elem(i).Float()/s2.Elem(i).Float()\n", 71 | " }\n", 72 | " s := series.Floats(ret)\n", 73 | " s.Name = name\n", 74 | " return s\n", 75 | "}\n", 76 | "\n", 77 | "// MultiplyConst multiplies the series by a constant and returns another series with the same name.\n", 78 | "func MultiplyConst(s series.Series, f float64) series.Series {\n", 79 | " ret := make([]interface{}, s.Len(), s.Len())\n", 80 | " for i := 0; i < s.Len(); i ++ {\n", 81 | " ret[i] = s.Elem(i).Float()*f\n", 82 | " }\n", 83 | " ss := series.Floats(ret)\n", 84 | " ss.Name = s.Name\n", 85 | " return ss\n", 86 | "}" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 6, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "data": { 96 | "text/plain": [ 97 | "[20639x9] DataFrame\n", 98 | "\n", 99 | " longitude latitude housingMedianAge totalRooms totalBedrooms ...\n", 100 | " 0: -122.220000 37.860000 21.000000 7099.000000 1106.000000 ...\n", 101 | " 1: -122.240000 37.850000 52.000000 1467.000000 190.000000 ...\n", 102 | " 2: -122.250000 37.850000 52.000000 1274.000000 235.000000 ...\n", 103 | " 3: -122.250000 37.850000 52.000000 1627.000000 280.000000 ...\n", 104 | " 4: -122.250000 37.850000 52.000000 919.000000 213.000000 ...\n", 105 | " 5: -122.250000 37.840000 52.000000 2535.000000 489.000000 ...\n", 106 | " 6: -122.250000 37.840000 52.000000 3104.000000 687.000000 ...\n", 107 | " 7: -122.260000 37.840000 42.000000 2555.000000 665.000000 ...\n", 108 | " 8: -122.250000 37.840000 52.000000 3549.000000 707.000000 ...\n", 109 | " 9: -122.260000 37.850000 52.000000 2202.000000 434.000000 ...\n", 110 | " ... ... ... ... ... 
...\n", 111 | " ...\n", 112 | "\n", 113 | "Not Showing: population , households , medianIncome ,\n", 114 | "medianHouseValue \n" 115 | ] 116 | }, 117 | "execution_count": 6, 118 | "metadata": {}, 119 | "output_type": "execute_result" 120 | } 121 | ], 122 | "source": [ 123 | "df" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 7, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "df = df.Mutate(Divide(df.Col(\"totalRooms\"), df.Col(\"households\"), \"averageRooms\"))\n", 133 | "df = df.Mutate(Divide(df.Col(\"totalBedrooms\"), df.Col(\"households\"), \"averageBedrooms\"))\n", 134 | "df = df.Mutate(Divide(df.Col(\"population\"), df.Col(\"households\"), \"averageOccupancy\"))\n", 135 | "df = df.Mutate(MultiplyConst(df.Col(\"medianHouseValue\"), 0.00001))\n", 136 | "df = df.Select([]string{\"medianIncome\", \"housingMedianAge\", \"averageRooms\", \"averageBedrooms\", \"population\", \"averageOccupancy\", \"latitude\", \"longitude\", \"medianHouseValue\" })" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 8, 142 | "metadata": {}, 143 | "outputs": [ 144 | { 145 | "data": { 146 | "text/plain": [ 147 | "[20639x9] DataFrame\n", 148 | "\n", 149 | " medianIncome housingMedianAge averageRooms averageBedrooms population ...\n", 150 | " 0: 8.301400 21.000000 6.238137 0.971880 2401.000000 ...\n", 151 | " 1: 7.257400 52.000000 8.288136 1.073446 496.000000 ...\n", 152 | " 2: 5.643100 52.000000 5.817352 1.073059 558.000000 ...\n", 153 | " 3: 3.846200 52.000000 6.281853 1.081081 565.000000 ...\n", 154 | " 4: 4.036800 52.000000 4.761658 1.103627 413.000000 ...\n", 155 | " 5: 3.659100 52.000000 4.931907 0.951362 1094.000000 ...\n", 156 | " 6: 3.120000 52.000000 4.797527 1.061824 1157.000000 ...\n", 157 | " 7: 2.080400 42.000000 4.294118 1.117647 1206.000000 ...\n", 158 | " 8: 3.691200 52.000000 4.970588 0.990196 1551.000000 ...\n", 159 | " 9: 3.203100 52.000000 5.477612 1.079602 910.000000 ...\n", 160 | " ... ... 
... ... ... ...\n", 161 | " ...\n", 162 | "\n", 163 | "Not Showing: averageOccupancy , latitude , longitude ,\n", 164 | "medianHouseValue \n" 165 | ] 166 | }, 167 | "execution_count": 8, 168 | "metadata": {}, 169 | "output_type": "execute_result" 170 | } 171 | ], 172 | "source": [ 173 | "df" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 9, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "func Split(df dataframe.DataFrame, valFraction float64) (training dataframe.DataFrame, validation dataframe.DataFrame){\n", 183 | " perm := rand.Perm(df.Nrow())\n", 184 | " cutoff := int(valFraction*float64(len(perm)))\n", 185 | " training = df.Subset(perm[:cutoff])\n", 186 | " validation = df.Subset(perm[cutoff:])\n", 187 | " return training, validation\n", 188 | "}" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 10, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "training, validation := Split(df, 0.75)" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 12, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "// DataFrameToXYs converts a dataframe with float64 columns to a slice of independent variable columns as floats\n", 207 | "// and the dependent variable (yCol). This can then be used with eg. 
goml's linear ML algorithms.\n", 208 | "// yCol is optional - if no column has that name a warning is printed, every column is returned in x and y is left as all zeros.\n", 209 | "func DataFrameToXYs(df dataframe.DataFrame, yCol string) ([][]float64, []float64){\n", 210 | " var (\n", 211 | " x [][]float64\n", 212 | " y []float64\n", 213 | " yColIx = -1\n", 214 | " )\n", 215 | " \n", 216 | " //find dependent variable column index\n", 217 | " for i, col := range df.Names() {\n", 218 | " if col == yCol {\n", 219 | " yColIx = i\n", 220 | " break\n", 221 | " }\n", 222 | " }\n", 223 | " if yColIx == -1 {\n", 224 | " fmt.Println(\"Warning - no dependent variable\")\n", 225 | " }\n", 226 | " x = make([][]float64, df.Nrow(), df.Nrow()) \n", 227 | " y = make([]float64, df.Nrow())\n", 228 | " for i := 0; i < df.Nrow(); i++ {\n", 229 | " var xx []float64\n", 230 | " for j := 0; j < df.Ncol(); j ++ {\n", 231 | " if j == yColIx {\n", 232 | " y[i] = df.Elem(i, j).Float()\n", 233 | " continue\n", 234 | " }\n", 235 | " xx = append(xx, df.Elem(i,j).Float())\n", 236 | " }\n", 237 | " x[i] = xx \n", 238 | " }\n", 239 | " return x, y\n", 240 | "}" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 21, 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [ 249 | "trainingX, trainingY := DataFrameToXYs(training, \"medianHouseValue\")\n", 250 | "validationX, validationY := DataFrameToXYs(validation, \"medianHouseValue\")" 251 | ] 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "metadata": {}, 256 | "source": [ 257 | "## Linear Regression for Median House Price" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 33, 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "model := new(regression.Regression)" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 34, 272 | "metadata": {}, 273 | "outputs": [], 274 | "source": [ 275 | "for i := range trainingX {\n", 276 | " model.Train(regression.DataPoint(trainingY[i], 
trainingX[i]))\n", 277 | "}\n", 278 | "if err := model.Run(); err != nil {\n", 279 | " fmt.Println(err)\n", 280 | "}" 281 | ] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": {}, 286 | "source": [ 287 | "## Calculate the Mean Square Error" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 35, 293 | "metadata": {}, 294 | "outputs": [ 295 | { 296 | "name": "stdout", 297 | "output_type": "stream", 298 | "text": [ 299 | "// warning: expression returns 2 values, using only the first one: [int error]\n", 300 | "MSE: 0.51\n" 301 | ] 302 | }, 303 | { 304 | "data": { 305 | "text/plain": [ 306 | "11 " 307 | ] 308 | }, 309 | "execution_count": 35, 310 | "metadata": {}, 311 | "output_type": "execute_result" 312 | } 313 | ], 314 | "source": [ 315 | "//On validation set\n", 316 | "errors := make([]float64, len(validationX), len(validationX))\n", 317 | "for i := range validationX {\n", 318 | " prediction, err := model.Predict(validationX[i])\n", 319 | " if err != nil {\n", 320 | " panic(fmt.Println(\"Prediction error\", err))\n", 321 | " }\n", 322 | " errors[i] = (prediction - validationY[i])*(prediction - validationY[i])\n", 323 | "}\n", 324 | "\n", 325 | "fmt.Printf(\"MSE: %5.2f\\n\", stat.Mean(errors, nil))" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 36, 331 | "metadata": {}, 332 | "outputs": [ 333 | { 334 | "name": "stdout", 335 | "output_type": "stream", 336 | "text": [ 337 | "// warning: expression returns 2 values, using only the first one: [int error]\n", 338 | "MSE: 0.53\n" 339 | ] 340 | }, 341 | { 342 | "data": { 343 | "text/plain": [ 344 | "11 " 345 | ] 346 | }, 347 | "execution_count": 36, 348 | "metadata": {}, 349 | "output_type": "execute_result" 350 | } 351 | ], 352 | "source": [ 353 | "// On training set\n", 354 | "errors := make([]float64, len(trainingX), len(trainingX))\n", 355 | "for i := range trainingX {\n", 356 | " prediction, err := model.Predict(trainingX[i])\n", 357 | " if err 
!= nil {\n", 358 | " panic(fmt.Println(\"Prediction error\", err))\n", 359 | " }\n", 360 | " errors[i] = (prediction - trainingY[i])*(prediction - trainingY[i])\n", 361 | "}\n", 362 | "\n", 363 | "fmt.Printf(\"MSE: %5.2f\\n\", stat.Mean(errors, nil))" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": null, 369 | "metadata": {}, 370 | "outputs": [], 371 | "source": [] 372 | } 373 | ], 374 | "metadata": { 375 | "kernelspec": { 376 | "display_name": "Go", 377 | "language": "go", 378 | "name": "gophernotes" 379 | }, 380 | "language_info": { 381 | "codemirror_mode": "", 382 | "file_extension": ".go", 383 | "mimetype": "", 384 | "name": "go", 385 | "nbconvert_exporter": "", 386 | "pygments_lexer": "", 387 | "version": "go1.11.4" 388 | } 389 | }, 390 | "nbformat": 4, 391 | "nbformat_minor": 2 392 | } 393 | -------------------------------------------------------------------------------- /Chapter03/Create a Linear Regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Create a Linear Regression\n", 8 | "\n", 9 | "In this example we will load California house price data to a gota dataframe and perform a linear regression to predict the median house price based on other variables such as the median income. 
" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import (\n", 19 | " \"fmt\"\n", 20 | " \"github.com/kniren/gota/dataframe\"\n", 21 | " \"github.com/kniren/gota/series\"\n", 22 | " \"math/rand\"\n", 23 | " \"github.com/cdipaolo/goml/linear\"\n", 24 | " \"github.com/cdipaolo/goml/base\"\n", 25 | " \"image\"\n", 26 | " \"bytes\"\n", 27 | " \"math\"\n", 28 | " \"github.com/gonum/stat\"\n", 29 | " \"github.com/gonum/integrate\"\n", 30 | " \"io/ioutil\"\n", 31 | ")" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 2, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "const path = \"../datasets/housing/CaliforniaHousing/cal_housing.data\"" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 76, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "columns := []string{\"longitude\", \"latitude\", \"housingMedianAge\", \"totalRooms\", \"totalBedrooms\", \"population\", \"households\", \"medianIncome\", \"medianHouseValue\"}\n", 50 | "b, err := ioutil.ReadFile(path)\n", 51 | "if err != nil {\n", 52 | " fmt.Println(\"Error!\", err)\n", 53 | "}\n", 54 | "df := dataframe.ReadCSV(bytes.NewReader(b), dataframe.Names(columns...))" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 77, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "// Divide divides two series and returns a series with the given name. 
The series must have the same length.\n", 64 | "func Divide(s1 series.Series, s2 series.Series, name string) series.Series {\n", 65 | " if s1.Len() != s2.Len() {\n", 66 | " panic(\"Series must have the same length!\")\n", 67 | " }\n", 68 | " \n", 69 | " ret := make([]interface{}, s1.Len(), s1.Len())\n", 70 | " for i := 0; i < s1.Len(); i ++ {\n", 71 | " ret[i] = s1.Elem(i).Float()/s2.Elem(i).Float()\n", 72 | " }\n", 73 | " s := series.Floats(ret)\n", 74 | " s.Name = name\n", 75 | " return s\n", 76 | "}\n", 77 | "\n", 78 | "// MultiplyConst multiplies the series by a constant and returns another series with the same name.\n", 79 | "func MultiplyConst(s series.Series, f float64) series.Series {\n", 80 | " ret := make([]interface{}, s.Len(), s.Len())\n", 81 | " for i := 0; i < s.Len(); i ++ {\n", 82 | " ret[i] = s.Elem(i).Float()*f\n", 83 | " }\n", 84 | " ss := series.Floats(ret)\n", 85 | " ss.Name = s.Name\n", 86 | " return ss\n", 87 | "}" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 78, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "data": { 97 | "text/plain": [ 98 | "[20639x9] DataFrame\n", 99 | "\n", 100 | " longitude latitude housingMedianAge totalRooms totalBedrooms ...\n", 101 | " 0: -122.220000 37.860000 21.000000 7099.000000 1106.000000 ...\n", 102 | " 1: -122.240000 37.850000 52.000000 1467.000000 190.000000 ...\n", 103 | " 2: -122.250000 37.850000 52.000000 1274.000000 235.000000 ...\n", 104 | " 3: -122.250000 37.850000 52.000000 1627.000000 280.000000 ...\n", 105 | " 4: -122.250000 37.850000 52.000000 919.000000 213.000000 ...\n", 106 | " 5: -122.250000 37.840000 52.000000 2535.000000 489.000000 ...\n", 107 | " 6: -122.250000 37.840000 52.000000 3104.000000 687.000000 ...\n", 108 | " 7: -122.260000 37.840000 42.000000 2555.000000 665.000000 ...\n", 109 | " 8: -122.250000 37.840000 52.000000 3549.000000 707.000000 ...\n", 110 | " 9: -122.260000 37.850000 52.000000 2202.000000 434.000000 ...\n", 111 | " ... ... ... ... ... 
...\n", 112 | " ...\n", 113 | "\n", 114 | "Not Showing: population , households , medianIncome ,\n", 115 | "medianHouseValue \n" 116 | ] 117 | }, 118 | "execution_count": 78, 119 | "metadata": {}, 120 | "output_type": "execute_result" 121 | } 122 | ], 123 | "source": [ 124 | "df" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 80, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "df = df.Mutate(Divide(df.Col(\"totalRooms\"), df.Col(\"households\"), \"averageRooms\"))\n", 134 | "df = df.Mutate(Divide(df.Col(\"totalBedrooms\"), df.Col(\"households\"), \"averageBedrooms\"))\n", 135 | "df = df.Mutate(Divide(df.Col(\"population\"), df.Col(\"households\"), \"averageOccupancy\"))\n", 136 | "df = df.Mutate(MultiplyConst(df.Col(\"medianHouseValue\"), 0.00001))\n", 137 | "df = df.Select([]string{\"medianIncome\", \"housingMedianAge\", \"averageRooms\", \"averageBedrooms\", \"population\", \"averageOccupancy\", \"latitude\", \"longitude\", \"medianHouseValue\" })" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 81, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "[20639x9] DataFrame\n", 149 | "\n", 150 | " medianIncome housingMedianAge averageRooms averageBedrooms population ...\n", 151 | " 0: 8.301400 21.000000 6.238137 0.971880 2401.000000 ...\n", 152 | " 1: 7.257400 52.000000 8.288136 1.073446 496.000000 ...\n", 153 | " 2: 5.643100 52.000000 5.817352 1.073059 558.000000 ...\n", 154 | " 3: 3.846200 52.000000 6.281853 1.081081 565.000000 ...\n", 155 | " 4: 4.036800 52.000000 4.761658 1.103627 413.000000 ...\n", 156 | " 5: 3.659100 52.000000 4.931907 0.951362 1094.000000 ...\n", 157 | " 6: 3.120000 52.000000 4.797527 1.061824 1157.000000 ...\n", 158 | " 7: 2.080400 42.000000 4.294118 1.117647 1206.000000 ...\n", 159 | " 8: 3.691200 52.000000 4.970588 0.990196 1551.000000 ...\n", 160 | " 9: 3.203100 52.000000 5.477612 1.079602 910.000000 ...\n", 161 | " ... 
... ... ... ... ...\n", 162 | " ...\n", 163 | "\n", 164 | "Not Showing: averageOccupancy , latitude , longitude ,\n", 165 | "medianHouseValue \n" 166 | ] 167 | }, 168 | "execution_count": 81, 169 | "metadata": {}, 170 | "output_type": "execute_result" 171 | } 172 | ], 173 | "source": [ 174 | "df" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 82, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "func Split(df dataframe.DataFrame, valFraction float64) (training dataframe.DataFrame, validation dataframe.DataFrame){\n", 184 | " perm := rand.Perm(df.Nrow())\n", 185 | " cutoff := int(valFraction*float64(len(perm)))\n", 186 | " training = df.Subset(perm[:cutoff])\n", 187 | " validation = df.Subset(perm[cutoff:])\n", 188 | " return training, validation\n", 189 | "}" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 83, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "training, validation := Split(df, 0.75)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 84, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "// DataFrameToXYs converts a dataframe with float64 columns to a slice of independent variable columns as floats\n", 208 | "// and the dependent variable (yCol). This can then be used with eg. 
goml's linear ML algorithms.\n", 209 | "// yCol is optional - if no column has that name a warning is printed, every column is returned in x and y is left as all zeros.\n", 210 | "func DataFrameToXYs(df dataframe.DataFrame, yCol string) ([][]float64, []float64){\n", 211 | " var (\n", 212 | " x [][]float64\n", 213 | " y []float64\n", 214 | " yColIx = -1\n", 215 | " )\n", 216 | " \n", 217 | " //find dependent variable column index\n", 218 | " for i, col := range df.Names() {\n", 219 | " if col == yCol {\n", 220 | " yColIx = i\n", 221 | " break\n", 222 | " }\n", 223 | " }\n", 224 | " if yColIx == -1 {\n", 225 | " fmt.Println(\"Warning - no dependent variable\")\n", 226 | " }\n", 227 | " x = make([][]float64, df.Nrow(), df.Nrow()) \n", 228 | " y = make([]float64, df.Nrow())\n", 229 | " for i := 0; i < df.Nrow(); i++ {\n", 230 | " var xx []float64\n", 231 | " for j := 0; j < df.Ncol(); j ++ {\n", 232 | " if j == yColIx {\n", 233 | " y[i] = df.Elem(i, j).Float()\n", 234 | " continue\n", 235 | " }\n", 236 | " xx = append(xx, df.Elem(i,j).Float())\n", 237 | " }\n", 238 | " x[i] = xx \n", 239 | " }\n", 240 | " return x, y\n", 241 | "}" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 85, 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [ 250 | "trainingX, trainingY := DataFrameToXYs(training, \"medianHouseValue\")\n", 251 | "validationX, validationY := DataFrameToXYs(validation, \"medianHouseValue\")" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "## Linear Regression for Median House Price" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 90, 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [ 267 | "model := linear.NewLeastSquares(base.BatchGA, 1e-2, 6, 150, trainingX, trainingY)" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 91, 273 | "metadata": {}, 274 | "outputs": [ 275 | { 276 | "name": "stdout", 277 | "output_type": "stream", 278 | "text": 
[ 279 | "Sorry! Learning diverged. Some value of the parameter vector theta is ±Inf or NaN\n" 280 | ] 281 | } 282 | ], 283 | "source": [ 284 | "//Train\n", 285 | "err := model.Learn()\n", 286 | "if err != nil {\n", 287 | " fmt.Println(err)\n", 288 | "}" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "## Calculate the Mean Square Error" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 92, 301 | "metadata": {}, 302 | "outputs": [ 303 | { 304 | "name": "stdout", 305 | "output_type": "stream", 306 | "text": [ 307 | "// warning: expression returns 2 values, using only the first one: [int error]\n", 308 | "MSE: +Inf\n" 309 | ] 310 | }, 311 | { 312 | "data": { 313 | "text/plain": [ 314 | "11 " 315 | ] 316 | }, 317 | "execution_count": 92, 318 | "metadata": {}, 319 | "output_type": "execute_result" 320 | } 321 | ], 322 | "source": [ 323 | "//On validation set\n", 324 | "errors := make([]float64, len(validationX), len(validationX))\n", 325 | "for i := range validationX {\n", 326 | " prediction, err := model.Predict(validationX[i])\n", 327 | " if err != nil {\n", 328 | " panic(fmt.Println(\"Prediction error\", err))\n", 329 | " }\n", 330 | " errors[i] = (prediction[0] - validationY[i])*(prediction[0] - validationY[i])\n", 331 | "}\n", 332 | "\n", 333 | "fmt.Printf(\"MSE: %5.2f\\n\", stat.Mean(errors, nil))" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 93, 339 | "metadata": {}, 340 | "outputs": [ 341 | { 342 | "name": "stdout", 343 | "output_type": "stream", 344 | "text": [ 345 | "// warning: expression returns 2 values, using only the first one: [int error]\n", 346 | "MSE: +Inf\n" 347 | ] 348 | }, 349 | { 350 | "data": { 351 | "text/plain": [ 352 | "11 " 353 | ] 354 | }, 355 | "execution_count": 93, 356 | "metadata": {}, 357 | "output_type": "execute_result" 358 | } 359 | ], 360 | "source": [ 361 | "// On training set\n", 362 | "errors := make([]float64, 
len(trainingX), len(trainingX))\n", 363 | "for i := range trainingX {\n", 364 | " prediction, err := model.Predict(trainingX[i])\n", 365 | " if err != nil {\n", 366 | " panic(fmt.Println(\"Prediction error\", err))\n", 367 | " }\n", 368 | " errors[i] = (prediction[0] - trainingY[i])*(prediction[0] - trainingY[i])\n", 369 | "}\n", 370 | "\n", 371 | "fmt.Printf(\"MSE: %5.2f\\n\", stat.Mean(errors, nil))" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [] 380 | } 381 | ], 382 | "metadata": { 383 | "kernelspec": { 384 | "display_name": "Go", 385 | "language": "go", 386 | "name": "gophernotes" 387 | }, 388 | "language_info": { 389 | "codemirror_mode": "", 390 | "file_extension": ".go", 391 | "mimetype": "", 392 | "name": "go", 393 | "nbconvert_exporter": "", 394 | "pygments_lexer": "", 395 | "version": "go1.11.4" 396 | } 397 | }, 398 | "nbformat": 4, 399 | "nbformat_minor": 2 400 | } 401 | -------------------------------------------------------------------------------- /Chapter03/Train a Random Forest.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Train a Random Forest\n", 8 | "\n", 9 | "In this example we will load California house price data to a gota dataframe and use a random forest to predict the median house price based on other variables such as the median income. 
" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import (\n", 19 | " \"fmt\"\n", 20 | " \"github.com/kniren/gota/dataframe\"\n", 21 | " \"github.com/kniren/gota/series\"\n", 22 | " \"math/rand\"\n", 23 | " \"github.com/fxsjy/RF.go/RF/Regression\"\n", 24 | " \"bytes\"\n", 25 | " \"math\"\n", 26 | " \"github.com/gonum/stat\"\n", 27 | " \"github.com/gonum/integrate\"\n", 28 | " \"io/ioutil\"\n", 29 | ")" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "const path = \"../datasets/housing/CaliforniaHousing/cal_housing.data\"" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "columns := []string{\"longitude\", \"latitude\", \"housingMedianAge\", \"totalRooms\", \"totalBedrooms\", \"population\", \"households\", \"medianIncome\", \"medianHouseValue\"}\n", 48 | "b, err := ioutil.ReadFile(path)\n", 49 | "if err != nil {\n", 50 | " fmt.Println(\"Error!\", err)\n", 51 | "}\n", 52 | "df := dataframe.ReadCSV(bytes.NewReader(b), dataframe.Names(columns...))" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 4, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "data": { 62 | "text/plain": [ 63 | "[20639x9] DataFrame\n", 64 | "\n", 65 | " longitude latitude housingMedianAge totalRooms totalBedrooms ...\n", 66 | " 0: -122.220000 37.860000 21.000000 7099.000000 1106.000000 ...\n", 67 | " 1: -122.240000 37.850000 52.000000 1467.000000 190.000000 ...\n", 68 | " 2: -122.250000 37.850000 52.000000 1274.000000 235.000000 ...\n", 69 | " 3: -122.250000 37.850000 52.000000 1627.000000 280.000000 ...\n", 70 | " 4: -122.250000 37.850000 52.000000 919.000000 213.000000 ...\n", 71 | " 5: -122.250000 37.840000 52.000000 2535.000000 489.000000 ...\n", 72 | " 6: -122.250000 37.840000 52.000000 3104.000000 
687.000000 ...\n", 73 | " 7: -122.260000 37.840000 42.000000 2555.000000 665.000000 ...\n", 74 | " 8: -122.250000 37.840000 52.000000 3549.000000 707.000000 ...\n", 75 | " 9: -122.260000 37.850000 52.000000 2202.000000 434.000000 ...\n", 76 | " ... ... ... ... ... ...\n", 77 | " ...\n", 78 | "\n", 79 | "Not Showing: population , households , medianIncome ,\n", 80 | "medianHouseValue \n" 81 | ] 82 | }, 83 | "execution_count": 4, 84 | "metadata": {}, 85 | "output_type": "execute_result" 86 | } 87 | ], 88 | "source": [ 89 | "df" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 5, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "// Divide divides two series and returns a series with the given name. The series must have the same length.\n", 99 | "func Divide(s1 series.Series, s2 series.Series, name string) series.Series {\n", 100 | " if s1.Len() != s2.Len() {\n", 101 | " panic(\"Series must have the same length!\")\n", 102 | " }\n", 103 | " \n", 104 | " ret := make([]interface{}, s1.Len(), s1.Len())\n", 105 | " for i := 0; i < s1.Len(); i ++ {\n", 106 | " ret[i] = s1.Elem(i).Float()/s2.Elem(i).Float()\n", 107 | " }\n", 108 | " s := series.Floats(ret)\n", 109 | " s.Name = name\n", 110 | " return s\n", 111 | "}\n", 112 | "\n", 113 | "// MultiplyConst multiplies the series by a constant and returns another series with the same name.\n", 114 | "func MultiplyConst(s series.Series, f float64) series.Series {\n", 115 | " ret := make([]interface{}, s.Len(), s.Len())\n", 116 | " for i := 0; i < s.Len(); i ++ {\n", 117 | " ret[i] = s.Elem(i).Float()*f\n", 118 | " }\n", 119 | " ss := series.Floats(ret)\n", 120 | " ss.Name = s.Name\n", 121 | " return ss\n", 122 | "}" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 6, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "df = df.Mutate(Divide(df.Col(\"totalRooms\"), df.Col(\"households\"), \"averageRooms\"))\n", 132 | "df = 
df.Mutate(Divide(df.Col(\"totalBedrooms\"), df.Col(\"households\"), \"averageBedrooms\"))\n", 133 | "df = df.Mutate(Divide(df.Col(\"population\"), df.Col(\"households\"), \"averageOccupancy\"))\n", 134 | "df = df.Mutate(MultiplyConst(df.Col(\"medianHouseValue\"), 0.00001))\n", 135 | "df = df.Select([]string{\"medianIncome\", \"housingMedianAge\", \"averageRooms\", \"averageBedrooms\", \"population\", \"averageOccupancy\", \"latitude\", \"longitude\", \"medianHouseValue\" })" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 7, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "data": { 145 | "text/plain": [ 146 | "[20639x9] DataFrame\n", 147 | "\n", 148 | " medianIncome housingMedianAge averageRooms averageBedrooms population ...\n", 149 | " 0: 8.301400 21.000000 6.238137 0.971880 2401.000000 ...\n", 150 | " 1: 7.257400 52.000000 8.288136 1.073446 496.000000 ...\n", 151 | " 2: 5.643100 52.000000 5.817352 1.073059 558.000000 ...\n", 152 | " 3: 3.846200 52.000000 6.281853 1.081081 565.000000 ...\n", 153 | " 4: 4.036800 52.000000 4.761658 1.103627 413.000000 ...\n", 154 | " 5: 3.659100 52.000000 4.931907 0.951362 1094.000000 ...\n", 155 | " 6: 3.120000 52.000000 4.797527 1.061824 1157.000000 ...\n", 156 | " 7: 2.080400 42.000000 4.294118 1.117647 1206.000000 ...\n", 157 | " 8: 3.691200 52.000000 4.970588 0.990196 1551.000000 ...\n", 158 | " 9: 3.203100 52.000000 5.477612 1.079602 910.000000 ...\n", 159 | " ... ... ... ... ... 
...\n", 160 | " ...\n", 161 | "\n", 162 | "Not Showing: averageOccupancy , latitude , longitude ,\n", 163 | "medianHouseValue \n" 164 | ] 165 | }, 166 | "execution_count": 7, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "df" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 8, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "func Split(df dataframe.DataFrame, valFraction float64) (training dataframe.DataFrame, validation dataframe.DataFrame){\n", 182 | " perm := rand.Perm(df.Nrow())\n", 183 | " cutoff := int(valFraction*float64(len(perm)))\n", 184 | " training = df.Subset(perm[:cutoff])\n", 185 | " validation = df.Subset(perm[cutoff:])\n", 186 | " return training, validation\n", 187 | "}" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 9, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "training, validation := Split(df, 0.75)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 10, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "// DataFrameToXYs converts a dataframe with float64 columns to a slice of independent variable columns as floats\n", 206 | "// and the dependent variable (yCol). This can then be used with eg. 
goml's linear ML algorithms.\n", 207 | "// yCol is optional - if it doesn't exist only the x (independent) variables will be returned.\n", 208 | "func DataFrameToXYs(df dataframe.DataFrame, yCol string) ([][]float64, []float64){\n", 209 | " var (\n", 210 | " x [][]float64\n", 211 | " y []float64\n", 212 | " yColIx = -1\n", 213 | " )\n", 214 | " \n", 215 | " //find dependent variable column index\n", 216 | " for i, col := range df.Names() {\n", 217 | " if col == yCol {\n", 218 | " yColIx = i\n", 219 | " break\n", 220 | " }\n", 221 | " }\n", 222 | " if yColIx == -1 {\n", 223 | " fmt.Println(\"Warning - no dependent variable\")\n", 224 | " }\n", 225 | " x = make([][]float64, df.Nrow(), df.Nrow()) \n", 226 | " y = make([]float64, df.Nrow())\n", 227 | " for i := 0; i < df.Nrow(); i++ {\n", 228 | " var xx []float64\n", 229 | " for j := 0; j < df.Ncol(); j ++ {\n", 230 | " if j == yColIx {\n", 231 | " y[i] = df.Elem(i, j).Float()\n", 232 | " continue\n", 233 | " }\n", 234 | " xx = append(xx, df.Elem(i,j).Float())\n", 235 | " }\n", 236 | " x[i] = xx \n", 237 | " }\n", 238 | " return x, y\n", 239 | "}" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 11, 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "func FloatsToInterfaces(f []float64) []interface{} {\n", 249 | " iif := make([]interface{}, len(f), len(f))\n", 250 | " for i := range f {\n", 251 | " iif[i] = f[i]\n", 252 | " }\n", 253 | " return iif\n", 254 | "}" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 12, 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [ 263 | "tx, trainingY := DataFrameToXYs(training, \"medianHouseValue\")\n", 264 | "vx, validationY := DataFrameToXYs(validation, \"medianHouseValue\")\n", 265 | "\n", 266 | "var (\n", 267 | " trainingX = make([][]interface{}, len(tx), len(tx))\n", 268 | " validationX = make([][]interface{}, len(vx), len(vx))\n", 269 | ")\n", 270 | "\n", 271 | "for i := range tx {\n", 272 | " 
trainingX[i] = FloatsToInterfaces(tx[i])\n", 273 | "}\n", 274 | "for i := range vx {\n", 275 | " validationX[i] = FloatsToInterfaces(vx[i])\n", 276 | "}" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "metadata": {}, 282 | "source": [ 283 | "## Random Forest Regression for Median House Price" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 13, 289 | "metadata": {}, 290 | "outputs": [ 291 | { 292 | "name": "stdout", 293 | "output_type": "stream", 294 | "text": [ 295 | ">> 2019-03-28 14:22:36.7404696 +0000 UTC m=+2.826843301 buiding 24th tree...\n", 296 | ">> 2019-03-28 14:22:36.7503545 +0000 UTC m=+2.836728201 buiding 10th tree...\n", 297 | ">> 2019-03-28 14:22:36.751142 +0000 UTC m=+2.837515501 buiding 11th tree...\n", 298 | ">> 2019-03-28 14:22:36.7619805 +0000 UTC m=+2.848354701 buiding 0th tree...\n", 299 | ">> 2019-03-28 14:22:36.7627932 +0000 UTC m=+2.849166601 buiding 13th tree...\n", 300 | ">> 2019-03-28 14:22:36.7629636 +0000 UTC m=+2.849336801 buiding 15th tree...\n", 301 | ">> 2019-03-28 14:22:36.7636428 +0000 UTC m=+2.850016501 buiding 1th tree...\n", 302 | ">> 2019-03-28 14:22:36.7642266 +0000 UTC m=+2.850603901 buiding 2th tree...\n", 303 | ">> 2019-03-28 14:22:36.7779664 +0000 UTC m=+2.864340701 buiding 3th tree...\n", 304 | ">> 2019-03-28 14:22:36.7888561 +0000 UTC m=+2.875229801 buiding 16th tree...\n", 305 | ">> 2019-03-28 14:22:36.7991267 +0000 UTC m=+2.885500401 buiding 4th tree...\n", 306 | ">> 2019-03-28 14:22:36.8002891 +0000 UTC m=+2.886662201 buiding 5th tree...\n", 307 | ">> 2019-03-28 14:22:36.8037908 +0000 UTC m=+2.890163901 buiding 6th tree...\n", 308 | ">> 2019-03-28 14:22:36.8071287 +0000 UTC m=+2.893501201 buiding 7th tree...\n", 309 | ">> 2019-03-28 14:22:36.8085332 +0000 UTC m=+2.894905701 buiding 8th tree...\n", 310 | ">> 2019-03-28 14:22:36.8113502 +0000 UTC m=+2.897723301 buiding 17th tree...\n", 311 | ">> 2019-03-28 14:22:36.8123859 +0000 UTC m=+2.898758901 buiding 9th tree...\n", 312 
| ">> 2019-03-28 14:22:36.8130964 +0000 UTC m=+2.899469801 buiding 18th tree...\n", 313 | ">> 2019-03-28 14:22:36.8135007 +0000 UTC m=+2.899873801 buiding 19th tree...\n", 314 | ">> 2019-03-28 14:22:36.8141428 +0000 UTC m=+2.900515901 buiding 20th tree...\n", 315 | ">> 2019-03-28 14:22:36.8145103 +0000 UTC m=+2.900883801 buiding 21th tree...\n", 316 | ">> 2019-03-28 14:22:36.8148672 +0000 UTC m=+2.901240301 buiding 22th tree...\n", 317 | ">> 2019-03-28 14:22:36.8151736 +0000 UTC m=+2.901546101 buiding 23th tree...\n", 318 | ">> 2019-03-28 14:22:36.7629129 +0000 UTC m=+2.849285901 buiding 14th tree...\n", 319 | ">> 2019-03-28 14:22:36.7627516 +0000 UTC m=+2.849125001 buiding 12th tree...\n", 320 | "2019-03-28 14:24:30.9391264 +0000 UTC m=+117.168148201 tranning progress 4%\n", 321 | "2019-03-28 14:24:30.9411954 +0000 UTC m=+117.170218001 tranning progress 8%\n", 322 | "2019-03-28 14:25:48.9207423 +0000 UTC m=+195.256984401 tranning progress 12%\n", 323 | "2019-03-28 14:25:49.155797 +0000 UTC m=+195.492035501 tranning progress 16%\n", 324 | "2019-03-28 14:26:00.72691 +0000 UTC m=+207.063147201 tranning progress 20%\n", 325 | "2019-03-28 14:26:03.6705441 +0000 UTC m=+210.006778601 tranning progress 24%\n", 326 | "2019-03-28 14:26:05.8480766 +0000 UTC m=+212.184316801 tranning progress 28%\n", 327 | "2019-03-28 14:26:06.7271483 +0000 UTC m=+213.063383801 tranning progress 32%\n", 328 | "2019-03-28 14:26:07.4139438 +0000 UTC m=+213.750181201 tranning progress 36%\n", 329 | "2019-03-28 14:26:19.5352162 +0000 UTC m=+225.906911701 tranning progress 40%\n", 330 | "2019-03-28 14:26:27.3380032 +0000 UTC m=+233.709697401 tranning progress 44%\n", 331 | "2019-03-28 14:26:38.2135588 +0000 UTC m=+244.585253601 tranning progress 48%\n", 332 | "2019-03-28 14:26:40.2513229 +0000 UTC m=+246.623018401 tranning progress 52%\n", 333 | "2019-03-28 14:26:45.1981762 +0000 UTC m=+251.605583201 tranning progress 56%\n", 334 | "2019-03-28 14:26:48.4072404 +0000 UTC m=+254.814649801 tranning 
progress 60%\n", 335 | "2019-03-28 14:26:50.039184 +0000 UTC m=+256.446592001 tranning progress 64%\n", 336 | "2019-03-28 14:26:50.3687841 +0000 UTC m=+256.776193301 tranning progress 68%\n", 337 | "2019-03-28 14:27:07.6486591 +0000 UTC m=+274.056068101 tranning progress 72%\n", 338 | "2019-03-28 14:27:08.9237193 +0000 UTC m=+275.331126101 tranning progress 76%\n", 339 | "2019-03-28 14:27:11.3675923 +0000 UTC m=+277.775000101 tranning progress 80%\n", 340 | "2019-03-28 14:27:15.8133047 +0000 UTC m=+282.256330401 tranning progress 84%\n", 341 | "2019-03-28 14:27:16.5408996 +0000 UTC m=+282.983928001 tranning progress 88%\n", 342 | "2019-03-28 14:27:17.9679703 +0000 UTC m=+284.410997901 tranning progress 92%\n", 343 | "2019-03-28 14:27:21.792748 +0000 UTC m=+288.235776501 tranning progress 96%\n", 344 | "2019-03-28 14:27:23.9331517 +0000 UTC m=+290.376180101 tranning progress 100%\n", 345 | "all done.\n" 346 | ] 347 | } 348 | ], 349 | "source": [ 350 | "model := Regression.BuildForest(trainingX, trainingY, 25, len(trainingX), 1)" 351 | ] 352 | }, 353 | { 354 | "cell_type": "markdown", 355 | "metadata": {}, 356 | "source": [ 357 | "## Calculate the Mean Square Error" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": 14, 363 | "metadata": {}, 364 | "outputs": [ 365 | { 366 | "name": "stdout", 367 | "output_type": "stream", 368 | "text": [ 369 | "// warning: expression returns 2 values, using only the first one: [int error]\n", 370 | "MSE: 0.29\n" 371 | ] 372 | }, 373 | { 374 | "data": { 375 | "text/plain": [ 376 | "11 " 377 | ] 378 | }, 379 | "execution_count": 14, 380 | "metadata": {}, 381 | "output_type": "execute_result" 382 | } 383 | ], 384 | "source": [ 385 | "//On validation set\n", 386 | "errors := make([]float64, len(validationX), len(validationX))\n", 387 | "for i := range validationX {\n", 388 | " prediction := model.Predicate(validationX[i])\n", 389 | " if err != nil {\n", 390 | " panic(fmt.Println(\"Prediction error\", err))\n", 
391 | " }\n", 392 | " errors[i] = (prediction - validationY[i])*(prediction - validationY[i])\n", 393 | "}\n", 394 | "\n", 395 | "fmt.Printf(\"MSE: %5.2f\\n\", stat.Mean(errors, nil))" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": 15, 401 | "metadata": {}, 402 | "outputs": [ 403 | { 404 | "name": "stdout", 405 | "output_type": "stream", 406 | "text": [ 407 | "// warning: expression returns 2 values, using only the first one: [int error]\n", 408 | "MSE: 0.05\n" 409 | ] 410 | }, 411 | { 412 | "data": { 413 | "text/plain": [ 414 | "11 " 415 | ] 416 | }, 417 | "execution_count": 15, 418 | "metadata": {}, 419 | "output_type": "execute_result" 420 | } 421 | ], 422 | "source": [ 423 | "// On training set\n", 424 | "errors := make([]float64, len(trainingX), len(trainingX))\n", 425 | "for i := range trainingX {\n", 426 | " prediction := model.Predicate(trainingX[i])\n", 427 | " if err != nil {\n", 428 | " panic(fmt.Println(\"Prediction error\", err))\n", 429 | " }\n", 430 | " errors[i] = (prediction - trainingY[i])*(prediction - trainingY[i])\n", 431 | "}\n", 432 | "\n", 433 | "fmt.Printf(\"MSE: %5.2f\\n\", stat.Mean(errors, nil))" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": null, 439 | "metadata": {}, 440 | "outputs": [], 441 | "source": [] 442 | } 443 | ], 444 | "metadata": { 445 | "kernelspec": { 446 | "display_name": "Go", 447 | "language": "go", 448 | "name": "gophernotes" 449 | }, 450 | "language_info": { 451 | "codemirror_mode": "", 452 | "file_extension": ".go", 453 | "mimetype": "", 454 | "name": "go", 455 | "nbconvert_exporter": "", 456 | "pygments_lexer": "", 457 | "version": "go1.11.4" 458 | } 459 | }, 460 | "nbformat": 4, 461 | "nbformat_minor": 2 462 | } 463 | -------------------------------------------------------------------------------- /Chapter03/download-fashion-mnist.sh: -------------------------------------------------------------------------------- 1 | mkdir -p datasets/mnist && \ 2 | wget 
http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz -O datasets/mnist/images.gz && \ 3 | wget http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz -O datasets/mnist/labels.gz 4 | -------------------------------------------------------------------------------- /Chapter03/download-housing.sh: -------------------------------------------------------------------------------- 1 | mkdir -p datasets/housing && \ 2 | wget https://ndownloader.figshare.com/files/5976036 -O datasets/housing/data.tgz && \ 3 | tar xzvf datasets/housing/data.tgz -C datasets/housing && \ 4 | rm datasets/housing/data.tgz 5 | -------------------------------------------------------------------------------- /Chapter04/Clustering Scatter.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Machine-Learning-with-Go-Quick-Start-Guide/f42e51ab1141e7df39cd01a8a4ee81728156b9c7/Chapter04/Clustering Scatter.jpg -------------------------------------------------------------------------------- /Chapter04/PCA Scatter.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Machine-Learning-with-Go-Quick-Start-Guide/f42e51ab1141e7df39cd01a8a4ee81728156b9c7/Chapter04/PCA Scatter.jpg -------------------------------------------------------------------------------- /Chapter04/Perform Clustering Using K-Means.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Perform Clustering Using K-Means\n", 8 | "\n", 9 | "In this example we will load the Iris dataset and use unsupervised learning to cluster the flowers by their features, then see how well the clusters match the species of iris."
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import (\n", 19 | " \"fmt\"\n", 20 | " \"github.com/kniren/gota/dataframe\"\n", 21 | " \"github.com/kniren/gota/series\"\n", 22 | " \"io/ioutil\"\n", 23 | " \"bytes\"\n", 24 | " \"math/rand\"\n", 25 | ")" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 4, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "const path = \"../datasets/iris/iris.csv\"" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 5, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "b, err := ioutil.ReadFile(path)\n", 44 | "if err != nil {\n", 45 | " fmt.Println(\"Error!\", err)\n", 46 | "}\n", 47 | "df := dataframe.ReadCSV(bytes.NewReader(b))\n", 48 | "df.SetNames(\"petal length\", \"petal width\", \"sepal length\", \"sepal width\", \"species\")" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 6, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "data": { 58 | "text/plain": [ 59 | "[150x5] DataFrame\n", 60 | "\n", 61 | " petal length petal width sepal length sepal width species\n", 62 | " 0: 5.100000 3.500000 1.400000 0.200000 0 \n", 63 | " 1: 4.900000 3.000000 1.400000 0.200000 0 \n", 64 | " 2: 4.700000 3.200000 1.300000 0.200000 0 \n", 65 | " 3: 4.600000 3.100000 1.500000 0.200000 0 \n", 66 | " 4: 5.000000 3.600000 1.400000 0.200000 0 \n", 67 | " 5: 5.400000 3.900000 1.700000 0.400000 0 \n", 68 | " 6: 4.600000 3.400000 1.400000 0.300000 0 \n", 69 | " 7: 5.000000 3.400000 1.500000 0.200000 0 \n", 70 | " 8: 4.400000 2.900000 1.400000 0.200000 0 \n", 71 | " 9: 4.900000 3.100000 1.500000 0.100000 0 \n", 72 | " ... ... ... ... ... 
\n", 73 | " \n" 74 | ] 75 | }, 76 | "execution_count": 6, 77 | "metadata": {}, 78 | "output_type": "execute_result" 79 | } 80 | ], 81 | "source": [ 82 | "df" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 7, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "// DataFrameToXYs converts a dataframe with float64 columns to a slice of rows, each holding that row's independent variables as floats,\n", 92 | "// and a slice holding the dependent variable (yCol). This can then be used with e.g. goml's linear ML algorithms.\n", 93 | "// yCol is optional - if no column has that name a warning is printed and the returned y values are all zero.\n", 94 | "func DataFrameToXYs(df dataframe.DataFrame, yCol string) ([][]float64, []float64){\n", 95 | " var (\n", 96 | " x [][]float64\n", 97 | " y []float64\n", 98 | " yColIx = -1\n", 99 | " )\n", 100 | " \n", 101 | " //find dependent variable column index\n", 102 | " for i, col := range df.Names() {\n", 103 | " if col == yCol {\n", 104 | " yColIx = i\n", 105 | " break\n", 106 | " }\n", 107 | " }\n", 108 | " if yColIx == -1 {\n", 109 | " fmt.Println(\"Warning - no dependent variable\")\n", 110 | " }\n", 111 | " x = make([][]float64, df.Nrow(), df.Nrow()) \n", 112 | " y = make([]float64, df.Nrow())\n", 113 | " for i := 0; i < df.Nrow(); i++ {\n", 114 | " var xx []float64\n", 115 | " for j := 0; j < df.Ncol(); j ++ {\n", 116 | " if j == yColIx {\n", 117 | " y[i] = df.Elem(i, j).Float()\n", 118 | " continue\n", 119 | " }\n", 120 | " xx = append(xx, df.Elem(i,j).Float())\n", 121 | " }\n", 122 | " x[i] = xx \n", 123 | " }\n", 124 | " return x, y\n", 125 | "}" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 8, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "features, classification := DataFrameToXYs(df, \"species\")" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "# Train K Means\n", 142 | "\n", 143 | "Now that we have pre-processed the data we will use the 
K Means algorithm to group it into 3 clusters - one for each species." 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 27, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "import (\n", 153 | " \"gonum.org/v1/plot\"\n", 154 | " \"gonum.org/v1/plot/plotter\"\n", 155 | " \"gonum.org/v1/plot/plotutil\"\n", 156 | " \"gonum.org/v1/plot/vg\"\n", 157 | " \"gonum.org/v1/plot/vg/draw\"\n", 158 | " \"github.com/cdipaolo/goml/cluster\"\n", 159 | " \"github.com/cdipaolo/goml/base\"\n", 160 | " \"bufio\"\n", 161 | " \"strconv\"\n", 162 | ")" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 10, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "model := cluster.NewKMeans(3, 30, features)" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 11, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "if err := model.Learn(); err != nil {\n", 181 | "\tpanic(err)\n", 182 | "}" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 25, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "// PredictionsToScatterData gets predictions from the model based on the features and converts to map from label to XYs\n", 192 | "func PredictionsToScatterData(features [][]float64, labels []float64, model base.Model, featureForXAxis, featureForYAxis int) (map[int]plotter.XYs, map[int][]float64) {\n", 193 | " ret := make(map[int]plotter.XYs)\n", 194 | " labelMap := make(map[int][]float64)\n", 195 | " if features == nil {\n", 196 | " panic(\"No features to plot\")\n", 197 | " }\n", 198 | " \n", 199 | " for i := range features {\n", 200 | " var pt struct{X, Y float64}\n", 201 | " pt.X = features[i][featureForXAxis]\n", 202 | " pt.Y = features[i][featureForYAxis]\n", 203 | " p, _ := model.Predict(features[i])\n", 204 | " labelMap[int(p[0])] = append(labelMap[int(p[0])], labels[i])\n", 205 | " ret[int(p[0])] = append(ret[int(p[0])], pt)\n", 
206 | " }\n", 207 | " return ret, labelMap\n", 208 | "}" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 18, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "/**\n", 218 | " NB. This is required because gophernotes comes with an old version of goml. When it gets updated we can remove most of this.\n", 219 | "*/\n", 220 | "\n", 221 | "type LegacyXYs plotter.XYs\n", 222 | "\n", 223 | "func (xys LegacyXYs) Len() int {\n", 224 | "\treturn len(xys)\n", 225 | "}\n", 226 | "\n", 227 | "func (xys LegacyXYs) XY(i int) (float64, float64) {\n", 228 | "\treturn xys[i].X, xys[i].Y\n", 229 | "}" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 26, 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [ 238 | "scatterData, labels := PredictionsToScatterData(features, classification, model, 2, 3)" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": 65, 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [ 247 | "\n", 248 | "func PlotClusterData(labelsToXYs map[int]plotter.XYs, classes map[int][]float64, xLabel, yLabel string) ([]uint8, error) {\n", 249 | " p, err := plot.New()\n", 250 | " if err != nil {\n", 251 | " return nil, err\n", 252 | " }\n", 253 | " p.Title.Text = \"Iris Dataset K-Means Example\"\n", 254 | " //p.X.Min = 4\n", 255 | " //p.X.Max = 9\n", 256 | " p.X.Padding = 0\n", 257 | " p.X.Label.Text = xLabel\n", 258 | " //p.Y.Min = 1.5\n", 259 | " //p.Y.Max = 4.5\n", 260 | " p.Y.Padding = 0\n", 261 | " p.Y.Label.Text = yLabel\n", 262 | " for i := range labelsToXYs {\n", 263 | " s, err := plotter.NewScatter(LegacyXYs(labelsToXYs[i])) //Remove LegacyXYs when gophernotes updated to use latest goml\n", 264 | " s.GlyphStyleFunc = func (ii int) func(jj int) draw.GlyphStyle {\n", 265 | " return func(j int) draw.GlyphStyle {\n", 266 | " var gs draw.GlyphStyle\n", 267 | " if j >= len(classes[ii]){\n", 268 | " gs.Shape = plotutil.Shape(10)\n", 269 | " } else {\n", 270 | 
" gs.Shape = plotutil.Shape(int(classes[ii][j]))\n", 271 | " }\n", 272 | " gs.Color = plotutil.Color(ii)\n", 273 | " gs.Radius = 2.\n", 274 | " return gs\n", 275 | " }\n", 276 | " }(i)\n", 277 | " //s.Color = plotutil.Color(i)\n", 278 | " //s.Shape = plotutil.Shape(i)\n", 279 | " p.Add(s)\n", 280 | " n := strconv.Itoa(i)\n", 281 | " p.Legend.Add(n)\n", 282 | " if err != nil {\n", 283 | " return nil, err\n", 284 | " }\n", 285 | " }\n", 286 | " w, err := p.WriterTo(5*vg.Inch, 4*vg.Inch, \"jpg\")\n", 287 | " if err != nil{\n", 288 | " return nil, err\n", 289 | " }\n", 290 | " if err := p.Save(5*vg.Inch, 4*vg.Inch, \"Clustering Scatter.jpg\"); err != nil {\n", 291 | " return nil, err\n", 292 | " }\n", 293 | " var b bytes.Buffer\n", 294 | " writer := bufio.NewWriter(&b)\n", 295 | " w.WriteTo(writer)\n", 296 | " return b.Bytes(), nil\n", 297 | "}" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 66, 303 | "metadata": {}, 304 | "outputs": [ 305 | { 306 | "data": { 307 | "image/jpeg": 
"/9j/2wCEAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDIBCQkJDAsMGA0NGDIhHCEyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMv/AABEIAYAB4AMBIgACEQEDEQH/xAGiAAABBQEBAQEBAQAAAAAAAAAAAQIDBAUGBwgJCgsQAAIBAwMCBAMFBQQEAAABfQECAwAEEQUSITFBBhNRYQcicRQygZGhCCNCscEVUtHwJDNicoIJChYXGBkaJSYnKCkqNDU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6g4SFhoeIiYqSk5SVlpeYmZqio6Slpqeoqaqys7S1tre4ubrCw8TFxsfIycrS09TV1tfY2drh4uPk5ebn6Onq8fLz9PX29/j5+gEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoLEQACAQIEBAMEBwUEBAABAncAAQIDEQQFITEGEkFRB2FxEyIygQgUQpGhscEJIzNS8BVictEKFiQ04SXxFxgZGiYnKCkqNTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqCg4SFhoeIiYqSk5SVlpeYmZqio6Slpqeoqaqys7S1tre4ubrCw8TFxsfIycrS09TV1tfY2dri4+Tl5ufo6ery8/T19vf4+fr/2gAMAwEAAhEDEQA/APf6KKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAqjrOppoui3uqSwyzRWkLzyJFt3FVBJxkgdAe9XqwPHB/4oPxAgyXk064jRQMlmaNgAB3JJAoA1dOvRqOmW18InhWeMSqkhG4AjIzgkdPenWt01wsxktZrfypWjHnbfnA/jGCflPbOD7CvPrXT9O1XxL4bivbaKe3bw7JE4kXjzA8G1T/tDa5A6jaSOlVFtoJ9N1OJbyGzKeJJZ4POtvOt5BsO0SqMfu2+bByPmAwcigD1KRikbMqF2AJCggFvbniuRt/iDbzaHHrb6Hq6aU28vdBIpBEqsVZmVJC+BtPIU8CtTwrcP/wAIjZXF3p8WlssRL2yZCRAE8gEAhSBkAjgHFcb4W1ZLL4UDT47ee41TyblI7Jbd2ZnaSTYCMcA5HJwADnNAHpUFxDdW8VxBKskMyh43U5DqRkEHuMU1LqCW5mtklVpoQpkQHlA2cZ+uDXj+tadfaZ4OfQZLCM6jpvh6HyLoWrzySyjduWF14TYVXnkncvGAK2r17fT9Z8a3iaRHez3VtbvAvkEieFowshDKMlepYDk4+lAHpmRjOeKMjGc8V5Otokj6/aNeRWNq95ZXFtImnMtqxEYJLQk/6slACcgZxzTlS9uU0CTUtMstP0tI76KWL7DJNaCYyqEk2ZUqrqHKlum4+ooA9Q+1QfbPsnmr9o8vzfLzztzjP0zWbqPiCPTde0rSXs7iWXUi4ikjKbF2LubdlgeBzwDXKaHpNvpXjnTVuSbxxoUUFvfzW5DSujsCckHDbCucnOKm8cWqah4y8KWj3Nxbxk3Ymlt22tGrxbR82Pl3H5QevpzQB1lhq66je3UMFrMbe3bZ9rJXypHH3lXnJweCcYyCM5Bql4q8WWfhGztry/t7mS2mnELSQBT5WQTuYEg7QFYkjPArI8NXmrafbXHhO78uPVLCIJYXssBMF3Dj5HwpHzADDKCOmRwadrVtcXMOj2HiWexuTcX0qS/ZYWiTyWtZk5DMx6uBnOPmFAGv4p8VWHhLRDql6k00ZdY0jtwGdyfTJA4AJPPQVsxuXjRnQxswyUYgkH044rxvVdP1k/DXU7bWYpGuNLjGlWPBLXIWVSZgO+6NYwO/yv61u+ILF9V8Uamt1epbWt3Z2w028Nk85jILlvKcMAkm7B9SNvpQB6VnnFGecV5hq880/ieO5Fh
Fa3Frr1vHJIto/myQ7AvmtNnaEYEjbjHHXOapW9jZwLbX8NpDHex+Lpds6RASC3aZ8/NjPllT9MEUAeuZGcZ5rC1PxKNO16y0ddMvLq5vIpJYjC0YUiPbuyWcYPzLXN+FY3s/F7xwRQX9tPHcytftbNFdWrNKrGGZuj5JG3ocL0wM0/xY0J+Ivhx5bqe2ihs7wSzQdULGLaCcHGcH8qAOy06+e/gkeSzuLSSOQxtFPt3ZGOcqSCDn1q5nAya4HU49P1zwpBpFvJJPfWs0Vtp97MuGM8aIwmyR90Z+b1wV5JANK2v4JdC0e2k0eDz5b+RNSkuLZp0huQr5kKrjeHbhG4XDDHQCgD0vpRketeR6JY2123g621a0hnSEalDPHcwfKiiT92rK3QYA2g0mgafZWVp4Mure0ihvU1e6t5ZVjAkFvi4Cq567MGHGeOUx2oA9cJ6gYz6Vl6BrX9t2VzO1v5DW93PaOu/eC0UhQkHA4JXPSvOPDVnavaeHX0m2jXVYdaujdzwxYKWwluAwlYD7pG0AE9duOlSKlzLpsLIA1gnie+mvFe2adfKZpzG7opBaPcUOenKt0FAHrFFYvhSxXT9BSCK8F3CZZZInERjVVZywRVJJ2rnA56AVtUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFUNXSxNkJNQYi3icOVBPznkBcDlsk8L3OKztGs9Rh8JTQX0zpcyfaHj86Us0Ebu7Roz5OSilQTk9O9AHQUVzHhyG20nVJ9INo0F6baO4dlu5LhHXcy8F8FTkHjAzkcnBxpvZ6uXYrq8SqTwPsgOB/wB9UAalFZWmz3o1S9sby4juPJjikR1i2H594IIyf7o/OtWgAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAoPIIzj3oooAzdC0WHQNNFhbzzzRCSSUGYqSC7l2+6BxuYn8a0qKKACiiigAooooAKKKKAEJCgkkADkk0tU7+Ga4SONI45IS2ZUd8bgOg6HjPX6e9SWSTx2qx3G3cpIGGLfL2ySBziqtpcV9bFikyAQMjJ6ClrLura8ku3njihZ4yPs7NKRtHfI2nryD7YoirhJ2NPIzjIz1xS1lm2vPt32nyofM8zG/zT/qs4242+nzdfve1alElYE7hRRRUjCiiigAooooAKKKKAKWp6TYazbpb6japcRI4kVX7MARn8ifzqCLw5o8EUUUenwiOJZFRSCQBIMOOfUAZrUooAq2mm2di8j28CpJIAryElnYDOAWOSQMnAzxk4q1RRQBlWn/ACNGp/8AXtbfzlrVrKtP+Ro1P/r2tv5y1q0AFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAVVe9RL1bfYxB4aT+FWPIU+5AP6eoq1VN9MtXZywl+d/MYCdwC2Qc4zjsPypO/QuHJ9ouUUUUyAqG6uEtLZ53BIUcKvVj0AHuTgfjU1QXVnDeIiThiEcOu12Qhh0OQQe9NWvqJ3toFndC7tllClGyVdCclGHBB+hqeoLazhtPM8kSZkbc5eRnJOMZyxPYD8qnola+gK9tQooopDCiiigAooooAKKKKACiiigAooooAKKKKACiiigDKtP8AkaNT/wCva2/nLWrWVaf8jRqf/XtbfzlrVoAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigArLlu5Vvi4W42RuI/KEDkOD1bOMcHGPYH1rUoqotITTZlR3kv23zCtz5bsY/KMD4VR0bOO5z+BHpTYL+ZGaaSK6dZUZ/K8hx5ZAyFGR3HH1HvWvRVcy7E8r7mdp88pmaKXzn3L5m94mUKe6gkDjkY79fSt
Gimu6xozuwVVGST0AqG7spKyKuoSssaRJ5y+acNJFGzFF79AcE9B9c9qkspnntlaRWVwSrbkK5I4yAex61LFKk0SSxtuRxlT6ikmnit4w8zhFJC5PqafTlsLre5Rv7iZbgJEJ18pfMGyJmEjf3SQDxjOfqPStCNxJGjhWUMAcMMEfUdjTqKG01YaTvcy/tcv2/ftuNnmeT5XkPjGcb84x175xtrUooobTBJoKKKKkYUUVHPPFbQtLM4SNerHtRuBJWXfXUyXJMa3GIAGCpCzCYnqMgen6n2rUoqouzFJXQUUUVIwooooAKKKKAMq0/wCRo1P/AK9rb+ctatZVp/yNGp/9e1t/OWtWgAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAoopruscbSOwVFBLE9hQA6q95BJcxKiSKmHDNuTcGA7dR3xUEd+6JIbqPy2C+ZGo6sp4A/3s4BHuPWhb2VIHWaNBdqQojU8MW+7j265PsfSrUZJ3RDkmie0t5LcSiSVH3uXARCoXPXue+T+NMurSW4mR1liCKrDZJEXBJ4z94dsj8TUf2yY24jVU+2lvK2nO0NjJb1245/EDrVm1uUuod6kBgSrrnJRh1U+4ofMveBcr0Ftonhto4pJPMZRgvjGf1NS0UVDdy1oFFFFABRRRQAVVuraWeWF45Y1EeTteMsCTwDwR0Gfzp91M0MQ8sAzOdkYPQsfX2HJPsDS20xnhyw2yKdsi/3WHX/63sRVK61JdnoJaQvb2qQvIJCmQCFwMZ4GMnoMDr2qeiik3d3KStoFFFFIAooooAKKKhuluXt2W0liim42vLGZFHPOVDKT+dAFC0/5GjU/+va2/nLWrWFpKXieI9UF7PBNJ9nt8NDCYhjMvGCzfzrdoAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiis3UYp7iZEEMjRIu9GjZQRJ2PJHTr759qTdi4RUnZuxpUVjzrfzskzWbiaJFaIJIu0Pj5s5P/Afpn1p8iXbXv2kWsnmIwWPDrt8v+IHnPPXp2X0qo+8KpHkSd7mrTXRZEZHUMjDBVhkEVmxJdC+FwbaQSO5WQl12+X2A5zx16d29a1KbVjNO5WGn2QBAs7cBhg/ul5Gc+nqB+VH9n2QUr9jt8EgkeUvOOnb3P51Zopcz7j5V2KcunWxgdYrS1D4JTdENobHU8fSotPsZLKQBY4Y4fL2sqEkswPDdBz1z68elaNFPndrC5Fe4UUUVJQUUUUAFFRXLSrbyGBA8uPkBPequnxS27vH5MiQEBgZGUnf/F0J69frn1qktLib1sW5reG4ULNFHIAcgOoOD+NEUENupWGJI1JyQigAn8KkqjqMck4jh8h5YGyZQjAE46Dkjvz+HvQtdLiempeoqC0MxtU+0KVlGQckZODweOORz+NT0mrMpahRRRSAKKKKACiiigDKtP8AkaNT/wCva2/nLWrWVaf8jRqf/XtbfzlrVoAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigCvPeRW88ULhiZD1A4TsCfTJIA9z9aY+pW8eopZNu8xh94D5QTkhSfUgE/h9Mtn02O4eZjNMomUK4UjGAMY5HHf8zVd9CikaQm7uwJJRKyhx94Ywc4zxgd+1S+Y6Iqjb3matV4byOeeWJQwMZ6kcN2JHrg5B9/wqwRkEfyqnBpyQPCwmmYQgqgYjGCMenP8A9atFa2pzO99C5RRRUjCiiigAooooAr/bI/tv2XDbsfex8ueu3PrjnHpViqf9mx78+dNjzfN27h97OfTPt9OKuVUrdBK/UKKKKkYUUUUAFFFFABRRRQAUUUUAFFF
FAGVaf8jRqf8A17W385a1ayrT/kaNT/69rb+ctatABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRTIpY5lLROrqCVypyMg4I/On0A1bcw9f1GW0ltbeO8jsxOHLTum7btxjAPBznFUv+EgnuLG0ZZkgmKNM5KZWTaxUIPZiDk9gPeupoqHF33OqFenGCi4Xa/4Pl/ViK2nW6tYrhAwWVA4DDBAI71nzaiyX5IkIhjcRNFs5fPVhxng4/AN14rSaWNG2tIin0JxT6ppmMZxTegUUUUzMrahd/YbCW42lig+VfUk4H4ZIrKtNWcWs0DXP2iZGVVuBHhSGzyR2xhuO+B61vUVcZJKzREotu6ZU0+4M8LKzmR4m2GTGN/cH8iM++aj1G6eN44I5fJdgXMjLkYHQfif0z04q/RSur3sOzta5FbTi5to5gpXeM7WHI9qoXWotFeMRIVjgIDx7MmTPJI47AjGOpyK1KKE0nsDTa3Cmu4jjZ2ztUEnAzTqKkozLC+eW42SPv8ANUyKAv8Aqv8AZJ+mOfXPtWnRRTk03oKKaWoUUUUhhRRRQAUUUUAFFFFABRRRQBlWn/I0an/17W385a1ayrT/AJGjU/8Ar2tv5y1q0AFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAVFcpLJbSJDII5GGFcjOPepaiuZvs9tLMVLbFLbR3xQxxvdWK9laS20rkmJYmVQI4weCOM8+2B+Aq7VeC4kknkhlh8tlVW4bcCCSP6UtzO0HlKkfmPK+xRux/CT1/CkrJFzUpS13J6KhtpzOjlo9jI5Rlznn61NTIaadmZt5YzTNdBIbdxPHsDSOQVOCP7p/nWlRVJ751kcC3JiSVYi+8ZycDOP+BU5S01CFNtuxdooopCCiiigAooqtd3LwBFhi82VskJnHyjqf6D3IppX0E3bUs0U2ORZY1kQ7kYBlPqKdSGFFFFABRRRQAUUUUAFFFFAGVrWuDR/IUabqN8824hbKASFQMZJyRjqK86tIFNhbnUrDx7LfmNftEr3MqwtJgbiU8zaEznjbjHau/wDEerjTrZLdYNVkluVfa+mwCWSIDGW5yB94dQa87sZp20ywgn1DxvcWBWFAj6dBFFIMqFBcJuVScc5zjvQB6/RWfrjzx6FfS207QTRwO6SKqkqQpPAYEdu4qDTpru78IWk6uXvZrBHD8AmQxg59OtAGvRXO+HXNvdzWF3eapJfmFJmhv3jbC5I3IY+MZyDzkcdM83nbXt7bItN2Z+XdI+ce/wAtACWn/I0an/17W385a1awtJN4fEeqfbVgWT7Pb4ELEjGZfUCt2gAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKycyeb1u/P+0dNr7Nm/wCm3G3/ADmk3YuEOY1qZLEk0TxSDKOCrDOODT6oa39p/sS8+x+Z9o8o7PL+9n2x3oewU4800k7FiG0it3Z08wswAJeRnOBnHUn1NOnt47hVEgb5W3KVcqQcEdQQehNc9brNHc3It5dUktNkOWuFl3BvMO/buGfu9cVeLPnhr37N54ydsm/bsPtuxuxUqR0SoyUr839W/M1III7dCkYbBJYlmLEn6nmpKqWBYwvkylfMOzzQQ238efXrVO/ydTAma+W38kbfswkI3bjnOwemOtXFXOaaak7s16rNYW7zGUiTcXDkCVgpYYwducdh2rKU6i1vbSNb3LTQQRsuGUeZJtG7dlh/u8jjJPpViRrw332gWtxlGCIgddpQ/eJ+br3/AOAj1NS3rY1hTaV1K1/M1qKKKowCiiigAqGW0t53Dywo7AYDEc49KmooTtsDVxkUUcEYjiRUQdFUYAp9FFABRRRQAUUUUAFFFFABRRRQBXuryG1
kto5hJm5l8mPZEzjdtLc4B2jCnk4H515gmo2LrDbjXPE82ioyNFYvojojKpBVGnMIPl8AckcdWIr1evPbPXNah8QR6BYRzXcMJEc8muFLdnUDBMO355B7lCD/AHqAO6vbRb6ymtXkdElUo5TGdp4I5B7VTTQrdLK2tPtN4YLeBrdUExUFGULhtuMkAcE8itSigChZ6Tb2l5Je75Z7uSNYmmmfLbFJIUDgAZY9Bz3zV+iigDKtP+Ro1P8A69rb+ctatZVp/wAjRqf/AF7W385a1aACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKAIZrqK3dUcvuYEgJGzHAxnoD6inxSJNEksZyjgMpx1FQz28r3Ec0MqIyoyEMm4EEg+o9Kktofs9tFDu3bFC5xjOKWty2o8qa3JaKKKZBBeu8dhcSREiRYmKkDJyAccVRsb1Zr/AMuO5uJo2hL/AL6HZ0I6HaM9a03XfGyHgMCKq29pNFNE8s6OIojGoWPb128nk/3RUu9zaEocjT3/AK8v8ixNMlvEZJCQoIHAJOScDge5qu+p2qJuZpPv7AvkvuLYzjbjPQE9KmuoDcQbFcIwZXDEZ5Vg3T8KqSadcO6zC6jE6zCQN5JK/cKYxu9GPeh36BTVNr33/X3EqapaSIGR5GySu0QvuBGM5XGR1H51ZilSeISRnKn1BHt0NUINNngcyi6QzszszGH5Tu29Bu4+6O9XLaE28AjLbjlmJxjJJJPH40K/UKippe4/6+4moooqjEKKKKACmSyJDE8khwijJNPqC6tvtSIplkj2uHymOSOmcg98H8BTVr6ie2g+CZLiFZUztYdCMEeoPuKkqC2tvs3mfvpJPMfed+3g4A4wB6Zqeh2voCvbUKKKKQwooooAKKKKACiiigAooooAK5Ox8BWINtPrd5e67dwbWR7+YtGjjusQwg+pBPvWvrOmahqRgFlrd1paJuMhto4maQnGM+YrAAc9PWvP21mKWXyIPG3ii7dioVF0YeXLuOFG9bcDax43Bh7GgD1WiiigAooooAyrT/kaNT/69rb+ctatZVp/yNGp/wDXtbfzlrVoAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACoLyZoLYuhUNuVQWGQMsBn9anpGRXUq6hlPUEZBoHFpNNla2mla5lhlkik2Ijho1K9Swx1P939atUyOGKEERRogPXaoGafSQ5NN3QVj6nqNzbXF0kM9rEsFsswE0ZYuSX4GGGPuDsetbFV57CzunD3FpBM4GA0kYYgenNEk2tC6UoxleSMh9Yuzaz3Sy2iLDs/ctGSzZRGPO4f3iOnapv7Sus+Z51r5f2vyPJ8s78eZszu3de/StF7G0klEr2sDSDGGaME8dOaX7Faef5/2WHzs58zyxuz65qeV9zX2tK3w/l/Xz3Mq21W6kjtJnltWS4ZgYkjIZMI7Dncf7uOnepm1C5ii3tJbyFrZ5htQjaRt6/Mcj5vbpV9LG0il82O1gSTn51jAPPXmnLa26btkES7hg4QDI9KEmKVWm3dR/IoyXlzC0sZlt3YeVtcRkAb32nI3c/mKs2k0skk8crxuY2ADRqVHIz0yalFrbqjIsEQVvvAIMH606OKOFSsUaICckKoFNJmcpxasl/Wg+iiiqMgooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigBDjBzjHfNeYrIt8dNmis/E1loISGD7aslv5dzEr5iMgJMipz98AHaTn1Hp/avKtP0u3uNGuUuNR8V2un29vA0SyX0X76CQlVYIoO0fKcKecdhQB6rRWZouj/2Lbzw/b7y9MsvmmW7k3uPlVcZwOPl/WtOgAooooAyrT/kaNT/AOva2/nLWrWVaf8AI0an/wBe1t/OWtWgAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiq8t0Y5/JS3
lmYKGOwqAASR3I9DUkEqz28cyAhZFDAHrgjNFynFpXJKKKKCQooooAKKKguLnyGjUQyStISAqbewz3IoGk27Ir3tt513DI1pHcoqMu18cEleefoapnTX8kLJYwzH7KkS5K/Iw3Z6/UdPSry6jv8sJazs778oCmV2ttOctjr6ZqzBMLi3jmUModQwDdRn1qbJs3VSpTjb+vzFiUpEit1CgGsz7A3m5+xxeZ9o8z7RkZ279316cVrUU2rmcajjew2VS8TqvUqQKzbCyMFzC4sorcJAyOU25Ykrjp9DWpRQ1cUajiml1CqeoW5nEH7hJ1SXc0bYwRtYd+OpFXKKbVxRk4u6KlhAYIXUxLCGkLLGuMKPw4q3RRQtAlJyd2FFFFBIUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFAFDUdQeynsIIrYzyXdx5I+faEARnLE+mExj1Irzy1g0V/FV7eWXh28m0vTbxLea8OqSeTHMjAkrbltpWNmyT0BzgV6Hq2kw6xaLBLNcwMj+ZFPbSmOSNsEZUj2JHORzWfb+ENPs/D9volpNdQWUUgkkCuC1wd29vMZgSdx64wTnHSgDfooooAKKKhumuUt2a0iilm42pLIY1PPOWCsR+VAFC0/5GjU/+va2/nLWrWFpL3j+I9UN7BBDJ9nt8LDMZRjMvOSq/yrdoAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigCvLaJLN5u+VH27SUfGR/k1LFEsMKRIMIihVGc8Din0UWG5NqzCiiufhvblr6FPtVy0hu5EkgMACLGC+OdnoF5zSbsaU6Tmm10OgoqtqDyxabdSW+fOWF2jwMncAccd+aoafdPLqZjju7m5t/JLMZoQm1sjGCEXtmhuzsEaTlFyXQ2KhntkuChZnVkOVZGwemKi1CSSOGMpJJGDIA7RpuIX6YP8qTT5HkE+6SSRFlxG0iBSRtU+g7k0X6CUZKPOmA06JQm2SZWTdhhIcncdxz+NWYYkghSJM7EUKMnJwKfWXfzyx3Eyi4uIlECtGI4gwZstnkqfReKHZDjzVHa5qUVjS3NyPPzcXCSqV8uNYQVb5VP909ye9bNCdxTpuCTfUKKKKZmFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQBlWn/I0an/ANe1t/OWtWsq0/5GjU/+va2/nLWrQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFAGVaf8jRqf/XtbfzlrVrKtP8AkaNT/wCva2/nLWrQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFAGVaf8jRqf/XtbfzlrVrKtP+Ro1P8A69rb+ctatABRRRQAUUU13WONnc4VQST6CgB1FQxXAkfYY3jbG4BwOR68VNQncL3CiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACisjXtTm09dPht9onvrxLVHcZCZDMzY7najY98VDper3d9o2pOURr6ynuLb92hw7ITtIGT1G04z1JoA3aKwfDl3dTtNHfaoLq4CI5gaya2eIHPJVjkgkEA4x8pq0+oamrsF0SVlBwG+0RjPv1oAS0/wCRo1P/AK9rb+ctatYWkzXE/iPVGuLRrZ/s9uAjOrZ
GZecit2gAooooAKbIiyxtG4yrAqR7GnUUAQxW+yTzHleVsbQXxwPwA9vyqaobqRordnTggjJxnaMjJ/AZP4VHaSmR5FWbz41A2ycdTnIyOOOPzqbpOxN0nYtUUUVRQUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFAFHVNMj1OGFHdo5IJlnhkXGUdehweo5II9CaqW+gvZ23kWupXESu08k7BULSySknfnHBUk4A46A5xWzRQBmWekGHVZdUublri8eBbcEIEREDFuB6knkknoMYrToooAyrT/AJGjU/8Ar2tv5y1q1lWn/I0an/17W385a1aACmGaISiIyoJD0TcM/lT6ptbSlnQeX5byCQuSdwwQemPbg5/lSbfQTb6Fyoo7q3mkaOKeJ3T7yq4JH1FSOu5GXJGRjIOCKyU0u4aO3hl8lEt4mjV4ySz5XbyMDA7kZPOKUm1shSbWxppPDIrMksbKv3irAgfWljljmXdFIrr0ypyKqPazTks4iRlUBVUkhsMDzwOOMY9zU8EcgmkmlCKzhV2ocjjPOcDnn9BQm7gm7kskiRLukdUX1Y4FKrK6hlIKkZBB4NQzxyNJHLGEZkz8rnAOffBwf/r062iMMW1sZLFiF6DJzgU7u47u5LRRRTGFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFct4nmvLXxF4WaC+uI4LjUTBLboQEdfImbnjJ5Ud8cdKAOpooooAKKKKACiiigAooooAKKKKAMq0/5GjU/+va2/nLWrWVaf8jRqf/XtbfzlrVoAKoPqDqXkEKm2SXymff8ANnIBO3HTJ9av1Xaxt2n84xnfuDEbjtLDoSucE++KmV+hMr9CqmqswimaBRayyGNHDktnJAJXHAOPXvVmK6d2i8yIIkoyhD5PTOCMccfWoJrSC3mSaJAsjScMzMUQkHJ25wCen1NSWUSK8vClo22BlJxjAOACTj0wPSpXNezZK5r2ZYnl8mIvt3HIUD1JOB+ppsMzvI8UqKkigN8rbgQc45wPQ1I6LIhR1DKeCDTYoUhB2A5JySzFifxPNXrcvW5JRRRTGFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABWHreh3Wrahpd1FfQwDTrj7TGjW5fc+x05O8cYc9uoHNbled6/Nc31x42uFvbu2l0O0Q2JjuHjWOQQmYuVBAbJIB3A8KR0JoA9DGcDJBPfAparafdG9021uyhQzwpJsPVdwBx+tWaACiiigAooooAKKKKACiiigDAwf+EsvywlMP2WDcIic5zJjpzjrWzbCQW6iTO7n7xycZ4z74xVC0/5GjU/+va2/nLWhLcxwttbeWxkhELYHqcVNrO4rWdzOvVmM9x8t0ZSB9lMRYIDjvjj72c7u2KhkS6+0yfLcm8M4Mbgt5IjyOvO3GAcg85rbVldQykFSMgjuKWpdO/UhwucwkN+zxII5xqBmbz5Jd5gZOePQjkYAHbtSW8F4kccdlFdwzrbuLgyk7GfbgbcnGdwBBHb8q3k1C3ecRAvyxVXKEKzDOQG6Z4P5UQX8FxIEQuCw3IWQqHHqpPWs1Tj3IVOPczLZZTK5tI75IDEPMW4LZLbgTt3c527skcZxWnagiSQqJRDgbRJnOec9ecdP1q1RWsYWNIwsVrsNmPIkMIJ3iPOfbpzj6e1OtQ4gAfd1O3ecttzxn8Knoqra3KtrcKKKKYwooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKAC
iiigAooooAKKKKACiiigArnNY8IRatcX7i/uLaHUoEt7+GNVImRcjgkZUlWKkjsexANdHXHeNdPC6bdS6dJOPEF66R2DJMwZJBtHy46IACzdsZzQB2CqFUKoAAGAB2paRc7RuIJxyQKWgAooooAKKKKACiiigAooooAikjbbK0HlpcMuBIybhkZxkAgkDPTIrM+xa4ZXkOqWAZk2fJYuOmcHmU9Mmtiik1cGrmRFaa9FEkaalpu1FCj/QJOg/7bU7yNf/AOglpn/gvk/+PVq0UwMH+x9W+zxQ/wBp2RWOQvtaybaeuBjzc4GfWnJpesxrahdVs2NuuF32TEE4xniUdsj8a3KKnkRPIjK8jX/+glpn/gvk/wDj1Hka/wD9BLTP/BfJ/wDHq1aKooyvI1//AKCWmf8Agvk/+PVU1GbxBYWqTfbtMfdPDDj7DIP9ZIqZ/wBd23Z/CugrO1u3lubCKOFC7C7tpCB/dWdGY/gAT+FAEfka/wD9BLTP/BfJ/wDHqPI1/wD6CWmf+C+T/wCPVq0UAZXka/8A9BLTP/BfJ/8AHqPI1/8A6CWmf+C+T/49WrRQBleRr/8A0EtM/wDBfJ/8eo8jX/8AoJaZ/wCC+T/49WrRQBleRr//AEEtM/8ABfJ/8eo8jX/+glpn/gvk/wDj1atFAHP383iCyW3b7dpj+bOkP/HjIMbjjP8Arqt+Rr//AEEtM/8ABfJ/8erSkijlCiSNX2sGXcM4I6Ee9PoAyvI1/wD6CWmf+C+T/wCPUeRr/wD0EtM/8F8n/wAerVooAyvI1/8A6CWmf+C+T/49R5Gv/wDQS0z/AMF8n/x6tWigDK8jX/8AoJaZ/wCC+T/49R5Gv/8AQS0z/wAF8n/x6tWigDK8jX/+glpn/gvk/wDj1VLybxBa3NhD9u0xvtc5hz9hkG3Ebvn/AF3P3MfjXQVnalbyzX2kPGhZYbtpJCP4V8iVc/mwH40AR+Rr/wD0EtM/8F8n/wAeo8jX/wDoJaZ/4L5P/j1atFAGV5Gv/wDQS0z/AMF8n/x6jyNf/wCglpn/AIL5P/j1atFAGV5Gv/8AQS0z/wAF8n/x6jyNf/6CWmf+C+T/AOPVq0UAZXka/wD9BLTP/BfJ/wDHqPI1/wD6CWmf+C+T/wCPVq0UAc/dy+ILa6sYft2mN9qmMWfsMg24jd8/67n7mPxq35Gv/wDQS0z/AMF8n/x6tJ4o5Hjd41Zo23ISMlTgjI9DgkfiafQBleRr/wD0EtM/8F8n/wAeo8jX/wDoJaZ/4L5P/j1atFAGV5Gv/wDQS0z/AMF8n/x6jyNf/wCglpn/AIL5P/j1atFAGV5Gv/8AQS0z/wAF8n/x6jyNf/6CWmf+C+T/AOPVq0UAZXka/wD9BLTP/BfJ/wDHqqTTeIIdTtLP7dph+0JI277DJ8u3b287vuroKzrq3lfXdOnVCYoo5g7ehO3H8jQBH5Gv/wDQS0z/AMF8n/x6jyNf/wCglpn/AIL5P/j1atFAGV5Gv/8AQS0z/wAF8n/x6ucvvBviW71ufVYPGsllLMix+XBp8bIigDhd5YgEjJGeT+FdxRQBkR2viBI1Q6rpzlQAWbT3y3ucTYp3ka//ANBLTP8AwXyf/Hq1aKAMryNf/wCglpn/AIL5P/j1T2kWqpNm8vLOWLH3YbVo2z9TI38qvUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABWP4ilurTTp76C/+yx20EkhXylfzHwNgOe3BGBgnIwa2Kx9a0OfVrm0mi1a6svsxLKkUcTqzHGGIdW5HY9smgC/p0txcaZaTXkIgupIUeaIHOxyoLL+ByKs1HbxvFbRRyzNPIiBWlcAFyBySAAAT7ACpKACiiigAooooAKKKKACiiigAooooAKKKKACiiigArmvGOvXWj6TMdNCNeRosrs4ysUZYLkj1JyAPYnnaRXRLLG0jRrIpdfvKDyPqKwNf8JaD4l+0RXtvCbuRFVplx5qqDk
f596AOiopkUUcEKQxIEjjUKqjoAOAKfQAUUUwSxtI0YdS6/eUHkfUUAPooooAKKKKACiiigAooooAKKKKACiiigArkb3xG48RahZvevYWlg0Efmi1aRZJJMMd7YIRcFR1HJPPauurE1LwxaanPcvJNPHHdrGt1EhXbMIzlc5BI9DgjI/A0AbdFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAcd4wtdXGq2OoW+kLrmlRQSR3Om+aEbcxUiVVb5ZCApGDyM8daqWXi6wg0zQ7XQ4pVj1D7S6C7ikZrYRP86Mi5bIdgmM8Ducc9TeaO1zqIvoNS", 308 | "text/plain": [ 309 | "image/jpeg" 310 | ] 311 | }, 312 | "execution_count": 66, 313 | "metadata": {}, 314 | "output_type": "execute_result" 315 | } 316 | ], 317 | "source": [ 318 | "b, _ := PlotClusterData(scatterData, labels, \"Sepal length\", \"Sepal width\")\n", 319 | "display.JPEG(b)" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": null, 325 | "metadata": {}, 326 | "outputs": [], 327 | "source": [] 328 | } 329 | ], 330 | "metadata": { 331 | "kernelspec": { 332 | "display_name": "Go", 333 | "language": "go", 334 | "name": "gophernotes" 335 | }, 336 | "language_info": { 337 | "codemirror_mode": "", 338 | "file_extension": ".go", 339 | "mimetype": "", 340 | "name": "go", 341 | "nbconvert_exporter": "", 342 | "pygments_lexer": "", 343 | "version": "go1.11.4" 344 | } 345 | }, 346 | "nbformat": 4, 347 | "nbformat_minor": 2 348 | } 349 | -------------------------------------------------------------------------------- /Chapter04/Use PCA For Dimensionality Reduction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Use PCA For Dimensionality Reduction\n", 8 | "\n", 9 | "In this example we will use Principal Components Analysis (PCA) to reduce the dimensionality in the feature set from 4 to 2 in a way that best explains the output variance." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import (\n", 19 | " \"fmt\"\n", 20 | " \"github.com/kniren/gota/dataframe\"\n", 21 | " \"github.com/kniren/gota/series\"\n", 22 | " \"io/ioutil\"\n", 23 | " \"bytes\"\n", 24 | ")" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "const path = \"../datasets/iris/iris.csv\"" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "b, err := ioutil.ReadFile(path)\n", 43 | "if err != nil {\n", 44 | " fmt.Println(\"Error!\", err)\n", 45 | "}\n", 46 | "df := dataframe.ReadCSV(bytes.NewReader(b))\n", 47 | "df.SetNames(\"petal length\", \"petal width\", \"sepal length\", \"sepal width\", \"species\")" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 4, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "data": { 57 | "text/plain": [ 58 | "[150x5] DataFrame\n", 59 | "\n", 60 | " petal length petal width sepal length sepal width species\n", 61 | " 0: 5.100000 3.500000 1.400000 0.200000 0 \n", 62 | " 1: 4.900000 3.000000 1.400000 0.200000 0 \n", 63 | " 2: 4.700000 3.200000 1.300000 0.200000 0 \n", 64 | " 3: 4.600000 3.100000 1.500000 0.200000 0 \n", 65 | " 4: 5.000000 3.600000 1.400000 0.200000 0 \n", 66 | " 5: 5.400000 3.900000 1.700000 0.400000 0 \n", 67 | " 6: 4.600000 3.400000 1.400000 0.300000 0 \n", 68 | " 7: 5.000000 3.400000 1.500000 0.200000 0 \n", 69 | " 8: 4.400000 2.900000 1.400000 0.200000 0 \n", 70 | " 9: 4.900000 3.100000 1.500000 0.100000 0 \n", 71 | " ... ... ... ... ... 
\n", 72 | " \n" 73 | ] 74 | }, 75 | "execution_count": 4, 76 | "metadata": {}, 77 | "output_type": "execute_result" 78 | } 79 | ], 80 | "source": [ 81 | "df" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 5, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "// Standardise maps the given column values by subtracting mean and rescaling by standard deviation\n", 91 | "func Standardise(df dataframe.DataFrame, col string) dataframe.DataFrame {\n", 92 | " s := df.Col(col)\n", 93 | " std := s.StdDev()\n", 94 | " mean := s.Mean()\n", 95 | " v := make([]float64, s.Len(), s.Len())\n", 96 | " for i := 0; i < s.Len(); i++ {\n", 97 | " v[i] = (s.Elem(i).Float()-mean)/std\n", 98 | " }\n", 99 | " rs := series.Floats(v)\n", 100 | " rs.Name = col\n", 101 | " return df.Mutate(rs)\n", 102 | "}" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 6, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "// DropColumn returns a new dataframe that does not include the given column\n", 112 | "func DropColumn(df dataframe.DataFrame, col string) dataframe.DataFrame {\n", 113 | " var s []series.Series\n", 114 | " for _, c := range df.Names() {\n", 115 | " if c == col {\n", 116 | " continue\n", 117 | " }\n", 118 | " s = append(s, df.Col(c))\n", 119 | " }\n", 120 | " return dataframe.New(s...)\n", 121 | "}" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 7, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "df = Standardise(df, \"petal length\")\n", 131 | "df = Standardise(df, \"petal width\")\n", 132 | "df = Standardise(df, \"sepal length\")\n", 133 | "df = Standardise(df, \"sepal width\")\n", 134 | "labels := df.Col(\"species\").Float()\n", 135 | "df = DropColumn(df, \"species\")" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 8, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "data": { 145 | "text/plain": [ 146 | "[150x4] 
DataFrame\n", 147 | "\n", 148 | " petal length petal width sepal length sepal width\n", 149 | " 0: -0.897674 1.015602 -1.335752 -1.311052 \n", 150 | " 1: -1.139200 -0.131539 -1.335752 -1.311052 \n", 151 | " 2: -1.380727 0.327318 -1.392399 -1.311052 \n", 152 | " 3: -1.501490 0.097889 -1.279104 -1.311052 \n", 153 | " 4: -1.018437 1.245030 -1.335752 -1.311052 \n", 154 | " 5: -0.535384 1.933315 -1.165809 -1.048667 \n", 155 | " 6: -1.501490 0.786174 -1.335752 -1.179859 \n", 156 | " 7: -1.018437 0.786174 -1.279104 -1.311052 \n", 157 | " 8: -1.743017 -0.360967 -1.335752 -1.311052 \n", 158 | " 9: -1.139200 0.097889 -1.279104 -1.442245 \n", 159 | " ... ... ... ... \n", 160 | " \n" 161 | ] 162 | }, 163 | "execution_count": 8, 164 | "metadata": {}, 165 | "output_type": "execute_result" 166 | } 167 | ], 168 | "source": [ 169 | "df" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 9, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "import (\n", 179 | " \"github.com/gonum/matrix/mat64\"\n", 180 | ")" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 10, 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [ 189 | "// DataFrameToMatrix converts the given dataframe to a gonum matrix\n", 190 | "func DataFrameToMatrix(df dataframe.DataFrame) mat64.Matrix {\n", 191 | " var x []float64 //slice to hold matrix entries in row-major order\n", 192 | " \n", 193 | " for i := 0; i < df.Nrow(); i++ {\n", 194 | " for j := 0; j < df.Ncol(); j ++ {\n", 195 | " x = append(x, df.Elem(i,j).Float())\n", 196 | " } \n", 197 | " }\n", 198 | " return mat64.NewDense(df.Nrow(), df.Ncol(), x)\n", 199 | "}" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 11, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "features := DataFrameToMatrix(df)" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "# Create PCA\n", 216 | "\n", 217 | "Now 
that we have pre-processed the data we will perform PCA on the features matrix, keeping the first 2 components." 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 12, 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [ 226 | "import (\n", 227 | " \"gonum.org/v1/plot\"\n", 228 | " \"gonum.org/v1/plot/plotter\"\n", 229 | " \"gonum.org/v1/plot/plotutil\"\n", 230 | " \"gonum.org/v1/plot/vg\"\n", 231 | " \"github.com/cdipaolo/goml/cluster\"\n", 232 | " \"github.com/cdipaolo/goml/base\"\n", 233 | " \"github.com/gonum/stat\"\n", 234 | " \"bufio\"\n", 235 | " \"strconv\"\n", 236 | ")" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 13, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "model := stat.PC{}\n", 246 | "if ok := model.PrincipalComponents(features, nil); !ok {\n", 247 | " fmt.Println(\"Error!\")\n", 248 | "}" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 14, 254 | "metadata": {}, 255 | "outputs": [], 256 | "source": [ 257 | "variances := model.Vars(nil)\n", 258 | "components := model.Vectors(nil)" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 15, 264 | "metadata": {}, 265 | "outputs": [ 266 | { 267 | "name": "stdout", 268 | "output_type": "stream", 269 | "text": [ 270 | "Component 1: 0.730\n", 271 | "Component 2: 0.229\n", 272 | "Component 3: 0.037\n", 273 | "Component 4: 0.005\n" 274 | ] 275 | } 276 | ], 277 | "source": [ 278 | "// Print the amount of variance explained by each component\n", 279 | "total_variance := 0.0\n", 280 | "for i := range variances {\n", 281 | " total_variance += variances[i]\n", 282 | "}\n", 283 | "\n", 284 | "for i := range variances {\n", 285 | " fmt.Printf(\"Component %d: %5.3f\\n\", i + 1, variances[i] / total_variance)\n", 286 | "}" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 16, 292 | "metadata": {}, 293 | "outputs": [], 294 | "source": [ 295 | "// 
Transform the data into the new space\n", 296 | "transform := mat64.NewDense(df.Nrow(), 4, nil)\n", 297 | "transform.Mul(features, components)" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 17, 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "// PCA keeps top 2 components (one for x axis, one for y) and returns this as map from label to XYs\n", 307 | "// Matrix must have at least 2 columns or this will panic\n", 308 | "func PCAToScatterData(m mat64.Matrix, labels []float64) map[int]plotter.XYs {\n", 309 | " ret := make(map[int]plotter.XYs)\n", 310 | " nrows, _ := m.Dims()\n", 311 | " for i := 0; i < nrows; i++ {\n", 312 | " var pt struct{X, Y float64}\n", 313 | " pt.X = m.At(i, 0)\n", 314 | " pt.Y = m.At(i, 1)\n", 315 | " ret[int(labels[i])] = append(ret[int(labels[i])], pt)\n", 316 | " }\n", 317 | " return ret\n", 318 | "} " 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 18, 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [ 327 | "/**\n", 328 | " NB. This is required because gophernotes comes with an old version of goml. 
When it gets updated we can remove most of this.\n", 329 | "*/\n", 330 | "\n", 331 | "type LegacyXYs plotter.XYs\n", 332 | "\n", 333 | "func (xys LegacyXYs) Len() int {\n", 334 | " return len(xys)\n", 335 | "}\n", 336 | "\n", 337 | "func (xys LegacyXYs) XY(i int) (float64, float64) {\n", 338 | " return xys[i].X, xys[i].Y\n", 339 | "}" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": 19, 345 | "metadata": {}, 346 | "outputs": [], 347 | "source": [ 348 | "scatterData := PCAToScatterData(transform, labels)" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": 20, 354 | "metadata": {}, 355 | "outputs": [], 356 | "source": [ 357 | "func PlotPCAData(labelsToXYs map[int]plotter.XYs, xLabel, yLabel string) ([]uint8, error) {\n", 358 | " p, err := plot.New()\n", 359 | " if err != nil {\n", 360 | " return nil, err\n", 361 | " }\n", 362 | " p.Title.Text = \"Iris Dataset PCA Example\"\n", 363 | " //p.X.Min = 4\n", 364 | " //p.X.Max = 9\n", 365 | " p.X.Padding = 0\n", 366 | " p.X.Label.Text = xLabel\n", 367 | " //p.Y.Min = 1.5\n", 368 | " //p.Y.Max = 4.5\n", 369 | " p.Y.Padding = 0\n", 370 | " p.Y.Label.Text = yLabel\n", 371 | " for i := range labelsToXYs {\n", 372 | " s, err := plotter.NewScatter(LegacyXYs(labelsToXYs[i])) //Remove LegacyXYs when gophernotes updated to use latest goml\n", 373 | " s.Color = plotutil.Color(i)\n", 374 | " s.Shape = plotutil.Shape(i)\n", 375 | " p.Add(s)\n", 376 | " n := strconv.Itoa(i)\n", 377 | " p.Legend.Add(n)\n", 378 | " if err != nil {\n", 379 | " return nil, err\n", 380 | " }\n", 381 | " }\n", 382 | " w, err := p.WriterTo(5*vg.Inch, 4*vg.Inch, \"jpg\")\n", 383 | " if err != nil{\n", 384 | " return nil, err\n", 385 | " }\n", 386 | " if err := p.Save(5*vg.Inch, 4*vg.Inch, \"PCA Scatter.jpg\"); err != nil {\n", 387 | " return nil, err\n", 388 | " }\n", 389 | " var b bytes.Buffer\n", 390 | " writer := bufio.NewWriter(&b)\n", 391 | " w.WriteTo(writer)\n", 392 | " return b.Bytes(), nil\n", 393 | 
"}" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": 21, 399 | "metadata": {}, 400 | "outputs": [ 401 | { 402 | "data": { 403 | "image/jpeg": "/9j/2wCEAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDIBCQkJDAsMGA0NGDIhHCEyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMv/AABEIAYAB4AMBIgACEQEDEQH/xAGiAAABBQEBAQEBAQAAAAAAAAAAAQIDBAUGBwgJCgsQAAIBAwMCBAMFBQQEAAABfQECAwAEEQUSITFBBhNRYQcicRQygZGhCCNCscEVUtHwJDNicoIJChYXGBkaJSYnKCkqNDU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6g4SFhoeIiYqSk5SVlpeYmZqio6Slpqeoqaqys7S1tre4ubrCw8TFxsfIycrS09TV1tfY2drh4uPk5ebn6Onq8fLz9PX29/j5+gEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoLEQACAQIEBAMEBwUEBAABAncAAQIDEQQFITEGEkFRB2FxEyIygQgUQpGhscEJIzNS8BVictEKFiQ04SXxFxgZGiYnKCkqNTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqCg4SFhoeIiYqSk5SVlpeYmZqio6Slpqeoqaqys7S1tre4ubrCw8TFxsfIycrS09TV1tfY2dri4+Tl5ufo6ery8/T19vf4+fr/2gAMAwEAAhEDEQA/APf6KKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKyPEOvxeHbKC6mtLi4SW4jtwINuQ0jBVzuYcZIrXrjviQw/sGxQZLf2rZvtUZO1Z0Zjx2ABJNAHX7j5e4qQcZ28Z+lQ2dy11YwXElvLatKgYwz4Dxk/wtgkZ+hNcZp+n6Td+PPEt1e21tIsf2aaGSRRgfuiHZT074P1wa5SK3b/hAfDswWC4vbPSHjbSr+0aWO6zt3IuOUm+UAdTz070Aera5q0ehaJe6rNBLNDZwtPIkW3dsUEnGSB0HrWQfGtvbNZvqWlajp9reFFivJ1jaHc+NoZkdimSQPmAHvS+N3eX4b64HiKTz6ZMiwj5m3tGQEGOpyccVzviWU+JPh7b+G9ISS4vrxLaFiIn2W6hkLu7Ywu0Ke+c4wDQB6RnnFQ293BdI7wSrIscjRsVPAZThh9QQRXm+rTy3HimK6WwitJ7bX4Y5HW0fzXh2bPMab7oRgcYxjjrnNQ6eLXTNIuLH+xrcyS+IrhLpprJnSCMyzPFIyKBvXG0Lzgbwc0AeqEgDJNFePpbyjw7bBpYBdWN7qYgsb+xd7adPOO2Mp1jO0rs6nBIAIq5d6fdanqU6anjR1utNshYNJZPO1q4DF0icMAsiv14yRt9KAPTorqCeaeKKVXkt3CSqDyjFQwB98MD+NYV94uSx1260n+ydQuJba0W9keHyivlFmXIBcEnKNwBmqHhW1trPxh4sR7VIryW9WZH8naXhMEOSGxyN4bv1z3qhfWb6l8Vr6BNQns4ZdEht3kgVcu3mylkDMpAbawPHOD2oA6L/AIS7Tpn06LT1m1C41G2+128NuF3eTx+8YsQFXLAcnOTgA4NaNjqMl3cT282n3VpJCqN++2FXDZwVKse6nrg+3NchFpMfhPx/a3q2zJojaKmmxTKpb7M8TlgrYHyqynO48ZXnqK2dU8UwJp2oGwZmljjRIJzGfKeWQsFAOMMFwGYjgA0AWdE8UWWu6lqlhbxzRzadIEfzQAJFOQHTB5UlWGfan
eIPEMfh8WHmWVzdG9ultIxAU4kbO3O5hxwea4+a0ufCPi7w3fy3VvPZzWx0mf7PbMhWIDfHI2XfIDDBbjG8+taXxHKvB4dTzJE261byu8QyyRqG3P0OAMjn3oA6fT9SlvLieCfTbqyeJUf9+UIcMWHBRmHG05B9R61oVxUtzoM+iappV5LcarZujSSvMvLFzhUViAN3oeMcHjrWFZTS6R4b1ix1PTY7/WraS3gupWjMiT25ZFWT5QCyRofmQDqpJxuzQB6kDkZFJkeo5ryGW2Ty721uIYmt18UWUscYtTFGIWSEuyxtnCH589vvU3UtH0u0s/FjWenW0U1prFrJpxihAaLiAuYsDIG4SZ28cN70Aew5GcVladrJvtb1bTGtxG2ntEN4fcJA67gcYGDjtzXnGrWlpdX3jVLG2jk146jbtpzwxZljc28B3hgMquSSx4GM561e1xL2e68cJpwd55GsDiJGdpI12iYKFOW+XcCBzzigD06iuZ8I2MFsdRuLS9hmt7qVHEFvaNbxQsECnapJwTgEj1z6101ABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUVnPrunxX62UkkiStMIFLQOEaTbu2h8bSce/t1oA0aKzdU12z0eWCK5S8kknVmRLWymuDhduSRGrYHzL19aJte063nSGadkkYRkgxNiPecJvOMIWPA3Y5oA0qCcdaKyvEP/INh/6/rP8A9KI6ANWiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAge4cTtFHCXKqGJ3AdSf8KfBL50KybSu4dD2pj27NOZUnkjJUKQoUjgn1B9afDEIYVjDFgvc9TUq9yVe5JRRRVFBRRRQAUUUUAZmnaLDpupanfRXE7vqMwmmSQrtVgioNuFBHyqo5J6Vp0UUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAVyeoXg1LxPa2d1p2pLa2NyskTraOyTTYwrlwMBF3Z9yMnAHzdZRQBzmpXMd35V0ttrFvcpFOkDwwNuB3gYIwRyUUjdwRzXPXGka7cWOpW97bO+oavDYkyxAFIpECibnooUguM9c8ZPFeiUUAU73SdO1Jka+0+1umQYUzwq5X6ZHFYus6Do9na21xa6TYQTJf2m2SK2RWX/SIxwQM101ZXiH/AJBsP/X9Z/8ApRHQBq0UUUAFFFFABRRRQAUySWOGMySuqIOrMcAU+q13G7GCVE8zypNxTIBPBHGeM896GVFJuzJopop03xSK65xlTnmn1Vtkka5muHiMQkVVCEgnjPJwSO+Pwq1SQSSTsgooopkhRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFZXiH/AJBsP/X9Z/8ApRHWrXOa9fzvFDbnS7xI/wC0LUfaGMWzAuI+eHLYP079qAOjooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKzpZh5l3vvTGY2wq7lGPkU9x6k1ehYtDGxOSVBP5VKld2EncfRRRVDCiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKyvEP/INh/wCv6z/9KI61ayvEP/INh/6/rP8A9KI6ANWiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACqUlxcCG4mQRbIt3ykHJxV2q72MEm/cJMPncBKwBz14BxUyT6CafQnKqTkqCfpS0UVQwooooAKKKKACiq18wW3BZyi70DMG24G4Z57VHatEbuRYJjImwE/vS4ByfUnFTza2FfWxdorJiCfZ7YCO
cXGY9xKOOcjdknj1rWoi7iTuFFFFUUFFFBIAyeBQAUVStruSSYeYqrFMN0BA5IHY+5HzD2z6VdptWEncKKKKQwooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKyvEP/INh/6/rP8A9KI61a5zXra/WKGV9QD2/wDaFqfJ8gDj7RHgbs9uKAOjooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAqKWcROqbHdmBICjPAxn+YqWoJoZGmSWKREZVZfmQsDnHuPSk720E720HwzLMGwrKVO0hhgg4z/WpKhgieLzDI6uztuJVdo6AdMn0qahXtqC8wqCaaVZ44oo0YsrMSzEYwR7H1qeopbdZXVyzqyggFWxwcZ/kKHe2gO9tCvFeTySyRC3UvH9795x1IGOParMEvnRByu05IIznBBx/SoVsRG7PFPMhYYbBBzyT3B9TU0MQhiCBmbknLdSScnp9aUebqKN+pDd3MkJRII/NlOXK5/gHX8ew9z7VYjkWWNZEYMjAFSO4qCSyiknabdKrsACVkYcDOOh9zUlvAltCIo920En5mJPJyeT9a0draAr31JaKKKkoKKKKACqeorHIkUcxcQu5WTazDI2twSO1S/bLXfs+0w7s7cbxnPTFT0t9hOzVjNtbW3e4d1ad0jZTHvnkYA49zWlRRTu+oJWCiiigYUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFZXiH/AJBsP/X9Z/8ApRHWrWV4h/5BsP8A1/Wf/pRHQBq0UUUAFFFRzTRwJvkbAzgcZJPoAOpoGk3oiSio4Z47hSY2zg4YEEEH0IPIqSgGmtGFFFFAgooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKAIbiV4wgjClnfaNx4HBP8ASq0s8/nCB3WM5X54/Q7vUf7NW5YUmUK+7g5BVipB+oqF4ra3BaQO/mYXDbpCcZIGOfeokmS0xYGcXUsZnaVQiMN23gktnoB6CrNQW7QHcIU2EY3Dyyh9uCBU9VHYa2CiiimMKKKKAKAjuPIEHkjHmZ37xjG/P8qv0UjMqKWYgKBkk9hSSsJKwtFUra6lebE6hUmG+DjBx6H3xg/iR2zV2qasCdwooopDCiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigArK8Q/8g2H/AK/rP/0ojrVrnNe0bS4oob2PTbNLs6hasZ1gUPuNxHk7sZycn86AOjqP7RCJ/JM0fm9dm4bvyqSspoZfskln9mcys5Im425JyJM56jg465HpzSbNIRUtzVqvdRSM0MsQVniYttY4BBBB57Hn/OaddzNBbNIoBbIUZ6AkgAn25qJHnhuo4ppVlWUHBCbSpH9P5cdc0MIJ/Ev67jreOXz5biZVRpFVQinOAM8k+vzH8hVmiimRJ3dwooooEFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFADZWKxOw6hSRWSs9wbu3SK+DBlUsHVWySrHtj+7WxVd7K3eXzNhV8YyjlePwI9aiUW9iZJvYfbSNLAGfG7JBx04JH9KS4jkYxPEFLRvuwzYB+Ujrg+tSRxrFGEQEKPU5/U0ksqwx73zjIGFGSSTiq6aj6alOK6VZppZgVzhPkR2UbSQedoHU1frMTzplltl2qCzPl1YHDMT6Vp1MGxRCiiirKCiiigApksSTxPFIu5HGGB7in1VvnWOKMySeXGZAGbft4+v1xRe2onsKtjbrIkmxiyHKlnY4OMdz7mrNU7RozdTCGYyRhEP+sLgHLZ5JPtVyhSctWEbW0Ciqt3JKSlvbMFmfJ3EZCKOpI/Ifj7VLbTefAH27W6Mv91hwR+dO2lwvrYlooopDCiiigAooooAKKKKACiiigAooooAKKKKACisLxPLdeTYWlrqL6b9suhA92kasyDY7ALuBAJKgZI7+uKwPEN7r
3hzw+1vc38t4zz26W2opEEkGZkDJKF4BKlsMAAeQQDjcAd5RRRQAUUUUAFZXiH/AJBsP/X9Z/8ApRHWrWV4h/5BsP8A1/Wf/pRHQBq0UUUAI6LIjI6hlYYIIyCKihtIYGLRqdxGNzMWIHoCTwPapqbJIkMTyucIilmPoBQNN7IjnuBBsARpHc4VExk9+/Apsd2jJKZFaExcyLJj5R1zkHGPeq0skss1uWia2csfKdsMOnKsAeMgZ69hz2LL+ymksLjMnmTSlA5RMYQHoFzz3OM5OT7VN30NlCOil1/z+4uw3kM0mxd6sRkB42TI9sgZqeufjDJcC5jtI4IoEJ2x7lWSQ/KuQVGMAtkj29K1VkuYbiKOd4pFlyAUQqVIGfU5GAf8mhSCpSUX7pboooqjAKKKKACiiigAooooAKKKKACiiigAooooAKKKKAKt66p5AeUxI0mGbdt42sev1Aqkqvdz3EdvfNsjMbKchxnqefwrXqncrC1wFazWeQrnJVeBn3qJR6kSiFis0iJczujM8Sj5Vx7/ANauVUS6URRqlu+SWQRrtG3acHvjFTQzedv/AHboyNtKtj0B7E+tONrWHG2xLRRRVFBRRUVxOttA0rBmA4CqMlieAB7k0JXDYlrIu7iO51Q2DQSTLHFvKoVGSeO5HQEf99A+lWPtE08U88EyeWvKfJnI2g+vvVCGx1LyLdtloJ1l85pvNbLE/e429CCRjPHHoKuna7uZTlfRGtZNO1qv2iNkkXKncRlgDweCRyOasUUVLd2aJWRDLZ20775reGR8Y3OgJx6frToYIbdSsMSRqTkhFABPrxUlFF3sFluFFFFIZBJdxx3Udu27c46gcL6Z9M4OPpU9Z8lhcutwv2mHEzFsmEll/u87u2B+WavoGCKGYMwHJAxk1UkuhKb6i0UUVJQUUUUAFFFFABRRRQAUUUUAQ3V1DZWslzcSCOGJdzuRwo9a5aD4neE7q1gkttUimuJwmy0jO6Ys2Pl2jvzXXnpx1ryTTvFIudPtGl+IccNxJGm6wttPjCIxA/dKdu7A+6DnPegD1uiiigAooooAK5zXr64eKGBtKvEj/tC1H2hmi2YFxHzgOWwf93PPOK6OsrxD/wAg2H/r+s//AEojoA1ahuZjbwFwu5iyqq5xkkgDn0yampksSTxGOQZU+hwfqD2NA42ur7EEU063IguBGS6F1aPIHBGQQfqOfr0qeWNJoXicZR1KsPUGo4bVYpDIXkkkxtDSHJA9B2H9axZrqRpJUimK36Ena8jcncdqKmcEYAyfQ59xLdtzaNP2kvd6GnJE8KefdXBljtwXVQgBJAPJ9T9MCs+bUrw3rQOfIckiNEdSowoYlyy9MHse341b+0z3lrMfsiGEbo5EMh3MRkMF49QQOmfamPpMd3Jb3hnM0iL+6M0SsMH1AAJ/P+tS7vY1hyxf73+vu/Et2VwuoWIMsO1mULLE46EgHH0wR+dSQ2cMD70DlsbQXkZ8D0GScdqW3g8hXy5eSRt7sRjJwB07DAA/CpqtLuc0patR2CiiimQFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFV0voH2YMmHxtYxMAc9OSMVYqlHb3IgggYRbItmWDHJ247Y9qlt9CW30LtQywM8wlSZo2C7eADkfjU1ZFxc2oW88y9CTKWCr9oK4wOOM0SaS1CTSWpdFo6CPZMd6szbmUHO45PAxRG0ds0gmuY/Mdt5yQvYDpn2qyv3Rnriqv7yK5mb7M8iuQQylfQDuR6UmrbA1bYsNLGiB2kVVPRicA0qSxygmN1cDupzVMQyR2tiPJLtDjegIyPkI7nHU1Lbq/nTSNC0QbaAGxk4+hNNNgmyzVW5t5pbiGSOaNRHk7XjLZY8Z4YdBkfjVqirTsNq5Qt9Ljj84zrBKZHL/LFtxkDI5Jzzk/jV8AAYHAprusaM7sFVRkk9hVNNUjlkiEaMY24dzx5bE4CkHvkEe3HrRyt62F7sdCW7lkVVhgI
E8vCkjIUd2I9v5kUR3a/ZGmlGwx5Eijnaw6gevt65FPmtIJ3V5YlZlGAT1AqCTT1CqLYpDiQSMCm4OQMDPI6cH8BVLltZifNe6LFvOLiESBWQ8hkbqpHUGiC4iuULwuGUMVJx3H+fxHNVJLS7CXBSaNnm2ggAxgY4JzzyV4z7Cn28NxHd7zDBFCY9rKkhbkfdwNo7ZH5elDiugJvqXaKKKgsKKKKACiiigAooooAKKKKACiiigAooooAK810zxBqKTXWlyNq91qdwluqwT6WyQ2zl2WRQRGEEQXb8xJyOhJr0o9OOteR2NwJ9Otpr+78eyXskaNM32eZLcuQCeNgUR5/DFAHrlFFFABRRRQAVleIf8AkGw/9f1n/wClEdatZXiH/kGw/wDX9Z/+lEdAGrRRRQAVBNarLJ5gkkik27S0bYyPQ/mffmp6KBptbGZcWaW8aw27SqJ22eUHwrHBJJJBI4BzgjP45q1bySLM1tMI9yoGVoxgEdOnbH9almhSePY+cZyCDgg+oNNgtkhZn3O8j4DO5ySB0HsOam2pq5qUbS3/AK/r+tJqKKKoxCiiigAooooAKKKKACiiigAooooAKKKKACiiigAqK5kaG1mlXG5EZhnpkCpaa6LIjI4DKwwQe4pPYGZy39wssBkTMMrbAwjC9eAfvn+VSSR3H2a5gWEHzN+1t4A+bP8AjTU0vYYwJE2owI+Q7uCD1z7elaNRGLe5mk+pVe4m3TeXDGyxHBLSEE/KD0wfWrCNvjVwMbgDUD2hZpStxKgkOWVQuOgHcHsKjhuZhBbuYI1icKBiUkgHpxtp3s9Sr2eot1fww287LMnmIrYB/vD/AOvUb3FxBexw/POD97CqOoOMcj+7SvBcm1ntxFEVkMmGMnZiT0x71WkgeKe4urWC3jEQ29gTgZOPl46+tTJshtmrDKJoUkAKhhnB6in1HbxiK3SMEnaOp70y6u4rRVMhPzHGFGcDuT7AZJNaxTZpeyuyterdXMvkQGEIhR3EmfnHPHHbIFVLPTbiaOd5rx1jusu8cYU/MevJXpjAH0Far2sUkhkPmBiACVkZcgfQ+9JJi0sXMQ4ijJUEk9B3pqcokOF3dkygqiqWLEDGT1NLVYPcRzxpI8Tq+R8sZUjAz6mlu7lrdVEcRmlY5CA4OB1P+e5A70l7zLvoWKKbHIssayIwZGAKkdxTqBlW7u2tyqxxea333APKoOp9z6DvSW9+lxcNGq/uyD5cmciTHDY+h/OllsRJLLKtxPE8gAJRhxjpjIPv+dQQaStvFBEl3cmOBgyBtn5Z25xgkfQ1p7liPeuaNFFFZlhRRRQAUUUUAFFFFABRRRQAUUUZGcd6ACvMrSVft1sEvfFLeJfMj8+CWO5+zZ3DeCCvlCL72GB6dCTXppzjjrXkdlqbRWNtqmfHE2tFEmlU2V01tK/BZBHt2BDyBgZAOc0AeuVwN1eXgv7+8Eswu4PEdrZRRiQ4EDLAGTHTBV2c++PQY6vRdai1y3nnitby2WKXytl3btC5O1WztYAgfN+lWW02xe9W9a0gN0vSUoN3TAOfXBIz6E+tAHP68zXHieCxnmlisxpV1cDZIVzIrxDfwRygbI92z1ArlotW1W70C71S6kmTUrW00yS3QSEfPIqs3A4+d2ZD6gV6GdC0k2qWx062aBHd1jMYIDOSz/mSc+uall0ywnuIriWzgeaLGx2jBK45GPoenpQAl7b3k7IbW++zAD5h5Ifd+fSsXWbTUYrW2efVPOjF9abo/s6rn/SI+4rpqyvEP/INh/6/rP8A9KI6ANWiiigCveSvGkaxsEaWQJvIyFznn9MD3IqOKR4Ll4Zp/MQR+ZvcAFecc4wMenHY1akjSWNo5EV0bgqwyDVOezSOJPIgVlWQSPGMZkwD69SDgjPpUu5rBxa5WJezwy2PmpKklurjzdjAgrnkcfqO4470lksJujJZxGO2Mfzfuyis2eCAQO2cn3HpxSuJH/tdLhbCR1ERy
pHOcjDkDPQEgd+TxjmrWlXMMjXCLticyki3LDcnAzkdsnJ/H1pXuzZwcaen9f13NOkZlRGdjhVGSfQVWS/hkdQN4RzhJChCsfY/y9e1WWVXRkYAqwwQe4qrnM4uL94qpeuTG0lu0cUpARiwJBPTcO2fxq3VRLJwY1luGkiiIKqVAJI6bj3x+HTvVuhXHPl+yFFFFMgKKKKACiiigAooooAKKKKACiiigAooooAKKKKACqyWSoI182UpHjapIxx07VZopNJiaTKl20YnhE0pjiIbJ8wpk8Y5BHvUFvbRzi6WO4lMLSY+WTcCCq55OT6960qKTjd3E43YAYGK42aeQTy6kbtkuNxQwyRZ2pvOUznHTnHqOtdfLMkKgucZOBgEkn8Kj3W94kkTKHXHzo6EcH2P0q4z5WTUjzaXKmmNcwJHBdsDvXdD8u3aB/AeTyBg9fX0rQdFkjaNxlWBBHqDUKWFrHIsi28YdDlWxyDjHH4E1YpzabuVFWVmQpaxRyBx5hYdN8jNj8zTJLQvcmdbiaNigTChSMAk9wfX9BVmkZlRGdiFVRkk9hUrTYdkRW1uLaHyw7uNxbL4zyc9gBU1Vre+t7kqscnzspfYwIYAHByD054qzTd76grW0CiiikMKKKKACiiigAoqK3uYrpWaFtwVip4I5H+evepaGrAncKKKKACiiigArHnZ2nkufs8/2iJiIQIyRtHUZ/2v/ifStiiqjKxMo3EJwCcge5ryHTtQgvdJtbq41fxvc3EsSPJNFaTC0dyBkgiMJ5Wc85xjv3r1+vKtOnvIUs9Ni8UeIlsFEdtDcHRI0tH6KqiQR7gp4AfIHIw3epKPVaKKKACiiigArK8Q/wDINh/6/rP/ANKI61axfFcpg0MTBdxju7VgvrieM4oGld2RtUVRDTwS25e584TNtK7QAPlJyuO3HfPFXqSY5R5Qooopklee3d5Flhl8qUKVyV3Aj3H/ANesCC0m8t7Vnkhe3kV3lym0YAJcnqcjIHt15zTjqM6W6XiS7phIqPC0h3SNkBlCdFxkgd+Ofe9BfW19cwi4tYSzj9zLguM9du4qOfpnoazbTZ3xjUpRel1+Vv6/rYfZw3c9hZxziNI1WNyRncduCBjHHIGeT9B20GniWVYmlQSN0QsMn8KZdTNDECgBd2CLu6Ak4yaxLgXiQ3li9sJricPLFLGuN3PXknBAIxzxj2BLb5TGMfau70/rVnRUVl2ywC6g+xRsmAfPPllARjvkctnHv1rUqk7mM48rsFFFFMgKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAqOWeGHHmyxx7um9gM1JVeZZRcxSxx7wqMpG7HUr/gaT2ExJD9oEMts0UvlyE/f4PykdQD606FJftEksqou5VUBWLdCT6D1qNHa3VmljO6WU7UXk9P/AKxqaKcSuybHRlAJDDHBzj+RqVuJbktFVzewAnJkwGKlvLbAIOOuMdasVSaY7pkN1I8UO6MqGLquWGQMsB6j1qnMLq5ZIRLbvGQshwhAdeeOp4zj8M+tSanG08CQeQ8sUjjzdhAIUc9yOpAH50zTYPIlmUW8sUYx5ZkYHg5yo5PQ/wAx6VSi17yZDu5WKcdlqizyztHbea7iQYkIAYcenQr8v69a3aKKcpcxUY8ohOATzx6VhQX8f9oIwSf7XKQJUMLD5Tj5en8GVJ7fMfWt6uZFnqS5k8rfdRSb1cKozgn/AKacBstnj+L2FXSUXe5FRtWsdNRSA5AOMe1LWRqFV7jfc2TC1kQmQDD7uCpPOCPbODTL5pXg8uCN5NzbJPLZQVXGT1I57fjmmWIkjmlj+yvDbn503FeCeoABPHf8TVpWVyW9bCwQXCXSuY4IovL2MqOTnH3ew6c/nV2iipbuNKwUUVVvxK9uI4o2cOwWTaQCF74yR9PxzQldg3ZE8Usc0ayROrowyGU5Bp9UrNZUnlX7O0MDfMoYrw3cAAng9frn1q7RJWYJ3QUUU
UhlS+1CGwNskqSyPdTeRGkabiW2s3PoMKxyfSuVvNH1DSYreG5127n0FJI4vssdlHvVNwCq8mclOgJC5xnJ71ueI49RS3tr7SrRLy6spvNFq0gj85SjIyhjwD82Rn+7jjOawRruv+Jiulr4Sv8ATIJcLd3d/JGqxpkbggUkuSMgHgAnPagDt6KKKACiiigArnNe0bS4oob2PTbNLo6hasZ1gUPk3EeTuxnJyfzro6yvEP8AyDYf+v6z/wDSiOgC9FZ20D74oURsYyB0HoPSpmO1SQCSBnA70tFA223dmbHNOLe2ujc+Z5zIDFtXbhiPu8Z4znkngH8J3v40Lny5TEhIeYAbVI6988dyBgfhUqWltHMZkgjWQ5O4KM89arvZzmOW3SVBbyliSVO9QxJYDnHc4Pb3qdUbXpyev9fcFxpVvcNKWLqJhiVUOA/9R26YqpBplyot4Jm3pAVCuWGAFIIwoA5OAOenOPfZAAAA6CijlQlXmlYqSXNncZt3JZWbbnawXcD0DYxnI7HORUsNrHAzMu9nYYLO5Y49MntVRbW5FtHZFYvJTaPN3HJVSMcY+9x6+/tWjTXmKdoq0XoFFFFMyCiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACq0ple6ESS+WuzccKDnn3qzVW7EJeMSWondshRtUkdz1NKWwpbDUWa4hRvNUSRTPhimQcFl6AjtUsMMizPLLIjsyqvyptAxn3PrRashR40gMAjbaUwB1APYkd6npJLcSXUotBc+TJAEi2M7Hf5hzgsT02+/rVm4laGHcihmLKoBOBywH9alpksSzR7HzjIPBwcg5H6ii1loFrLQjjlmM5ilRB8u4FWJ7/SpUdJFyjKwBIypzyDgj86qXEEkMLvbCWWdgEBLglQTyRuOOOv4UyzSSC5KR2ksNuyc72U4YYA6MTyP5e9XGPu3YrtOxfJABJIAHUmmQTx3MKzRNujboahvoppoVjhWNlZh5iu5XK9xkA9eB9M0lrHcpczNIkSRSYbakhbDdzyo6jH4/WnZWuO7vYsuxVGYKWIBIUdT7ViW12i3UcgR/tMkvlXAOACT2GTztxx7Bu9bNx5xt5Bb7POKnYXPAPvWE9ne/bVzYfuUj8sPHMrHH3twzglt2Ovv61dO1ncio3dWNS5uplmPkgNHB80/GSc9h7gfN+Q71PPK6WrywJ5z7cooP3vT8KghsopYlluLZUnfmQA/xd+hq3HGkMSRRqFRAFVR0AHapdilczrNWt7lFjgn2Sg+czgDL9d5578g/h6Vp0UUpSu7lRVtAoooqRhRUNzcx2sJlk3bQcYUZJ+gqVWDKGUggjII707dQv0FooopAFFFFABRRRQAUUUUAFFFFABWV4h/5BsP/X9Z/wDpRHWrXOa9fXDxQwNpN5HH/aFqPtDPDsIFxHzgSFsH/dzzzigDo6KKKACiiigCOaZII97k4yAABkknoAKbBcrMzJseORcFkcYIB6H0I+lVNRSWe5t4IpvJYBpVbGdzDAx+TH/Iqnpl5cSu11crvh3iCOVSOhIwSMDOSQM9scgc1LlrY6I0b0+bqbtFFFUc4UUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQBDdllsp2QkMI2II65xVVXt/tNuLe4ZyXIYecz8bW7EnvitCipauyWrsKhmhaRo3STYyE4OM9RU1FU1cpq5FBC0RkLSb2dtxOMdgP6VLRRQlYNivcFzNDGsrRhs5KgZ4HuDRas+6dHkaTZJtBYDONqnsB6mpZIY5gBLGjgcgMoNEcUcK7Y41Rc5woxU2d7k2dx9FRzTJbwvLIcKoyf8+tVbWaZJxFcyKzTAumMfKe6cdcDoe/PpVqLauNuzsXqKjnmW3haVgSF6KvVj0AHuTxUEt1vtozbn95P8seR09SR7c8e2KEmwbSJrm4S1t3mkPyqPzPQD6k4FVLO4ZJ/InuEleUF0KsOD/Eo9h1Htn0q
e2cyI9vPh5YjtYkD5x2bHv/ADB9KS5eGyhMojjD8Kg4GWPAGapfykv+Yp6zfmAR2sMgWeX5vvhSFHuemcd/erenXqX9msqMrMPlfb0Df5wfxrKjlzq4t5L13fZtEylOp5K4KnHJ65/uDk1tWtstrGyK7vuYsS2Op69ABVzSjFLqTBtybJqKKKxNQooooAz9Q86FvtivGUhQ7UZSTuPfr+Htk+tVdJc2swt3uo5xMSyiPOIyBnaMn7uM46AY962qKtT92xDh71woooqCwooooAKKKKACiiigAooooAKyvEP/ACDYf+v6z/8ASiOtWsrxD/yDYf8Ar+s//SiOgDVooooAK56/mQ3LR3F7NbyfvCAkpU8Y2BR3yCT6kjHbFdDTXRZFKuoZT1BGRSkrmtKooO7RQslF1BJFcBZ40cAFwHGcAkZ74ORn+oqCOyldpbVLsxW1q6iOMIvYKwye6gnGOOnJrXVQqhVAAHAA7VFNZ29wwaaFHOMZYZyPQ+opcpSrWb7feFpMbizhmYAGRAxA6cipqKKoxer0CiiigQUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQA10SRCjqrKeoYZBqNLW3jcPHbxIw6FUAIqaindisitdW800sLxyogjJO14ywJ6A8EdOfz9qji09SZPtQgnDPvVfJwEJ64yT16/nV2inzO1hcqvcihtoLfd5EEcW7rsQLn8qkZFcYdQw9CM0tFK5VjOGjoImjW5mVWJJ2rHnJOeu3PXmtGiihyb3EopbBRRRSGFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABWV4h/wCQbD/1/Wf/AKUR1q1zmvWt8sUMr6iXg/tC1PkeSo4+0R4G7rx/SgDo6KKKACiiigBksqQRmSRtqjv/ACFRx3cEiOwfaIxlxIpQqPUg4IFJdRPIsbxbTJE+9VY4DcEYP4E1Wltbi5MkrrHG+ECJuyDtbd8xx3xj259cUm2axjFrVluG6huCRFICw5IwQceuD296quxmubkSXTwCHG0KwGBtB3nPXnI544qVFmmvI55IfJWNGXBYEsTj07cVPJBDMytLEjlDlSyg4+lLVheMH/TG2kjzWcMkgw7IC3GOcenapqKKozbu7hRRRQIKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigArK8Q/8g2H/AK/rP/0ojrVrK8Q/8g2H/r+s/wD0ojoA1aKKy5hb/abg3sbu+R5JCsTt2j7mOhznpz+GKTdi4Q5malFRWolFpCJ/9dsXf/vY5/WpaZLVnYKKiuJhbwlypY5Cqo6kk4A/M1HFcSmcQTxLG5UupR9wIBAPOBzyO3elcai2rlmiiimSFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUU15I4yod1Uu21dxxk+g96dQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFZPiEj+zoRkZ+3Wf/pRHWtXO69o+mRRQ3kenWiXR1C1YzrAofJuI8ndjOTk/nQB0VFFFABRUU86wIGYMxJ2qqjJY+gpILkTM6NG8UiAEo+M4PQ8Eg9D3ouVyu1x08K3ERjYkcggr1BByCPxFRw2xSbzpZnlk27VLAAKO+AB3wPyqxRSsCk0rBRRRTJCiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAzbqOWa5kElpM8artiaNk4JwS3LDBzjHHGPerts0r20ZnTZLj5l46/hUtFU5XViVGzuFFFFSUFFFee69d315ceM7iDULu1k0K0Q2axTF
EEohMxZlHDg5VcNkYB96APQqKr2F0L7TrW7ClBPCkoU9RuAOP1qxQAUUUUAFFFFABRRRQAUUUUAFZXiH/kGw/8AX9Z/+lEdatZXiH/kGw/9f1n/AOlEdAGrRRRQBDcwNMqFHCSRtvRiMjOCOR6YJpsEEomaed1aQqFARcBR1/E//WqxRSsVzu1gooopkhRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFczrPhH+1bjUjFqL2tvqtultfxLEGMiLkfIx+4xVipODxjgEZrpq5TxS9zb+I/CjxXtykc2pGGSBH2xuvkTNyB15VepI4oA6pVVEVFACqMADsKWiigAooooAKKKKACsHUtT1LTNTsVcWs1te3q2scMcbCUKULF927BwVJI2/d71palpVhq9stvqNrFcwq4cJIMgNgjP6mspND1aLX/t0ep2JtF2pHbyWDF4YRjKI4lAGcfe256ZyAAAB3iHVL6xvLOG1", 404 | "text/plain": [ 405 | "image/jpeg" 406 | ] 407 | }, 408 | "execution_count": 21, 409 | "metadata": {}, 410 | "output_type": "execute_result" 411 | } 412 | ], 413 | "source": [ 414 | "b, _ := PlotPCAData(scatterData, \"Component 1\", \"Component 2\")\n", 415 | "display.JPEG(b)" 416 | ] 417 | } 418 | ], 419 | "metadata": { 420 | "kernelspec": { 421 | "display_name": "Go", 422 | "language": "go", 423 | "name": "gophernotes" 424 | }, 425 | "language_info": { 426 | "codemirror_mode": "", 427 | "file_extension": ".go", 428 | "mimetype": "", 429 | "name": "go", 430 | "nbconvert_exporter": "", 431 | "pygments_lexer": "", 432 | "version": "go1.11.4" 433 | } 434 | }, 435 | "nbformat": 4, 436 | "nbformat_minor": 2 437 | } 438 | -------------------------------------------------------------------------------- /Chapter04/download-iris.sh: -------------------------------------------------------------------------------- 1 | mkdir -p datasets/iris && \ 2 | wget https://raw.githubusercontent.com/scikit-learn/scikit-learn/master/sklearn/datasets/data/iris.csv -O datasets/iris/iris.csv 3 | -------------------------------------------------------------------------------- /Chapter05/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow 2 | 3 | # gcc for cgo 4 | RUN apt-get update && apt-get install -y --no-install-recommends \ 5 | curl \ 6 | git \ 7 | wget \ 8 | g++ 
\ 9 | gcc \ 10 | libc6-dev \ 11 | make \ 12 | pkg-config \ 13 | && rm -rf /var/lib/apt/lists/* 14 | 15 | # Install TensorFlow C library 16 | RUN curl -L \ 17 | "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-linux-x86_64-1.13.1.tar.gz" | \ 18 | tar -C "/usr/local" -xz 19 | RUN ldconfig 20 | # Hide some warnings 21 | ENV TF_CPP_MIN_LOG_LEVEL 2 22 | 23 | 24 | #### GOLANG #### 25 | 26 | ENV GOLANG_VERSION 1.9.2 27 | 28 | RUN set -eux; \ 29 | \ 30 | dpkgArch="$(dpkg --print-architecture)"; \ 31 | case "${dpkgArch##*-}" in \ 32 | amd64) goRelArch='linux-amd64'; goRelSha256='de874549d9a8d8d8062be05808509c09a88a248e77ec14eb77453530829ac02b' ;; \ 33 | armhf) goRelArch='linux-armv6l'; goRelSha256='8a6758c8d390e28ef2bcea511f62dcb43056f38c1addc06a8bc996741987e7bb' ;; \ 34 | arm64) goRelArch='linux-arm64'; goRelSha256='0016ac65ad8340c84f51bc11dbb24ee8265b0a4597dbfdf8d91776fc187456fa' ;; \ 35 | i386) goRelArch='linux-386'; goRelSha256='574b2c4b1a248e58ef7d1f825beda15429610a2316d9cbd3096d8d3fa8c0bc1a' ;; \ 36 | ppc64el) goRelArch='linux-ppc64le'; goRelSha256='adb440b2b6ae9e448c253a20836d8e8aa4236f731d87717d9c7b241998dc7f9d' ;; \ 37 | s390x) goRelArch='linux-s390x'; goRelSha256='a7137b4fbdec126823a12a4b696eeee2f04ec616e9fb8a54654c51d5884c1345' ;; \ 38 | *) goRelArch='src'; goRelSha256='665f184bf8ac89986cfd5a4460736976f60b57df6b320ad71ad4cef53bb143dc'; \ 39 | echo >&2; echo >&2 "warning: current architecture ($dpkgArch) does not have a corresponding Go binary release; will be building from source"; echo >&2 ;; \ 40 | esac; \ 41 | \ 42 | url="https://golang.org/dl/go${GOLANG_VERSION}.${goRelArch}.tar.gz"; \ 43 | wget -O go.tgz "$url"; \ 44 | echo "${goRelSha256} *go.tgz" | sha256sum -c -; \ 45 | tar -C /usr/local -xzf go.tgz; \ 46 | rm go.tgz; \ 47 | \ 48 | if [ "$goRelArch" = 'src' ]; then \ 49 | echo >&2; \ 50 | echo >&2 'error: UNIMPLEMENTED'; \ 51 | echo >&2 'TODO install golang-any from jessie-backports for GOROOT_BOOTSTRAP (and uninstall after 
build)'; \ 52 | echo >&2; \ 53 | exit 1; \ 54 | fi; \ 55 | \ 56 | export PATH="/usr/local/go/bin:$PATH"; \ 57 | go version 58 | 59 | ENV GOPATH /go 60 | ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH 61 | 62 | RUN mkdir -p "$GOPATH/src" "$GOPATH/bin" && chmod -R 777 "$GOPATH" 63 | 64 | # pre-install tensorflow go library 65 | RUN \ 66 | go get github.com/tensorflow/tensorflow/tensorflow/go \ 67 | github.com/tensorflow/tensorflow/tensorflow/go/op \ 68 | github.com/petar/GoMNIST \ 69 | github.com/kniren/gota/... 70 | 71 | RUN mkdir $GOPATH/src/model 72 | 73 | WORKDIR $GOPATH/src/model 74 | 75 | ADD . $GOPATH/src/model 76 | 77 | CMD ["go", "run", "main.go"] -------------------------------------------------------------------------------- /Chapter05/Using HTTP to Run a Python Model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import (\n", 10 | " \"fmt\"\n", 11 | " mnist \"github.com/petar/GoMNIST\"\n", 12 | " \"github.com/kniren/gota/dataframe\"\n", 13 | " \"github.com/kniren/gota/series\"\n", 14 | " \"image\"\n", 15 | " \"bytes\"\n", 16 | " \"math\"\n", 17 | ")" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "set, err := mnist.ReadSet(\"../datasets/mnist/images.gz\", \"../datasets/mnist/labels.gz\")" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 3, 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "data": { 36 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAABwAAAAcCAAAAABXZoBIAAACAklEQVR4nFyQPWgUQRzF3/8/s+ttTu4uJhKSFBH8gIBRYgKSQoRUNnaChXZWFloFrCwFO2uxUSs77bSwlCApYiEYTbAx4VJ4wZhcvN2dj7/M3l3uY4op5sd7895TCIfCNfsmUafHlh/Pr6fonYDmn25932mIiPzY8Lsv53oaVF5f4mbLeF098gKUkvjT3WP4cWbPa0tgw+DwKpM3NgAd2MJMQ6tkaoSNdhTZwx0Ld2+lo1x52PDKPa/vTO2yiU9eedDQFX0GhQluWSUl+8IsvL+aTIwfPHO69G/qQsf28rY6gQo+HM2uvL2p1xds2flfS5sFnPttWSV7uJhNPiFDS6hPe59ee1XAR0nTJaldHDsVTZg0rt0ebVUpXmzbrk6cq5S33GfvndLk+HCzrLj+rttz9Pz969vV/SiMSZxWv94pnotA+LOWLUtcVh5EvpSXVtu7FlUohhwqLx0fhX0o6kIx+PlXZxAK/2QRDsDShWC0crGKhIg51+T6bCHwToRJmIjFs4f0IIBpUkLFn2xIDSrhEIOIQD6yEg0pU2XZi/KitUOtXwnAF4MwQUhs0lbqLuTCPEAWO4K+haSoHnoqCLEbCERATgRPLhiQU0OBwibMBGrrB6vUYbPM5FmWG4gaClQr63HmGLBqe+Qs2B9DEnz5th+Bm0LWm9paaAb8DwAA//+/q9DV4L+stgAAAABJRU5ErkJggg==" 37 | }, 38 | "execution_count": 3, 39 | "metadata": { 40 | "image/png": { 41 | "height": 28, 42 | "width": 28 43 | } 44 | }, 45 | "output_type": "execute_result" 46 | } 47 | ], 48 | "source": [ 49 | "set.Images[1]" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "func MNISTSetToDataframe(st *mnist.Set, maxExamples int) dataframe.DataFrame {\n", 59 | " length := maxExamples\n", 60 | " if length > len(st.Images) {\n", 61 | " length = len(st.Images)\n", 62 | " }\n", 63 | " s := make([]string, length, length)\n", 64 | " l := make([]int, length, length)\n", 65 | " for i := 0; i < length; i++ {\n", 66 | " s[i] = string(st.Images[i])\n", 67 | " l[i] = int(st.Labels[i])\n", 68 | " }\n", 69 | " var df dataframe.DataFrame\n", 70 | " images := series.Strings(s)\n", 71 | " images.Name = \"Image\"\n", 72 | " labels := series.Ints(l)\n", 73 | " labels.Name = \"Label\"\n", 74 | " df = dataframe.New(images, labels)\n", 75 | " return df\n", 76 | "}" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 5, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "df := MNISTSetToDataframe(set, 1000)" 86 | ] 87 | }, 88 
| { 89 | "cell_type": "code", 90 | "execution_count": 6, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "categories := []string{\"tshirt\", \"trouser\", \"pullover\", \"dress\", \"coat\", \"sandal\", \"shirt\", \"shoe\", \"bag\", \"boot\"}" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 7, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "func NormalizeBytes(bs []byte) []int {\n", 104 | " ret := make([]int, len(bs), len(bs))\n", 105 | " for i := range bs {\n", 106 | " ret[i] = int(bs[i])\n", 107 | " }\n", 108 | " return ret\n", 109 | "}\n", 110 | "func ImageSeriesToInts(df dataframe.DataFrame, col string) [][]int {\n", 111 | " s := df.Col(col)\n", 112 | " ret := make([][]int, s.Len(), s.Len())\n", 113 | " for i := 0; i < s.Len(); i++ {\n", 114 | " b := []byte(s.Elem(i).String())\n", 115 | " ret[i] = NormalizeBytes(b)\n", 116 | " }\n", 117 | " return ret\n", 118 | "}" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 8, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "testImages := ImageSeriesToInts(df, \"Image\")" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "## Invoke the model using JSON-RPC" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 33, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "import (\n", 144 | "\t\"fmt\"\n", 145 | "\t\"log\"\n", 146 | "\t\"math/rand\"\n", 147 | "\t\"net/http\"\n", 148 | "\t\"os\"\n", 149 | "\t\"os/exec\"\n", 150 | "\t\"strconv\"\n", 151 | "\t\"time\"\n", 152 | " \"io/ioutil\"\n", 153 | " \"encoding/json\"\n", 154 | " \"bytes\"\n", 155 | ")" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 25, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "c, err := jsonrpc.Dial(\"tcp\", \"localhost:8001\")\n", 165 | "p := model{Client: client}\n", 166 | "var req PredictRequest = 
PredictRequest{\n", 167 | " Image: testImages[16],\n", 168 | "}\n", 169 | "\n", 170 | "var reply interface{}\n", 171 | "err := c.Call(\"predict\", req, &reply)" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 57, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "// Predict returns whether the ith image represents trousers or not based on the logistic regression model\n", 181 | "func Predict(i int) (bool, error){\n", 182 | " b, err := json.Marshal(testImages[i])\n", 183 | " if err != nil {\n", 184 | " return false, err\n", 185 | " }\n", 186 | " r := bytes.NewReader(b)\n", 187 | " resp, err := http.Post(\"http://127.0.0.1:8001\", \"application/json\", r)\n", 188 | " if err != nil {\n", 189 | " return false, err\n", 190 | " }\n", 191 | " body, err := ioutil.ReadAll(resp.Body)\n", 192 | " if err != nil {\n", 193 | " return false, err\n", 194 | " }\n", 195 | " resp.Body.Close()\n", 196 | " var resp struct {\n", 197 | " IsTrousers bool `json:\"is_trousers\"`\n", 198 | " }\n", 199 | " err := json.Unmarshal(body, &resp)\n", 200 | " return resp.IsTrousers, err \n", 201 | "}\n" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 60, 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/plain": [ 212 | "true " 213 | ] 214 | }, 215 | "execution_count": 60, 216 | "metadata": {}, 217 | "output_type": "execute_result" 218 | } 219 | ], 220 | "source": [ 221 | "// Expected: true \n", 222 | "Predict(16)" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 61, 228 | "metadata": {}, 229 | "outputs": [ 230 | { 231 | "data": { 232 | "text/plain": [ 233 | "false " 234 | ] 235 | }, 236 | "execution_count": 61, 237 | "metadata": {}, 238 | "output_type": "execute_result" 239 | } 240 | ], 241 | "source": [ 242 | "// Expected false \n", 243 | "Predict(0)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": 
[], 251 | "source": [] 252 | } 253 | ], 254 | "metadata": { 255 | "kernelspec": { 256 | "display_name": "Go", 257 | "language": "go", 258 | "name": "gophernotes" 259 | }, 260 | "language_info": { 261 | "codemirror_mode": "", 262 | "file_extension": ".go", 263 | "mimetype": "", 264 | "name": "go", 265 | "nbconvert_exporter": "", 266 | "pygments_lexer": "", 267 | "version": "go1.11.4" 268 | } 269 | }, 270 | "nbformat": 4, 271 | "nbformat_minor": 2 272 | } 273 | -------------------------------------------------------------------------------- /Chapter05/Using os exec to Run a Python Model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import (\n", 10 | " \"fmt\"\n", 11 | " mnist \"github.com/petar/GoMNIST\"\n", 12 | " \"github.com/kniren/gota/dataframe\"\n", 13 | " \"github.com/kniren/gota/series\"\n", 14 | " \"image\"\n", 15 | " \"bytes\"\n", 16 | " \"math\"\n", 17 | ")" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 4, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "set, err := mnist.ReadSet(\"../datasets/mnist/images.gz\", \"../datasets/mnist/labels.gz\")" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 5, 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "data": { 36 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAABwAAAAcCAAAAABXZoBIAAACAklEQVR4nFyQPWgUQRzF3/8/s+ttTu4uJhKSFBH8gIBRYgKSQoRUNnaChXZWFloFrCwFO2uxUSs77bSwlCApYiEYTbAx4VJ4wZhcvN2dj7/M3l3uY4op5sd7895TCIfCNfsmUafHlh/Pr6fonYDmn25932mIiPzY8Lsv53oaVF5f4mbLeF098gKUkvjT3WP4cWbPa0tgw+DwKpM3NgAd2MJMQ6tkaoSNdhTZwx0Ld2+lo1x52PDKPa/vTO2yiU9eedDQFX0GhQluWSUl+8IsvL+aTIwfPHO69G/qQsf28rY6gQo+HM2uvL2p1xds2flfS5sFnPttWSV7uJhNPiFDS6hPe59ee1XAR0nTJaldHDsVTZg0rt0ebVUpXmzbrk6cq5S33GfvndLk+HCzrLj+rttz9Pz969vV/SiMSZxWv94pnotA+LOWLUtcVh5EvpSXVtu7FlUohhwqLx0fhX0o6kIx+PlXZxAK/2QRDsDShWC0crGKhIg51+T6bCHwToRJmIjFs4f0IIBpUkLFn2xIDSrhEIOIQD6yEg0pU2XZi/KitUOtXwnAF4MwQUhs0lbqLuTCPEAWO4K+haSoHnoqCLEbCERATgRPLhiQU0OBwibMBGrrB6vUYbPM5FmWG4gaClQr63HmGLBqe+Qs2B9DEnz5th+Bm0LWm9paaAb8DwAA//+/q9DV4L+stgAAAABJRU5ErkJggg==" 37 | }, 38 | "execution_count": 5, 39 | "metadata": { 40 | "image/png": { 41 | "height": 28, 42 | "width": 28 43 | } 44 | }, 45 | "output_type": "execute_result" 46 | } 47 | ], 48 | "source": [ 49 | "set.Images[1]" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 6, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "func MNISTSetToDataframe(st *mnist.Set, maxExamples int) dataframe.DataFrame {\n", 59 | " length := maxExamples\n", 60 | " if length > len(st.Images) {\n", 61 | " length = len(st.Images)\n", 62 | " }\n", 63 | " s := make([]string, length, length)\n", 64 | " l := make([]int, length, length)\n", 65 | " for i := 0; i < length; i++ {\n", 66 | " s[i] = string(st.Images[i])\n", 67 | " l[i] = int(st.Labels[i])\n", 68 | " }\n", 69 | " var df dataframe.DataFrame\n", 70 | " images := series.Strings(s)\n", 71 | " images.Name = \"Image\"\n", 72 | " labels := series.Ints(l)\n", 73 | " labels.Name = \"Label\"\n", 74 | " df = dataframe.New(images, labels)\n", 75 | " return df\n", 76 | "}" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 7, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "df := MNISTSetToDataframe(set, 1000)" 86 | ] 87 | }, 88 
| { 89 | "cell_type": "code", 90 | "execution_count": 8, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "categories := []string{\"tshirt\", \"trouser\", \"pullover\", \"dress\", \"coat\", \"sandal\", \"shirt\", \"shoe\", \"bag\", \"boot\"}" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 13, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "func NormalizeBytes(bs []byte) []int {\n", 104 | " ret := make([]int, len(bs), len(bs))\n", 105 | " for i := range bs {\n", 106 | " ret[i] = int(bs[i])\n", 107 | " }\n", 108 | " return ret\n", 109 | "}\n", 110 | "func ImageSeriesToInts(df dataframe.DataFrame, col string) [][]int {\n", 111 | " s := df.Col(col)\n", 112 | " ret := make([][]int, s.Len(), s.Len())\n", 113 | " for i := 0; i < s.Len(); i++ {\n", 114 | " b := []byte(s.Elem(i).String())\n", 115 | " ret[i] = NormalizeBytes(b)\n", 116 | " }\n", 117 | " return ret\n", 118 | "}" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 14, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "testImages := ImageSeriesToInts(df, \"Image\")" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "## Invoke the model using os/exec" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 19, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "import (\n", 144 | " \"encoding/json\"\n", 145 | " \"os/exec\"\n", 146 | " \"io/ioutil\"\n", 147 | ")" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 22, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "func InvokeAndWait(args ...string) ([]byte, error) {\n", 157 | "\tvar (\n", 158 | "\t\toutput []byte\n", 159 | "\t\terrOutput []byte\n", 160 | "\t\terr error\n", 161 | "\t)\n", 162 | "\tcmd := exec.Command(\"python3\", args...)\n", 163 | "\tstdout, err := cmd.StdoutPipe()\n", 164 | "\tif err != nil {\n", 165 | "\t\treturn 
nil, err\n", 166 | "\t}\n", 167 | "\tstderr, err := cmd.StderrPipe()\n", 168 | "\tif err := cmd.Start(); err != nil {\n", 169 | "\t\treturn nil, err\n", 170 | "\t}\n", 171 | "\n", 172 | "\tif output, err = ioutil.ReadAll(stdout); err != nil {\n", 173 | "\t\treturn nil, err\n", 174 | "\t}\n", 175 | "\n", 176 | "\tif errOutput, err = ioutil.ReadAll(stderr); err != nil || len(errOutput) > 0 {\n", 177 | "\t\treturn nil, fmt.Errorf(\"Error running model: %s\", string(errOutput))\n", 178 | "\t}\n", 179 | "\n", 180 | "\treturn output, nil\n", 181 | "}" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 60, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "// IsImageTrousers invokes the Python model to predict if image at given index is, in fact, of trousers\n", 191 | "func IsImageTrousers(i int) (bool, error){\n", 192 | " b, err := json.Marshal(testImages[i])\n", 193 | " if err != nil {\n", 194 | " panic(err)\n", 195 | " }\n", 196 | " b, err = InvokeAndWait(\"model.py\", \"predict\", string(b))\n", 197 | " if err != nil {\n", 198 | " return false, err\n", 199 | " } else {\n", 200 | " var ret struct {\n", 201 | " IsTrousers bool `json:\"is_trousers\"`\n", 202 | " }\n", 203 | " err := json.Unmarshal(b, &ret)\n", 204 | " if err != nil {\n", 205 | " return false, err\n", 206 | " }\n", 207 | " return ret.IsTrousers, nil\n", 208 | " }\n", 209 | "}\n" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 76, 215 | "metadata": {}, 216 | "outputs": [ 217 | { 218 | "data": { 219 | "text/plain": [ 220 | "true " 221 | ] 222 | }, 223 | "execution_count": 76, 224 | "metadata": {}, 225 | "output_type": "execute_result" 226 | } 227 | ], 228 | "source": [ 229 | "// Prediction\n", 230 | "IsImageTrousers(16)" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 75, 236 | "metadata": {}, 237 | "outputs": [ 238 | { 239 | "name": "stdout", 240 | "output_type": "stream", 241 | "text": [ 242 | "// warning: 
expression returns 2 values, using only the first one: [int error]\n" 243 | ] 244 | }, 245 | { 246 | "data": { 247 | "text/plain": [ 248 | "true" 249 | ] 250 | }, 251 | "execution_count": 75, 252 | "metadata": {}, 253 | "output_type": "execute_result" 254 | } 255 | ], 256 | "source": [ 257 | "// Ground truth\n", 258 | "df.Col(\"Label\").Elem(16).Int()==1" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 81, 264 | "metadata": {}, 265 | "outputs": [ 266 | { 267 | "data": { 268 | "text/plain": [ 269 | "false " 270 | ] 271 | }, 272 | "execution_count": 81, 273 | "metadata": {}, 274 | "output_type": "execute_result" 275 | } 276 | ], 277 | "source": [ 278 | "// Prediction\n", 279 | "IsImageTrousers(0)" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 86, 285 | "metadata": {}, 286 | "outputs": [ 287 | { 288 | "name": "stdout", 289 | "output_type": "stream", 290 | "text": [ 291 | "// warning: expression returns 2 values, using only the first one: [int error]\n" 292 | ] 293 | }, 294 | { 295 | "data": { 296 | "text/plain": [ 297 | "false" 298 | ] 299 | }, 300 | "execution_count": 86, 301 | "metadata": {}, 302 | "output_type": "execute_result" 303 | } 304 | ], 305 | "source": [ 306 | "// Ground truth\n", 307 | "df.Col(\"Label\").Elem(0).Int()==1" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": null, 313 | "metadata": {}, 314 | "outputs": [], 315 | "source": [] 316 | } 317 | ], 318 | "metadata": { 319 | "kernelspec": { 320 | "display_name": "Go", 321 | "language": "go", 322 | "name": "gophernotes" 323 | }, 324 | "language_info": { 325 | "codemirror_mode": "", 326 | "file_extension": ".go", 327 | "mimetype": "", 328 | "name": "go", 329 | "nbconvert_exporter": "", 330 | "pygments_lexer": "", 331 | "version": "go1.11.4" 332 | } 333 | }, 334 | "nbformat": 4, 335 | "nbformat_minor": 2 336 | } 337 | -------------------------------------------------------------------------------- 
/Chapter05/install-python-dependencies.sh: -------------------------------------------------------------------------------- 1 | apk --no-cache \ 2 | --repository http://dl-4.alpinelinux.org/alpine/v3.7/community \ 3 | --repository http://dl-4.alpinelinux.org/alpine/v3.7/main \ 4 | --arch=x86_64 add python3-dev && \ 5 | apk add --no-cache \ 6 | --allow-untrusted \ 7 | --repository \ 8 | http://dl-4.alpinelinux.org/alpine/edge/testing \ 9 | hdf5 \ 10 | hdf5-dev && \ 11 | apk --no-cache \ 12 | --repository http://dl-4.alpinelinux.org/alpine/v3.7/community \ 13 | --repository http://dl-4.alpinelinux.org/alpine/v3.7/main \ 14 | --arch=x86_64 add gcc gfortran build-base wget freetype-dev libpng-dev openblas-dev && \ 15 | pip3 install -r requirements.txt && \ 16 | cat << EOT > $HOME/.keras/keras.json 17 | { 18 | "epsilon": 1e-07, 19 | "backend": "theano", 20 | "floatx": "float32", 21 | "image_dim_ordering": "th" 22 | } 23 | EOT -------------------------------------------------------------------------------- /Chapter05/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | tf "github.com/tensorflow/tensorflow/tensorflow/go" 5 | "fmt" 6 | "log" 7 | ) 8 | 9 | func makeTensorFromImage(img string) (*tf.Tensor, error) { 10 | t := make([][]float32, 1) 11 | t[0] = make([]float32, 784) 12 | tensor, err := tf.NewTensor(t) 13 | return tensor, err 14 | } 15 | 16 | func main() { 17 | savedModel, err := tf.LoadSavedModel("./saved_model", []string{"serve"}, nil) 18 | if err != nil { 19 | log.Fatalf("failed to load model: %v", err) 20 | } 21 | input := savedModel.Graph.Operation("input_input_1") 22 | output := savedModel.Graph.Operation("output_1/BiasAdd") 23 | 24 | session := savedModel.Session 25 | graph := savedModel.Graph 26 | if err != nil { 27 | log.Fatal(err) 28 | } 29 | defer session.Close() 30 | fmt.Println("Successfully imported model!") 31 | tensor, err := makeTensorFromImage("") 32 | if err != nil { 33 | 
log.Fatal(err) 34 | } 35 | prediction, err := session.Run( 36 | map[tf.Output]*tf.Tensor{ 37 | graph.Operation(input.Name()).Output(0): tensor, 38 | }, 39 | []tf.Output{ 40 | graph.Operation(output.Name()).Output(0), 41 | }, 42 | nil) 43 | if err != nil { 44 | log.Fatal(err) 45 | } 46 | 47 | probability := prediction[0].Value().([][]float32)[0][0] 48 | if probability > 0.5 { 49 | fmt.Printf("It's a pair of trousers! Probability: %v\n", probability) 50 | } else { 51 | fmt.Printf("It's NOT a pair of trousers! Probability: %v\n", probability) 52 | } 53 | 54 | } -------------------------------------------------------------------------------- /Chapter05/model.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Machine-Learning-with-Go-Quick-Start-Guide/f42e51ab1141e7df39cd01a8a4ee81728156b9c7/Chapter05/model.pickle -------------------------------------------------------------------------------- /Chapter05/model.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import json 3 | import pandas as pd 4 | import numpy as np 5 | import pickle 6 | 7 | MODEL = None 8 | MODEL_FILENAME = "model.pickle" 9 | 10 | def create_and_persist_model(): 11 | global MODEL 12 | from keras.datasets import fashion_mnist 13 | (x, y), _ = fashion_mnist.load_data() 14 | classes = ['tshirt', 'trouser', 'pullover', 'dress', 'coat', 'sandal', 'shirt', 'shoe', 'bag', 'boot'] 15 | from sklearn.model_selection import train_test_split 16 | x_train, x_test, y_train, y_test = train_test_split(x[:1000].reshape(-1, 28 * 28), y[:1000].flatten(), test_size=0.25, random_state=0) 17 | # Prepare examples 18 | y_train_1 = y_train == 1 19 | y_test_1 = y_test == 1 20 | from sklearn.linear_model import LogisticRegression 21 | MODEL = LogisticRegression(solver='liblinear').fit(x_train, y_train_1) 22 | pickle.dump(MODEL, open(MODEL_FILENAME, "wb" )) 23 | 24 | def restore_model(): 
"""HTTP front end for the "is this image a pair of trousers?" classifier.

On import the script loads a pickled model from MODEL_FILENAME, training and
caching a new one when that fails. Run as a program, it serves POST requests
on 127.0.0.1:8001: the request body is a JSON array of pixel values and the
response is a JSON object of the form {"is_trousers": <bool>}.
"""
import sys
import json
import pandas as pd
import numpy as np
import pickle
from http.server import BaseHTTPRequestHandler, HTTPServer

MODEL = None
MODEL_FILENAME = "model.pickle"


def create_and_persist_model():
    """Train the classifier and cache it on disk.

    Fits a logistic regression on a 75% split of the first 1000 Fashion-MNIST
    training images, flattened to 28 * 28 values each, with the target
    "label == 1" (label 1 is 'trouser' in the dataset's class list). The
    fitted model is stored in the global MODEL and pickled to MODEL_FILENAME.
    """
    global MODEL
    from keras.datasets import fashion_mnist
    (x, y), _ = fashion_mnist.load_data()
    from sklearn.model_selection import train_test_split
    x_train, _x_test, y_train, _y_test = train_test_split(
        x[:1000].reshape(-1, 28 * 28), y[:1000].flatten(),
        test_size=0.25, random_state=0)
    from sklearn.linear_model import LogisticRegression
    MODEL = LogisticRegression(solver='liblinear').fit(x_train, y_train == 1)
    # The context manager closes the file even if pickling fails.
    with open(MODEL_FILENAME, "wb") as model_file:
        pickle.dump(MODEL, model_file)


def restore_model():
    """Load the cached model from MODEL_FILENAME into the global MODEL."""
    global MODEL
    # NOTE: pickle.load can execute arbitrary code. That is acceptable only
    # because this file is a local cache written by this script; never point
    # MODEL_FILENAME at untrusted data.
    with open(MODEL_FILENAME, "rb") as model_file:
        MODEL = pickle.load(model_file)


class ML_RequestHandler(BaseHTTPRequestHandler):
    """Request handler that answers POSTed images with a trousers verdict."""

    def predict(self, image):
        """Return True when MODEL classifies the pixel list `image` as trousers."""
        prediction = MODEL.predict(np.asarray([image]).astype('u8'))
        return bool(prediction[0])

    def _send_json(self, status, body):
        """Send `body` (an already-serialised JSON string) with `status`."""
        self.send_response(status)
        self.send_header('Content-type', 'application/json')
        self.end_headers()
        # Write content as utf-8 data
        self.wfile.write(bytes(body, "utf8"))

    def do_POST(self):
        """Classify the JSON image in the request body and reply with JSON."""
        # A missing header yields None and a malformed one a non-number;
        # answer 400 instead of letting the handler raise.
        try:
            content_len = int(self.headers.get('Content-Length'))
        except (TypeError, ValueError):
            content_len = -1
        if content_len < 0:
            self._send_json(400, json.dumps({"error": "missing or invalid Content-Length"}))
            return

        post_body = self.rfile.read(content_len)
        try:
            image = json.loads(post_body)
            is_trousers = self.predict(image)
        except (TypeError, ValueError) as exc:
            # Malformed JSON or an image the model cannot consume.
            self._send_json(400, json.dumps({"error": str(exc)}))
            return

        prediction = json.dumps({"is_trousers": is_trousers})
        print(prediction)
        self._send_json(200, prediction)
        return


def run_server():
    """Serve prediction requests on 127.0.0.1:8001 until interrupted."""
    print('starting server...')

    server_address = ('127.0.0.1', 8001)
    httpd = HTTPServer(server_address, ML_RequestHandler)
    print('running server...')
    httpd.serve_forever()


try:
    restore_model()  # The first time that this script is run, the model will be created and cached. Thereafter it is reused.
except Exception:
    # Exception rather than a bare except, so Ctrl-C and SystemExit are not
    # mistaken for "no cached model" and answered with a retraining run.
    create_and_persist_model()


if __name__ == '__main__':
    run_server()
&& \ 2 | sudo docker run -it tfgo -------------------------------------------------------------------------------- /Chapter05/saved_model/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Machine-Learning-with-Go-Quick-Start-Guide/f42e51ab1141e7df39cd01a8a4ee81728156b9c7/Chapter05/saved_model/saved_model.pb -------------------------------------------------------------------------------- /Chapter05/saved_model/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Machine-Learning-with-Go-Quick-Start-Guide/f42e51ab1141e7df39cd01a8a4ee81728156b9c7/Chapter05/saved_model/variables/variables.data-00000-of-00001 -------------------------------------------------------------------------------- /Chapter05/saved_model/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Machine-Learning-with-Go-Quick-Start-Guide/f42e51ab1141e7df39cd01a8a4ee81728156b9c7/Chapter05/saved_model/variables/variables.index -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Packt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | 
copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # Machine Learning with Go Quick Start Guide 5 | 6 | 7 | 8 | This is the code repository for [Machine Learning with Go Quick Start Guide](https://www.packtpub.com/big-data-and-business-intelligence/machine-learning-go-quick-start-guide?utm_source=github&utm_medium=repository&utm_campaign=9781838550356), published by Packt. 9 | 10 | **Hands-on techniques for building supervised and unsupervised machine learning workflows** 11 | 12 | ## What is this book about? 13 | Machine learning is an essential part of today's data-driven world and is extensively used across industries, including financial forecasting, robotics, and web technology. This book will teach you how to efficiently develop machine learning applications in Go. 14 | 15 | This book covers the following exciting features: 16 | * Manipulate string values and escape special characters 17 | * Work with dates, times, maps, and arrays 18 | * Handle errors and perform logging 19 | * Explore files and directories 20 | * Handle HTTP requests and responses 21 | 22 | If you feel this book is for you, get your [copy](https://www.amazon.com/dp/1838550356) today! 23 | 24 | https://www.packtpub.com/ 25 | 26 | ## Instructions and Navigations 27 | All of the code is organized into folders. 
For example, Chapter02. 28 | 29 | The code will look like the following: 30 | ``` 31 | categories := []string{"tshirt", "trouser", "pullover", "dress", "coat", 32 | "sandal", "shirt", "shoe", "bag", "boot"} 33 | 34 | ``` 35 | 36 | **Following is what you need for this book:** 37 | This book is for developers and data scientists with at least beginner-level knowledge of Go, and a vague idea of what types of problem Machine Learning aims to tackle. No advanced knowledge of Go (and no theoretical understanding of the math that underpins Machine Learning) is required. 38 | 39 | With the following software and hardware list you can run all code files present in the book (Chapter 1-7). 40 | 41 | ### Software and Hardware List 42 | 43 | | Chapter | Software required | OS required | 44 | | -------- | ------------------------------------| -----------------------------------| 45 | |2-6 |Go |Ubuntu 16.04 server | 46 | 47 | 48 | 49 | We also provide a PDF file that has color images of the screenshots/diagrams used in this book. [Click here to download it](https://static.packt-cdn.com/downloads/9781838550356_ColorImages.pdf). 50 | 51 | 52 | ### Related products 53 | * Go Machine Learning Projects [[Packt]](https://www.packtpub.com/big-data-and-business-intelligence/go-machine-learning-projects?utm_source=github&utm_medium=repository&utm_campaign=9781788993401) [[Amazon]](https://www.amazon.com/dp/1788993403) 54 | 55 | * Hands-On Go Programming [[Packt]](https://www.packtpub.com/in/application-development/hands-go-programming?utm_source=github&utm_medium=repository&utm_campaign=9781789531756) [[Amazon]](https://www.amazon.com/dp/1789531756) 56 | 57 | ## Get to Know the Authors 58 | 59 | **Michael Bironneau** is an award-winning mathematician and experienced software engineer. He holds a PhD in mathematics from Loughborough University and has worked in several data science and software development roles. 
He is currently technical director of 60 | the energy AI technology company, Open Energi. 61 | 62 | **Toby Coleman** is an experienced data science and machine learning practitioner. Following degrees from Cambridge University and Imperial College London, he has worked on the application of data science techniques in the banking and energy sectors. Recently, he held the position of innovation director at cleantech SME Open Energi, and currently provides machine learning consultancy to start-up businesses. 63 | 64 | 65 | ### Suggestions and Feedback 66 | [Click here](https://docs.google.com/forms/d/e/1FAIpQLSdy7dATC6QmEL81FIUuymZ0Wy9vH1jHkvpY57OiMeKGqib_Ow/viewform) if you have any feedback or suggestions. 67 | ### Download a free PDF 68 | 69 | If you have already purchased a print or Kindle version of this book, you can get a DRM-free PDF version at no cost.
Simply click on the link below to claim your free PDF.
70 |

https://packt.link/free-ebook/9781838550356

-------------------------------------------------------------------------------- /datasets/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 roshank10 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /datasets/README.md: -------------------------------------------------------------------------------- 1 | # Machine-Learning-with-Go-Quick-Start-Guide 2 | Machine Learning with Go Quick Start Guide published by Packt 3 | -------------------------------------------------------------------------------- /datasets/bmi/500_Person_Gender_Height_Weight_Index.csv: -------------------------------------------------------------------------------- 1 | Gender,Height,Weight,Index 2 | Male,174,96,4 3 | Male,189,87,2 4 | Female,185,110,4 5 | Female,195,104,3 6 | Male,149,61,3 7 | Male,189,104,3 8 | Male,147,92,5 9 | Male,154,111,5 10 | Male,174,90,3 11 | Female,169,103,4 12 | Male,195,81,2 13 | Female,159,80,4 14 | Female,192,101,3 15 | Male,155,51,2 16 | Male,191,79,2 17 | Female,153,107,5 18 | Female,157,110,5 19 | Male,140,129,5 20 | Male,144,145,5 21 | Male,172,139,5 22 | Male,157,110,5 23 | Female,153,149,5 24 | Female,169,97,4 25 | Male,185,139,5 26 | Female,172,67,2 27 | Female,151,64,3 28 | Male,190,95,3 29 | Male,187,62,1 30 | Female,163,159,5 31 | Male,179,152,5 32 | Male,153,121,5 33 | Male,178,52,1 34 | Female,195,65,1 35 | Female,160,131,5 36 | Female,157,153,5 37 | Female,189,132,4 38 | Female,197,114,3 39 | Male,144,80,4 40 | Female,171,152,5 41 | Female,185,81,2 42 | Female,175,120,4 43 | Female,149,108,5 44 | Male,157,56,2 45 | Male,161,118,5 46 | Female,182,126,4 47 | Male,185,76,2 48 | Female,188,122,4 49 | Male,181,111,4 50 | Male,161,72,3 51 | Male,140,152,5 52 | Female,168,135,5 53 | Female,176,54,1 54 | Male,163,110,5 55 | Male,172,105,4 56 | Male,196,116,4 57 | Female,187,89,3 58 | Male,172,92,4 59 | Male,178,127,5 60 | Female,164,70,3 61 | Male,143,88,5 62 | Female,191,54,0 63 | Female,141,143,5 64 | Male,193,54,0 65 | Male,190,83,2 66 | Male,175,135,5 67 | Female,179,158,5 68 | Female,172,96,4 69 | Female,168,59,2 70 | Female,164,82,4 71 | Female,194,136,4 72 | 
Female,153,51,2 73 | Male,178,117,4 74 | Male,141,80,5 75 | Male,180,75,2 76 | Female,185,100,3 77 | Female,197,154,4 78 | Male,165,104,4 79 | Female,168,90,4 80 | Female,176,122,4 81 | Male,181,51,0 82 | Male,164,75,3 83 | Female,166,140,5 84 | Female,190,105,3 85 | Male,186,118,4 86 | Male,168,123,5 87 | Male,198,50,0 88 | Female,175,141,5 89 | Male,145,117,5 90 | Female,159,104,5 91 | Female,185,140,5 92 | Female,178,154,5 93 | Female,183,96,3 94 | Female,194,111,3 95 | Male,177,61,2 96 | Male,197,119,4 97 | Female,170,156,5 98 | Male,142,69,4 99 | Male,160,139,5 100 | Male,195,69,1 101 | Female,190,50,0 102 | Male,199,156,4 103 | Male,154,105,5 104 | Male,161,155,5 105 | Female,198,145,4 106 | Female,192,140,4 107 | Male,195,126,4 108 | Male,166,160,5 109 | Male,159,154,5 110 | Female,181,106,4 111 | Male,149,66,3 112 | Female,150,70,4 113 | Female,146,157,5 114 | Male,190,135,4 115 | Female,192,90,2 116 | Female,177,96,4 117 | Male,148,60,3 118 | Female,165,57,2 119 | Female,146,104,5 120 | Male,144,108,5 121 | Female,176,156,5 122 | Female,168,87,4 123 | Male,187,122,4 124 | Male,187,138,4 125 | Female,184,160,5 126 | Female,158,149,5 127 | Male,158,96,4 128 | Male,194,115,4 129 | Female,145,79,4 130 | Male,182,151,5 131 | Male,154,54,2 132 | Female,168,139,5 133 | Female,187,70,2 134 | Female,158,153,5 135 | Female,167,110,4 136 | Female,171,155,5 137 | Female,183,150,5 138 | Female,190,156,5 139 | Male,194,108,3 140 | Male,171,147,5 141 | Male,159,124,5 142 | Female,169,54,2 143 | Female,167,85,4 144 | Male,180,149,5 145 | Male,163,123,5 146 | Male,140,79,5 147 | Male,197,125,4 148 | Male,194,106,3 149 | Female,140,146,5 150 | Male,195,98,3 151 | Female,168,115,3 152 | Female,196,50,0 153 | Male,140,52,3 154 | Female,150,60,3 155 | Female,168,140,5 156 | Female,155,111,5 157 | Female,179,103,4 158 | Female,182,84,3 159 | Male,168,160,5 160 | Female,187,102,3 161 | Male,181,105,4 162 | Male,199,99,2 163 | Female,184,76,2 164 | Male,192,101,3 165 | 
Female,182,143,5 166 | Female,172,111,4 167 | Male,181,78,2 168 | Male,176,109,4 169 | Female,156,106,5 170 | Female,151,67,3 171 | Female,188,80,2 172 | Male,187,136,4 173 | Male,174,138,5 174 | Male,167,151,5 175 | Female,196,131,4 176 | Male,197,149,4 177 | Female,185,119,4 178 | Female,170,102,4 179 | Female,181,94,3 180 | Female,166,126,5 181 | Male,188,100,3 182 | Female,162,74,3 183 | Male,177,117,4 184 | Male,162,97,4 185 | Male,180,73,2 186 | Female,192,108,3 187 | Male,165,80,3 188 | Female,167,135,5 189 | Female,182,84,3 190 | Female,161,134,5 191 | Male,158,95,4 192 | Male,141,85,5 193 | Male,154,100,5 194 | Male,165,105,4 195 | Female,142,137,5 196 | Male,141,94,5 197 | Male,145,108,5 198 | Male,157,74,4 199 | Female,177,117,4 200 | Female,166,144,5 201 | Male,193,151,5 202 | Male,184,57,1 203 | Male,179,93,3 204 | Female,156,89,4 205 | Male,182,104,4 206 | Male,145,160,5 207 | Female,150,87,4 208 | Male,145,99,5 209 | Female,196,122,4 210 | Male,191,96,3 211 | Female,148,67,4 212 | Female,150,84,4 213 | Male,148,155,5 214 | Female,153,146,5 215 | Female,196,159,5 216 | Female,185,52,0 217 | Female,171,131,5 218 | Female,143,118,5 219 | Female,142,86,5 220 | Female,141,126,5 221 | Male,159,109,5 222 | Female,173,82,2 223 | Male,183,138,5 224 | Female,152,90,4 225 | Male,178,140,5 226 | Male,188,54,0 227 | Female,155,144,5 228 | Male,166,70,3 229 | Male,188,123,4 230 | Female,171,120,5 231 | Male,179,130,5 232 | Female,186,137,4 233 | Female,153,78,2 234 | Female,184,86,3 235 | Female,177,81,3 236 | Male,145,78,4 237 | Male,170,81,3 238 | Male,181,141,5 239 | Male,165,155,5 240 | Female,174,65,2 241 | Female,146,110,5 242 | Male,178,85,3 243 | Male,166,61,2 244 | Male,191,62,1 245 | Female,177,155,5 246 | Female,183,50,0 247 | Male,151,114,5 248 | Male,182,98,3 249 | Female,142,159,5 250 | Female,188,90,3 251 | Male,161,89,4 252 | Male,153,70,3 253 | Male,140,143,5 254 | Male,169,141,5 255 | Female,162,159,5 256 | Male,183,147,5 257 | Female,162,58,2 
258 | Female,172,109,4 259 | Female,150,119,5 260 | Female,169,145,5 261 | Female,184,132,4 262 | Male,159,104,5 263 | Male,163,131,5 264 | Male,156,137,5 265 | Female,157,52,2 266 | Male,147,84,4 267 | Male,141,86,5 268 | Male,173,139,5 269 | Male,154,145,5 270 | Male,168,148,5 271 | Male,168,50,1 272 | Male,145,130,5 273 | Male,152,103,5 274 | Female,187,121,4 275 | Female,163,57,0 276 | Male,178,83,3 277 | Female,187,94,3 278 | Female,179,114,4 279 | Male,190,80,2 280 | Male,172,75,3 281 | Male,188,57,1 282 | Male,193,65,1 283 | Female,147,126,5 284 | Female,147,94,5 285 | Male,166,107,4 286 | Female,192,139,4 287 | Male,181,139,4 288 | Male,150,74,4 289 | Male,178,160,5 290 | Female,156,52,2 291 | Male,149,100,5 292 | Male,156,74,4 293 | Male,183,105,3 294 | Female,162,68,3 295 | Female,165,83,4 296 | Female,168,143,5 297 | Male,160,156,5 298 | Female,169,88,2 299 | Female,140,76,4 300 | Female,187,92,3 301 | Male,151,82,4 302 | Female,186,140,5 303 | Male,182,108,4 304 | Male,188,81,2 305 | Male,179,110,4 306 | Female,156,126,5 307 | Male,188,114,4 308 | Male,183,153,5 309 | Male,144,88,5 310 | Male,196,69,1 311 | Male,171,141,5 312 | Male,171,147,5 313 | Female,180,156,5 314 | Male,191,146,5 315 | Female,179,67,2 316 | Female,180,60,2 317 | Female,154,132,5 318 | Male,188,99,3 319 | Male,142,135,5 320 | Male,170,95,4 321 | Male,152,141,5 322 | Female,190,118,4 323 | Female,181,111,4 324 | Male,153,104,5 325 | Male,187,140,5 326 | Female,144,66,4 327 | Female,148,54,2 328 | Female,199,92,2 329 | Female,167,85,4 330 | Female,164,71,3 331 | Female,185,102,3 332 | Female,164,160,5 333 | Male,142,71,4 334 | Male,165,68,2 335 | Female,172,62,2 336 | Female,157,56,2 337 | Male,155,57,2 338 | Female,167,153,5 339 | Female,164,126,5 340 | Female,189,125,4 341 | Female,161,145,5 342 | Female,155,71,3 343 | Female,171,118,4 344 | Female,154,92,4 345 | Male,179,83,3 346 | Male,170,115,4 347 | Female,184,106,4 348 | Female,191,68,2 349 | Male,162,58,2 350 | Male,178,138,5 
351 | Female,157,60,2 352 | Male,184,83,2 353 | Male,197,88,2 354 | Female,160,51,2 355 | Male,184,153,5 356 | Male,190,50,0 357 | Male,174,90,3 358 | Female,189,124,4 359 | Female,186,143,5 360 | Female,180,58,1 361 | Female,186,148,4 362 | Female,193,61,1 363 | Male,161,103,4 364 | Female,151,158,5 365 | Female,195,147,4 366 | Female,184,152,5 367 | Male,141,80,5 368 | Female,185,94,3 369 | Female,186,127,4 370 | Male,142,131,5 371 | Female,147,67,4 372 | Male,151,62,3 373 | Female,160,124,5 374 | Male,185,60,1 375 | Female,163,63,2 376 | Male,174,95,4 377 | Female,150,144,5 378 | Male,142,91,5 379 | Male,178,142,5 380 | Female,154,96,5 381 | Male,176,87,3 382 | Male,159,120,5 383 | Male,191,62,1 384 | Male,177,117,4 385 | Male,151,154,5 386 | Female,182,149,5 387 | Female,197,72,2 388 | Male,146,138,5 389 | Female,160,83,4 390 | Female,157,66,3 391 | Female,150,50,2 392 | Female,167,58,2 393 | Female,180,70,2 394 | Female,183,76,2 395 | Female,183,87,3 396 | Female,152,154,5 397 | Female,164,71,3 398 | Male,187,96,3 399 | Male,169,136,5 400 | Female,149,61,3 401 | Male,163,137,5 402 | Female,195,104,3 403 | Male,174,107,4 404 | Male,182,70,2 405 | Male,169,110,4 406 | Male,193,130,4 407 | Male,148,141,5 408 | Male,186,68,2 409 | Male,165,143,5 410 | Female,146,123,5 411 | Female,166,133,5 412 | Male,179,56,1 413 | Female,177,101,4 414 | Male,181,154,5 415 | Female,161,154,5 416 | Female,157,103,5 417 | Female,169,98,4 418 | Female,152,114,5 419 | Female,162,64,2 420 | Male,162,130,5 421 | Female,177,61,2 422 | Female,195,61,1 423 | Male,140,146,5 424 | Female,186,146,5 425 | Female,178,107,4 426 | Male,174,54,1 427 | Female,180,59,1 428 | Male,188,141,4 429 | Female,187,130,4 430 | Female,153,77,4 431 | Female,165,95,4 432 | Female,178,79,2 433 | Female,163,154,5 434 | Female,150,97,5 435 | Male,179,127,4 436 | Male,165,62,2 437 | Male,168,158,5 438 | Female,153,133,5 439 | Male,184,157,5 440 | Male,188,65,1 441 | Female,166,153,5 442 | Female,172,116,4 443 | 
Male,182,73,2 444 | Male,143,149,5 445 | Male,152,146,5 446 | Female,186,128,4 447 | Male,159,140,5 448 | Male,146,70,4 449 | Female,176,121,4 450 | Female,146,101,5 451 | Male,159,145,5 452 | Male,162,157,5 453 | Female,172,90,4 454 | Female,169,121,5 455 | Male,182,50,0 456 | Female,183,79,2 457 | Male,176,77,2 458 | Female,188,128,4 459 | Female,175,83,2 460 | Male,154,81,4 461 | Female,184,147,5 462 | Male,179,123,4 463 | Male,152,132,5 464 | Male,179,56,1 465 | Female,145,141,5 466 | Female,181,80,2 467 | Male,158,127,5 468 | Female,188,99,3 469 | Male,145,142,5 470 | Male,161,115,5 471 | Male,198,109,3 472 | Male,147,142,5 473 | Male,154,112,5 474 | Female,178,65,2 475 | Male,195,153,5 476 | Female,167,79,3 477 | Male,183,131,4 478 | Female,164,142,5 479 | Male,167,64,2 480 | Female,151,55,2 481 | Female,147,107,5 482 | Female,155,115,5 483 | Female,172,108,4 484 | Female,142,86,5 485 | Male,146,85,4 486 | Female,188,115,4 487 | Male,173,111,4 488 | Female,160,109,5 489 | Male,187,80,2 490 | Male,198,136,4 491 | Female,179,150,5 492 | Female,164,59,2 493 | Female,146,147,5 494 | Female,198,50,0 495 | Female,170,53,1 496 | Male,152,98,5 497 | Female,150,153,5 498 | Female,184,121,4 499 | Female,141,136,5 500 | Male,150,95,5 501 | Male,173,131,5 502 | -------------------------------------------------------------------------------- /datasets/download-datasets.sh: -------------------------------------------------------------------------------- 1 | mkdir -p datasets/words && \ 2 | wget http://www.cs.jhu.edu/~mdredze/datasets/sentiment/processed_acl.tar.gz -O datasets/words-temp.tar.gz && \ 3 | tar xzvf datasets/words-temp.tar.gz -C datasets/words && rm datasets/words-temp.tar.gz 4 | 5 | -------------------------------------------------------------------------------- /datasets/start-gophernotes.sh: -------------------------------------------------------------------------------- 1 | sudo docker run -it -p 8888:8888 -e GODEBUG=cgocheck=0 -v 
$(pwd):/usr/share/notebooks gopherdata/gophernotes:latest-ds 2 | --------------------------------------------------------------------------------