├── .gitignore ├── Assets └── MLNet_Jupyter.png ├── Dockerfile ├── NuGet.config ├── README.md └── notebooks ├── Binary Classification ├── Binary Classification.ipynb ├── winequality_white_test.csv └── winequality_white_train.csv ├── Box Plot ├── 2017-18_NBA_salary.csv ├── Box Plot.ipynb └── Mall_Customers.csv ├── Clustering ├── Clustering.ipynb └── Mall_Customers.csv ├── Data Correlation Chart ├── Data Correlation Chart.ipynb └── titanic.csv ├── Introduction.ipynb ├── Multiclass Classification ├── Multiclass Classification.ipynb ├── sentences_test.tsv └── sentences_training.tsv ├── Recommendation using FFM ├── LasVegasTripAdvisorReviews.csv └── Recommendation using Field-aware Factorization Machine .ipynb ├── Recommendation ├── LasVegasTripAdvisorReviews.csv └── Recommendation using Matrix Factorization.ipynb └── Regression ├── Regression.ipynb ├── winequality_white_test.csv └── winequality_white_train.csv /.gitignore: -------------------------------------------------------------------------------- 1 | *checkpoint.ipynb -------------------------------------------------------------------------------- /Assets/MLNet_Jupyter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XamlBrewer/ML.NET-Jupyter-Notebooks/d1201de1b9e569d8e108e5134422d0401702df2b/Assets/MLNet_Jupyter.png -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM jupyter/scipy-notebook:latest 2 | 3 | # Install .NET CLI dependencies 4 | 5 | ARG NB_USER=jovyan 6 | ARG NB_UID=1000 7 | ENV USER ${NB_USER} 8 | ENV NB_UID ${NB_UID} 9 | ENV HOME /home/${NB_USER} 10 | 11 | WORKDIR ${HOME} 12 | 13 | USER root 14 | RUN apt-get update 15 | RUN apt-get install -y curl 16 | 17 | # Install .NET CLI dependencies 18 | RUN apt-get install -y --no-install-recommends \ 19 | libc6 \ 20 | libgcc1 \ 21 | libgssapi-krb5-2 \ 22 | 
libicu60 \ 23 | libssl1.1 \ 24 | libstdc++6 \ 25 | zlib1g 26 | 27 | RUN rm -rf /var/lib/apt/lists/* 28 | 29 | # Install .NET Core SDK 30 | ENV DOTNET_SDK_VERSION 3.0.100 31 | 32 | RUN curl -SL --output dotnet.tar.gz https://dotnetcli.blob.core.windows.net/dotnet/Sdk/$DOTNET_SDK_VERSION/dotnet-sdk-$DOTNET_SDK_VERSION-linux-x64.tar.gz \ 33 | && dotnet_sha512='766da31f9a0bcfbf0f12c91ea68354eb509ac2111879d55b656f19299c6ea1c005d31460dac7c2a4ef82b3edfea30232c82ba301fb52c0ff268d3e3a1b73d8f7' \ 34 | && echo "$dotnet_sha512 dotnet.tar.gz" | sha512sum -c - \ 35 | && mkdir -p /usr/share/dotnet \ 36 | && tar -zxf dotnet.tar.gz -C /usr/share/dotnet \ 37 | && rm dotnet.tar.gz \ 38 | && ln -s /usr/share/dotnet/dotnet /usr/bin/dotnet 39 | 40 | # Enable detection of running in a container 41 | ENV DOTNET_RUNNING_IN_CONTAINER=true \ 42 | # Enable correct mode for dotnet watch (only mode supported in a container) 43 | DOTNET_USE_POLLING_FILE_WATCHER=true \ 44 | # Skip extraction of XML docs - generally not useful within an image/container - helps performance 45 | NUGET_XMLDOC_MODE=skip \ 46 | # Opt out of telemetry until after we install jupyter when building the image, this prevents caching of machine id 47 | DOTNET_TRY_CLI_TELEMETRY_OPTOUT=true 48 | 49 | # Trigger first run experience by running arbitrary cmd 50 | RUN dotnet help 51 | 52 | # Copy notebooks 53 | 54 | COPY ./notebooks/ ${HOME}/Notebooks/ 55 | 56 | # Copy package sources 57 | 58 | COPY ./NuGet.config ${HOME}/nuget.config 59 | 60 | RUN chown -R ${NB_UID} ${HOME} 61 | USER ${USER} 62 | 63 | # Install Microsoft.DotNet.Interactive 64 | RUN dotnet tool install -g dotnet-try --version "1.0.19569.5" --add-source "https://dotnet.myget.org/F/dotnet-try/api/v3/index.json" 65 | 66 | ENV PATH="${PATH}:${HOME}/.dotnet/tools" 67 | RUN echo "$PATH" 68 | 69 | # Install kernel specs 70 | RUN dotnet try jupyter install 71 | 72 | # Enable telemetry once we install jupyter for the image 73 | ENV DOTNET_TRY_CLI_TELEMETRY_OPTOUT=false 74 | 
75 | # Set root to Notebooks 76 | WORKDIR ${HOME}/Notebooks/ 77 | -------------------------------------------------------------------------------- /NuGet.config: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ML.NET-Jupyter-Notebooks 2 | 3 | A collection of C# Jupyter notebooks covering some ML.NET scenarios inspired by https://github.com/XamlBrewer/UWP-MachineLearning-Sample. 4 | 5 | It is best viewed via [nbViewer](https://nbviewer.jupyter.org/github/XamlBrewer/ML.NET-Jupyter-Notebooks/tree/master/notebooks/). 6 | 7 | It is a **work in progress**, but so far we can offer the following samples: 8 | 9 | * Building and Visualizing Models: 10 | * Clustering 11 | * Binary Classification 12 | * Multiclass Classification 13 | * Regression 14 | * Recommendation using Matrix Factorization 15 | * Recommendation using Field-Aware Factorization Machine 16 | 17 | * Data Analysis: 18 | * Distribution analysis using Box Plots 19 | * Correlation analysis using Heatmaps 20 | 21 | Here's a screenshot of one of these: 22 | 23 | ![Screenshot](Assets/MLNet_Jupyter.png?raw=true) 24 | -------------------------------------------------------------------------------- /notebooks/Binary Classification/Binary Classification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### ML.NET Binary Classification\n", 8 | "Creates a binary classification model to predict the quality of wine using 11 physicochemical features. Uses the DataFrame API to read the raw data and prepare it." 
9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "### NuGet package installation" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 1, 21 | "metadata": {}, 22 | "outputs": [ 23 | { 24 | "data": { 25 | "text/html": [ 26 | "Installing package Microsoft.ML, version 1.4.0................done!" 27 | ] 28 | }, 29 | "metadata": {}, 30 | "output_type": "display_data" 31 | }, 32 | { 33 | "data": { 34 | "text/html": [ 35 | "Successfully added reference to package Microsoft.ML, version 1.4.0" 36 | ] 37 | }, 38 | "metadata": {}, 39 | "output_type": "display_data" 40 | }, 41 | { 42 | "data": { 43 | "text/html": [ 44 | "Installing package XPlot.Plotly, version 3.0.1........done!" 45 | ] 46 | }, 47 | "metadata": {}, 48 | "output_type": "display_data" 49 | }, 50 | { 51 | "data": { 52 | "text/html": [ 53 | "Successfully added reference to package XPlot.Plotly, version 3.0.1" 54 | ] 55 | }, 56 | "metadata": {}, 57 | "output_type": "display_data" 58 | } 59 | ], 60 | "source": [ 61 | "#r \"nuget:Microsoft.ML, 1.4.0\"\n", 62 | "#r \"nuget:XPlot.Plotly, 3.0.1\"" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "### Namespaces" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 2, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "using Microsoft.ML;\n", 79 | "using Microsoft.ML.Data;\n", 80 | "using Microsoft.ML.Trainers;\n", 81 | "using Microsoft.ML.Transforms;\n", 82 | "using XPlot.Plotly;" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "### Input class definition" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 3, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "public class BinaryClassificationData\n", 99 | "{\n", 100 | " [LoadColumn(0)]\n", 101 | " public float FixedAcidity;\n", 102 | "\n", 103 | " [LoadColumn(1)]\n", 104 | " public float 
VolatileAcidity;\n", 105 | "\n", 106 | " [LoadColumn(2)]\n", 107 | " public float CitricAcid;\n", 108 | "\n", 109 | " [LoadColumn(3)]\n", 110 | " public float ResidualSugar;\n", 111 | "\n", 112 | " [LoadColumn(4)]\n", 113 | " public float Chlorides;\n", 114 | "\n", 115 | " [LoadColumn(5)]\n", 116 | " public float FreeSulfurDioxide;\n", 117 | "\n", 118 | " [LoadColumn(6)]\n", 119 | " public float TotalSulfurDioxide;\n", 120 | "\n", 121 | " [LoadColumn(7)]\n", 122 | " public float Density;\n", 123 | "\n", 124 | " [LoadColumn(8)]\n", 125 | " public float Ph;\n", 126 | "\n", 127 | " [LoadColumn(9)]\n", 128 | " public float Sulphates;\n", 129 | "\n", 130 | " [LoadColumn(10)]\n", 131 | " public float Alcohol;\n", 132 | "\n", 133 | " [LoadColumn(11)]\n", 134 | " public float Quality;\n", 135 | "}\n", 136 | "\n", 137 | "public class RichBinaryClassificationData: BinaryClassificationData\n", 138 | "{\n", 139 | " public bool Label => Quality > 5;\n", 140 | "}" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "### Output class definition" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 4, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "public class BinaryClassificationPrediction\n", 157 | "{\n", 158 | " public bool Label;\n", 159 | "\n", 160 | " [ColumnName(\"PredictedLabel\")]\n", 161 | " public bool PredictedLabel;\n", 162 | "\n", 163 | " public int LabelAsNumber => PredictedLabel ? 1 : 0;\n", 164 | "}" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "### Bring in the DataFrame" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 5, 177 | "metadata": {}, 178 | "outputs": [ 179 | { 180 | "data": { 181 | "text/html": [ 182 | "Installing package Microsoft.Data.Analysis, version 0.2.0......done!" 
183 | ] 184 | }, 185 | "metadata": {}, 186 | "output_type": "display_data" 187 | }, 188 | { 189 | "data": { 190 | "text/html": [ 191 | "Successfully added reference to package Microsoft.Data.Analysis, version 0.2.0" 192 | ] 193 | }, 194 | "metadata": {}, 195 | "output_type": "display_data" 196 | } 197 | ], 198 | "source": [ 199 | "#r \"nuget:Microsoft.Data.Analysis,0.2.0\"\n", 200 | "using Microsoft.Data.Analysis;\n", 201 | "using Microsoft.AspNetCore.Html;\n", 202 | "\n", 203 | "// Convenient custom formatter.\n", 204 | "Formatter.Register((df, writer) =>\n", 205 | "{\n", 206 | " var headers = new List();\n", 207 | " headers.Add(th(i(\"index\")));\n", 208 | " headers.AddRange(df.Columns.Select(c => (IHtmlContent) th(c.Name)));\n", 209 | " var rows = new List>();\n", 210 | " var take = 5;\n", 211 | " for (var i = 0; i < Math.Min(take, df.Rows.Count); i++)\n", 212 | " {\n", 213 | " var cells = new List();\n", 214 | " cells.Add(td(i));\n", 215 | " foreach (var obj in df.Rows[i])\n", 216 | " {\n", 217 | " cells.Add(td(obj));\n", 218 | " }\n", 219 | " rows.Add(cells);\n", 220 | " }\n", 221 | "\n", 222 | " var t = table(\n", 223 | " thead(\n", 224 | " headers),\n", 225 | " tbody(\n", 226 | " rows.Select(\n", 227 | " r => tr(r))));\n", 228 | "\n", 229 | " writer.Write(t);\n", 230 | "}, \"text/html\");" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "### Read the raw data" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 6, 243 | "metadata": {}, 244 | "outputs": [ 245 | { 246 | "data": { 247 | "text/html": [ 248 | "
indexFixedAcidityVolatileAcidityCitricAcidResidualSugarChloridesFreeSulfurDioxideTotalSulfurDioxideDensityPhSulphatesAlcoholQuality
070.270.3620.70.045451701.00130.458.86
16.30.30.341.60.049141320.9943.30.499.56
28.10.280.46.90.0530970.99513.260.4410.16
37.20.230.328.50.058471860.99563.190.49.96
47.20.230.328.50.058471860.99563.190.49.96
" 249 | ] 250 | }, 251 | "metadata": {}, 252 | "output_type": "display_data" 253 | } 254 | ], 255 | "source": [ 256 | "var trainingData = DataFrame.LoadCsv(\n", 257 | " \"./winequality_white_train.csv\",\n", 258 | " separator: ';',\n", 259 | " columnNames: new[]\n", 260 | " {\n", 261 | " \"FixedAcidity\",\n", 262 | " \"VolatileAcidity\",\n", 263 | " \"CitricAcid\",\n", 264 | " \"ResidualSugar\",\n", 265 | " \"Chlorides\",\n", 266 | " \"FreeSulfurDioxide\",\n", 267 | " \"TotalSulfurDioxide\",\n", 268 | " \"Density\",\n", 269 | " \"Ph\",\n", 270 | " \"Sulphates\",\n", 271 | " \"Alcohol\",\n", 272 | " \"Quality\"\n", 273 | " });\n", 274 | "\n", 275 | "display(trainingData);" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "### Prepare the data" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 7, 288 | "metadata": {}, 289 | "outputs": [ 290 | { 291 | "data": { 292 | "text/html": [ 293 | "
indexFixedAcidityVolatileAcidityCitricAcidResidualSugarChloridesFreeSulfurDioxideTotalSulfurDioxideDensityPhSulphatesAlcoholQualityLabel
070.270.3620.70.045451701.00130.458.86True
16.30.30.341.60.049141320.9943.30.499.56True
28.10.280.46.90.0530970.99513.260.4410.16True
37.20.230.328.50.058471860.99563.190.49.96True
47.20.230.328.50.058471860.99563.190.49.96True
" 294 | ] 295 | }, 296 | "metadata": {}, 297 | "output_type": "display_data" 298 | } 299 | ], 300 | "source": [ 301 | "// Create the Label column and add it to the data.\n", 302 | "var labelCol = trainingData[\"Quality\"].ElementwiseGreaterThanOrEqual(6);\n", 303 | "labelCol.SetName(\"Label\");\n", 304 | "trainingData.Columns.Add(labelCol);\n", 305 | "\n", 306 | "// This works, but we need the Quality column in later cells ...\n", 307 | "// trainingData.Columns.Remove(trainingData[\"Quality\"]);\n", 308 | "\n", 309 | "display(trainingData);" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 8, 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [ 318 | "var mlContext = new MLContext(seed: null);\n", 319 | "\n", 320 | "// Define the pipeline.\n", 321 | "var pipeline =\n", 322 | " mlContext.Transforms.ReplaceMissingValues(\n", 323 | " outputColumnName: \"FixedAcidity\",\n", 324 | " replacementMode: MissingValueReplacingEstimator.ReplacementMode.Mean)\n", 325 | " .Append(mlContext.Transforms.Concatenate(\"Features\",\n", 326 | " new[]\n", 327 | " {\n", 328 | " \"FixedAcidity\",\n", 329 | " \"VolatileAcidity\",\n", 330 | " \"CitricAcid\",\n", 331 | " \"ResidualSugar\",\n", 332 | " \"Chlorides\",\n", 333 | " \"FreeSulfurDioxide\",\n", 334 | " \"TotalSulfurDioxide\",\n", 335 | " \"Density\",\n", 336 | " \"Ph\",\n", 337 | " \"Sulphates\",\n", 338 | " \"Alcohol\"\n", 339 | " }))\n", 340 | " .Append(mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression());" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "### Train the model" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 9, 353 | "metadata": {}, 354 | "outputs": [], 355 | "source": [ 356 | "var model = pipeline.Fit(trainingData);" 357 | ] 358 | }, 359 | { 360 | "cell_type": "markdown", 361 | "metadata": {}, 362 | "source": [ 363 | "### Evaluate the model" 364 | ] 365 | }, 366 | { 367 | 
"cell_type": "code", 368 | "execution_count": 10, 369 | "metadata": {}, 370 | "outputs": [ 371 | { 372 | "data": { 373 | "text/html": [ 374 | "
LogLossLogLossReductionEntropyAreaUnderRocCurveAccuracyPositivePrecisionPositiveRecallNegativePrecisionNegativeRecallF1ScoreAreaUnderPrecisionRecallCurveConfusionMatrix
0.74520452598977840.19740040333434760.9284885378533480.79096305668454760.73908603523104420.7645249487354750.87042801556420240.66397578203834510.48849294729027470.81404657933042210.8749940309174482{ Microsoft.ML.Data.ConfusionMatrix: PerClassPrecision: [ 0.764524948735475, 0.6639757820383451 ], PerClassRecall: [ 0.8704280155642024, 0.4884929472902747 ], Counts: [ [ 2237, 333 ], [ 689, 658 ] ], NumberOfClasses: 2 }
" 375 | ] 376 | }, 377 | "metadata": {}, 378 | "output_type": "display_data" 379 | } 380 | ], 381 | "source": [ 382 | "// Load the raw test data.\n", 383 | "var testData = mlContext.Data.LoadFromTextFile(\n", 384 | " \"./winequality_white_test.csv\", \n", 385 | " separatorChar: ';',\n", 386 | " hasHeader: true);\n", 387 | " \n", 388 | "// Calculate the Label for the test data (IDataView to IEnumerable to IDataView). \n", 389 | "var stronglyTypedTestData = mlContext.Data.CreateEnumerable(testData, false);\n", 390 | "testData = mlContext.Data.LoadFromEnumerable(stronglyTypedTestData);\n", 391 | "\n", 392 | "// Score the test data and calculate the metrics.\n", 393 | "var scoredData = model.Transform(testData);\n", 394 | "var qualityMetrics = mlContext.BinaryClassification.Evaluate(scoredData);\n", 395 | "display(qualityMetrics);" 396 | ] 397 | }, 398 | { 399 | "cell_type": "markdown", 400 | "metadata": {}, 401 | "source": [ 402 | "### Visualize the quality metrics" 403 | ] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "execution_count": 11, 408 | "metadata": { 409 | "scrolled": true 410 | }, 411 | "outputs": [ 412 | { 413 | "data": { 414 | "text/html": [ 415 | "
" 438 | ] 439 | }, 440 | "metadata": {}, 441 | "output_type": "display_data" 442 | } 443 | ], 444 | "source": [ 445 | "string[] metricNames = \n", 446 | " { \n", 447 | " \"Log Loss\", \n", 448 | " \"Log Loss Reduction\", \n", 449 | " \"Entropy\", \n", 450 | " \"Area Under Curve\", \n", 451 | " \"Accuracy\",\n", 452 | " \"Positive Recall\", \n", 453 | " \"Negative Recall\",\n", 454 | " \"F1 Score\"\n", 455 | " };\n", 456 | "\n", 457 | "double[] metricValues = \n", 458 | " { \n", 459 | " qualityMetrics.LogLoss, \n", 460 | " qualityMetrics.LogLossReduction, \n", 461 | " qualityMetrics.Entropy, \n", 462 | " qualityMetrics.AreaUnderRocCurve, \n", 463 | " qualityMetrics.Accuracy,\n", 464 | " qualityMetrics.PositiveRecall, \n", 465 | " qualityMetrics.NegativeRecall,\n", 466 | " qualityMetrics.F1Score\n", 467 | " };\n", 468 | "\n", 469 | "var graph = new Graph.Bar()\n", 470 | "{\n", 471 | " x = metricValues,\n", 472 | " y = metricNames,\n", 473 | " orientation = \"h\",\n", 474 | " marker = new Graph.Marker { color = \"darkred\" }\n", 475 | "};\n", 476 | "\n", 477 | "var chart = Chart.Plot(graph);\n", 478 | "\n", 479 | "var layout = new Layout.Layout(){ title=\"Quality Metrics\" };\n", 480 | "chart.WithLayout(layout);\n", 481 | "\n", 482 | "display(chart);" 483 | ] 484 | }, 485 | { 486 | "cell_type": "markdown", 487 | "metadata": {}, 488 | "source": [ 489 | "### Drawing the Confusion Matrix\n", 490 | "\n", 491 | "#### Default" 492 | ] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "execution_count": 12, 497 | "metadata": { 498 | "scrolled": true 499 | }, 500 | "outputs": [ 501 | { 502 | "data": { 503 | "text/html": [ 504 | "
PerClassPrecisionPerClassRecallCountsNumberOfClasses
[ 0.764524948735475, 0.6639757820383451 ][ 0.8704280155642024, 0.4884929472902747 ][ [ 2237, 333 ], [ 689, 658 ] ]2
" 505 | ] 506 | }, 507 | "metadata": {}, 508 | "output_type": "display_data" 509 | } 510 | ], 511 | "source": [ 512 | "display(qualityMetrics.ConfusionMatrix);" 513 | ] 514 | }, 515 | { 516 | "cell_type": "markdown", 517 | "metadata": {}, 518 | "source": [ 519 | "#### Custom Formatter for Binary Confusion Matrix" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": 13, 525 | "metadata": {}, 526 | "outputs": [], 527 | "source": [ 528 | "Formatter.Register((df, writer) =>\n", 529 | "{\n", 530 | " var rows = new List();\n", 531 | "\n", 532 | " var cells = new List();\n", 533 | " var n = df.Counts[0][0] + df.Counts[0][1] + df.Counts[1][0] + df.Counts[1][1];\n", 534 | " cells.Add(td[rowspan: 2, colspan: 2, style: \"text-align: center; background-color: transparent\"](\"n = \" + n));\n", 535 | " cells.Add(td[colspan: 2, style: \"border: 1px solid black; text-align: center; padding: 24px; background-color: lightsteelblue\"](b(\"Predicted\")));\n", 536 | " rows.Add(tr[style: \"background-color: transparent\"](cells));\n", 537 | " \n", 538 | " cells = new List();\n", 539 | " cells.Add(td[style:\"border: 1px solid black; padding: 24px; background-color: #E3EAF3\"](b(\"True\")));\n", 540 | " cells.Add(td[style:\"border: 1px solid black; padding: 24px; background-color: #E3EAF3\"](b(\"False\")));\n", 541 | " rows.Add(tr[style: \"background-color: transparent\"](cells));\n", 542 | " \n", 543 | " cells = new List();\n", 544 | " cells.Add(td[rowspan: 2, style:\"border: 1px solid black; text-align: center; padding: 24px; background-color: lightsteelblue\"](b(\"Actual\")));\n", 545 | " cells.Add(td[style:\"border: 1px solid black; text-align: center; padding: 24px; background-color: #E3EAF3\"](b(\"True\"))); \n", 546 | " cells.Add(td[style:\"border: 1px solid black; padding: 24px\"](df.Counts[0][0]));\n", 547 | " cells.Add(td[style:\"border: 1px solid black; padding: 24px\"](df.Counts[0][1]));\n", 548 | " rows.Add(tr[style: \"background-color: 
transparent\"](cells));\n", 549 | " \n", 550 | " cells = new List();\n", 551 | " cells.Add(td[style:\"border: 1px solid black; text-align: center; padding: 24px; background-color: #E3EAF3\"](b(\"False\")));\n", 552 | " cells.Add(td[style:\"border: 1px solid black; padding: 24px\"](df.Counts[1][0]));\n", 553 | " cells.Add(td[style:\"border: 1px solid black; padding: 24px\"](df.Counts[1][1]));\n", 554 | " rows.Add(tr(cells));\n", 555 | "\n", 556 | " var t = table(\n", 557 | " tbody(\n", 558 | " rows));\n", 559 | "\n", 560 | " writer.Write(t);\n", 561 | "}, \"text/html\");" 562 | ] 563 | }, 564 | { 565 | "cell_type": "markdown", 566 | "metadata": {}, 567 | "source": [ 568 | "#### Tadaa" 569 | ] 570 | }, 571 | { 572 | "cell_type": "code", 573 | "execution_count": 14, 574 | "metadata": {}, 575 | "outputs": [ 576 | { 577 | "data": { 578 | "text/html": [ 579 | "
n = 3917Predicted
TrueFalse
ActualTrue2237333
False689658
" 580 | ] 581 | }, 582 | "metadata": {}, 583 | "output_type": "display_data" 584 | } 585 | ], 586 | "source": [ 587 | "display(qualityMetrics.ConfusionMatrix);" 588 | ] 589 | }, 590 | { 591 | "cell_type": "markdown", 592 | "metadata": {}, 593 | "source": [ 594 | "### Create a prediction engine and use it on a random sample" 595 | ] 596 | }, 597 | { 598 | "cell_type": "code", 599 | "execution_count": 15, 600 | "metadata": { 601 | "scrolled": true 602 | }, 603 | "outputs": [ 604 | { 605 | "data": { 606 | "text/html": [ 607 | "
LabelFixedAcidityVolatileAcidityCitricAcidResidualSugarChloridesFreeSulfurDioxideTotalSulfurDioxideDensityPhSulphatesAlcoholQuality
False7.10.370.6710.50.045491550.99753.160.448.75
" 608 | ] 609 | }, 610 | "metadata": {}, 611 | "output_type": "display_data" 612 | }, 613 | { 614 | "data": { 615 | "text/html": [ 616 | "
LabelAsNumberLabelPredictedLabel
0FalseFalse
" 617 | ] 618 | }, 619 | "metadata": {}, 620 | "output_type": "display_data" 621 | } 622 | ], 623 | "source": [ 624 | "// Create prediction engine\n", 625 | "var predictionEngine = mlContext.Model.CreatePredictionEngine(model);\n", 626 | "\n", 627 | "// Get a random data sample\n", 628 | "var shuffledData = mlContext.Data.ShuffleRows(trainingData);\n", 629 | "var rawSample = mlContext.Data.TakeRows(shuffledData, 1);\n", 630 | "var sample = mlContext.Data.CreateEnumerable(rawSample, false).First();\n", 631 | "display(sample);\n", 632 | "\n", 633 | "// Predict quality of sample\n", 634 | "var prediction = predictionEngine.Predict(sample);\n", 635 | "display(prediction);" 636 | ] 637 | } 638 | ], 639 | "metadata": { 640 | "kernelspec": { 641 | "display_name": ".NET (C#)", 642 | "language": "C#", 643 | "name": ".net-csharp" 644 | }, 645 | "language_info": { 646 | "file_extension": ".cs", 647 | "mimetype": "text/x-csharp", 648 | "name": "C#", 649 | "pygments_lexer": "csharp", 650 | "version": "8.0" 651 | } 652 | }, 653 | "nbformat": 4, 654 | "nbformat_minor": 2 655 | } 656 | -------------------------------------------------------------------------------- /notebooks/Box Plot/Box Plot.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Draws some Box Plot diagrams on data distribution" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### NuGet package installation" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "data": { 24 | "text/html": [ 25 | "Installing package Microsoft.ML, version 1.4.0.............done!" 
26 | ] 27 | }, 28 | "metadata": {}, 29 | "output_type": "display_data" 30 | }, 31 | { 32 | "data": { 33 | "text/html": [ 34 | "Successfully added reference to package Microsoft.ML, version 1.4.0" 35 | ] 36 | }, 37 | "metadata": {}, 38 | "output_type": "display_data" 39 | }, 40 | { 41 | "data": { 42 | "text/html": [ 43 | "Installing package XPlot.Plotly, version 3.0.1.....done!" 44 | ] 45 | }, 46 | "metadata": {}, 47 | "output_type": "display_data" 48 | }, 49 | { 50 | "data": { 51 | "text/html": [ 52 | "Successfully added reference to package XPlot.Plotly, version 3.0.1" 53 | ] 54 | }, 55 | "metadata": {}, 56 | "output_type": "display_data" 57 | } 58 | ], 59 | "source": [ 60 | "#r \"nuget:Microsoft.ML, 1.4.0\"\n", 61 | "#r \"nuget:XPlot.Plotly, 3.0.1\"" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "### Namespaces" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 2, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "using Microsoft.ML;\n", 78 | "using Microsoft.ML.Data;\n", 79 | "using XPlot.Plotly;" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "## Simple start: well prepared data" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "### Read the raw data" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 3, 99 | "metadata": { 100 | "scrolled": true 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "var mlContext = new MLContext(seed: null);\n", 105 | "\n", 106 | "var reader = mlContext.Data.CreateTextLoader(\n", 107 | " new TextLoader.Options()\n", 108 | " {\n", 109 | " Separators = new[] { ',' },\n", 110 | " HasHeader = true,\n", 111 | " Columns = new[]\n", 112 | " {\n", 113 | " new TextLoader.Column(\"Age\", DataKind.Single, 2),\n", 114 | " new TextLoader.Column(\"AnnualIncome\", DataKind.Single, 3),\n", 115 | " new TextLoader.Column(\"SpendingScore\", 
DataKind.Single, 4),\n", 116 | " }\n", 117 | " });\n", 118 | " \n", 119 | "var dataView = reader.Load(\"./Mall_Customers.csv\");" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "### Visualize the data" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 4, 132 | "metadata": {}, 133 | "outputs": [ 134 | { 135 | "data": { 136 | "text/html": [ 137 | "
" 160 | ] 161 | }, 162 | "metadata": {}, 163 | "output_type": "display_data" 164 | } 165 | ], 166 | "source": [ 167 | "var graph = new Graph.Box()\n", 168 | "{\n", 169 | " y = dataView.GetColumn(dataView.Schema[0]),\n", 170 | " name = \"Age\"\n", 171 | "};\n", 172 | "\n", 173 | "var graph2 = new Graph.Box()\n", 174 | "{\n", 175 | " y = dataView.GetColumn(dataView.Schema[1]),\n", 176 | " name = \"Annual Income\"\n", 177 | "};\n", 178 | "\n", 179 | "var graph3 = new Graph.Box()\n", 180 | "{\n", 181 | " y = dataView.GetColumn(dataView.Schema[2]),\n", 182 | " name = \"Spending Score\"\n", 183 | "};\n", 184 | "\n", 185 | "var chart = Chart.Plot(new List { graph, graph2, graph3 });\n", 186 | "\n", 187 | "var layout = new Layout.Layout(){ title=\"Shopping Mall Customers Data Distribution\", showlegend = false };\n", 188 | "chart.WithLayout(layout);\n", 189 | "\n", 190 | "display(chart);" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "## A more interesting sample: dispersed data" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "### Read the raw data" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": 5, 210 | "metadata": {}, 211 | "outputs": [], 212 | "source": [ 213 | "var reader2 = mlContext.Data.CreateTextLoader(\n", 214 | " new TextLoader.Options()\n", 215 | " {\n", 216 | " Separators = new[] { ',' },\n", 217 | " HasHeader = true,\n", 218 | " Columns = new[]\n", 219 | " {\n", 220 | " new TextLoader.Column(\"Ts\", DataKind.Single, 9),\n", 221 | " new TextLoader.Column(\"Orb\", DataKind.Single, 12),\n", 222 | " new TextLoader.Column(\"Drb\", DataKind.Single, 13),\n", 223 | " new TextLoader.Column(\"Trb\", DataKind.Single, 14),\n", 224 | " new TextLoader.Column(\"Ast\", DataKind.Single, 15),\n", 225 | " new TextLoader.Column(\"Stl\", DataKind.Single, 16),\n", 226 | " new TextLoader.Column(\"Blk\", DataKind.Single, 17),\n", 227 | " new 
TextLoader.Column(\"Tov\", DataKind.Single, 18),\n", 228 | " new TextLoader.Column(\"Usg\", DataKind.Single, 19),\n", 229 | " new TextLoader.Column(\"Age\", DataKind.Single, 4)\n", 230 | " }\n", 231 | " });\n", 232 | "\n", 233 | "var dataView2 = reader2.Load(\"./2017-18_NBA_salary.csv\");" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "### Visualize the data" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 6, 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/html": [ 251 | "
" 274 | ] 275 | }, 276 | "metadata": {}, 277 | "output_type": "display_data" 278 | } 279 | ], 280 | "source": [ 281 | "var chart2 = Chart.Plot(new List \n", 282 | "{ \n", 283 | " new Graph.Box()\n", 284 | " {\n", 285 | " y = dataView2.GetColumn(dataView2.Schema[0]),\n", 286 | " name = \"True Shootings\"\n", 287 | " },\n", 288 | " new Graph.Box()\n", 289 | " {\n", 290 | " y = dataView2.GetColumn(dataView2.Schema[1]),\n", 291 | " name = \"Offensive Rebounds\"\n", 292 | " },\n", 293 | " new Graph.Box()\n", 294 | " {\n", 295 | " y = dataView2.GetColumn(dataView2.Schema[2]),\n", 296 | " name = \"Defensive Rebounds\"\n", 297 | " },\n", 298 | " new Graph.Box()\n", 299 | " {\n", 300 | " y = dataView2.GetColumn(dataView2.Schema[3]),\n", 301 | " name = \"Team Rebounds\"\n", 302 | " },\n", 303 | " new Graph.Box()\n", 304 | " {\n", 305 | " y = dataView2.GetColumn(dataView2.Schema[4]),\n", 306 | " name = \"Assists\"\n", 307 | " }, \n", 308 | " new Graph.Box()\n", 309 | " {\n", 310 | " y = dataView2.GetColumn(dataView2.Schema[5]),\n", 311 | " name = \"Steals\"\n", 312 | " },\n", 313 | " new Graph.Box()\n", 314 | " {\n", 315 | " y = dataView2.GetColumn(dataView2.Schema[6]),\n", 316 | " name = \"Blocks\"\n", 317 | " },\n", 318 | " new Graph.Box()\n", 319 | " {\n", 320 | " y = dataView2.GetColumn(dataView2.Schema[7]),\n", 321 | " name = \"Turnover\"\n", 322 | " },\n", 323 | " new Graph.Box()\n", 324 | " {\n", 325 | " y = dataView2.GetColumn(dataView2.Schema[8]),\n", 326 | " name = \"Usage\"\n", 327 | " },\n", 328 | " new Graph.Box()\n", 329 | " {\n", 330 | " y = dataView2.GetColumn(dataView2.Schema[9]),\n", 331 | " name = \"Age\"\n", 332 | " }\n", 333 | "});\n", 334 | "\n", 335 | "var layout2 = new Layout.Layout(){ title=\"NBA Statistics Data Distribution\", showlegend = false };\n", 336 | "chart2.WithLayout(layout2);\n", 337 | "\n", 338 | "display(chart2);" 339 | ] 340 | } 341 | ], 342 | "metadata": { 343 | "kernelspec": { 344 | "display_name": ".NET (C#)", 345 | "language": "C#", 
346 | "name": ".net-csharp" 347 | }, 348 | "language_info": { 349 | "file_extension": ".cs", 350 | "mimetype": "text/x-csharp", 351 | "name": "C#", 352 | "pygments_lexer": "csharp", 353 | "version": "8.0" 354 | } 355 | }, 356 | "nbformat": 4, 357 | "nbformat_minor": 2 358 | } 359 | -------------------------------------------------------------------------------- /notebooks/Box Plot/Mall_Customers.csv: -------------------------------------------------------------------------------- 1 | CustomerID,Gender,Age,Annual Income (k$),Spending Score (1-100) 2 | 1,Male,19,15,39 3 | 2,Male,21,15,81 4 | 3,Female,20,16,6 5 | 4,Female,23,16,77 6 | 5,Female,31,17,40 7 | 6,Female,22,17,76 8 | 7,Female,35,18,6 9 | 8,Female,23,18,94 10 | 9,Male,64,19,3 11 | 10,Female,30,19,72 12 | 11,Male,67,19,14 13 | 12,Female,35,19,99 14 | 13,Female,58,20,15 15 | 14,Female,24,20,77 16 | 15,Male,37,20,13 17 | 16,Male,22,20,79 18 | 17,Female,35,21,35 19 | 18,Male,20,21,66 20 | 19,Male,52,23,29 21 | 20,Female,35,23,98 22 | 21,Male,35,24,35 23 | 22,Male,25,24,73 24 | 23,Female,46,25,5 25 | 24,Male,31,25,73 26 | 25,Female,54,28,14 27 | 26,Male,29,28,82 28 | 27,Female,45,28,32 29 | 28,Male,35,28,61 30 | 29,Female,40,29,31 31 | 30,Female,23,29,87 32 | 31,Male,60,30,4 33 | 32,Female,21,30,73 34 | 33,Male,53,33,4 35 | 34,Male,18,33,92 36 | 35,Female,49,33,14 37 | 36,Female,21,33,81 38 | 37,Female,42,34,17 39 | 38,Female,30,34,73 40 | 39,Female,36,37,26 41 | 40,Female,20,37,75 42 | 41,Female,65,38,35 43 | 42,Male,24,38,92 44 | 43,Male,48,39,36 45 | 44,Female,31,39,61 46 | 45,Female,49,39,28 47 | 46,Female,24,39,65 48 | 47,Female,50,40,55 49 | 48,Female,27,40,47 50 | 49,Female,29,40,42 51 | 50,Female,31,40,42 52 | 51,Female,49,42,52 53 | 52,Male,33,42,60 54 | 53,Female,31,43,54 55 | 54,Male,59,43,60 56 | 55,Female,50,43,45 57 | 56,Male,47,43,41 58 | 57,Female,51,44,50 59 | 58,Male,69,44,46 60 | 59,Female,27,46,51 61 | 60,Male,53,46,46 62 | 61,Male,70,46,56 63 | 62,Male,19,46,55 64 | 63,Female,67,47,52 65 | 
64,Female,54,47,59 66 | 65,Male,63,48,51 67 | 66,Male,18,48,59 68 | 67,Female,43,48,50 69 | 68,Female,68,48,48 70 | 69,Male,19,48,59 71 | 70,Female,32,48,47 72 | 71,Male,70,49,55 73 | 72,Female,47,49,42 74 | 73,Female,60,50,49 75 | 74,Female,60,50,56 76 | 75,Male,59,54,47 77 | 76,Male,26,54,54 78 | 77,Female,45,54,53 79 | 78,Male,40,54,48 80 | 79,Female,23,54,52 81 | 80,Female,49,54,42 82 | 81,Male,57,54,51 83 | 82,Male,38,54,55 84 | 83,Male,67,54,41 85 | 84,Female,46,54,44 86 | 85,Female,21,54,57 87 | 86,Male,48,54,46 88 | 87,Female,55,57,58 89 | 88,Female,22,57,55 90 | 89,Female,34,58,60 91 | 90,Female,50,58,46 92 | 91,Female,68,59,55 93 | 92,Male,18,59,41 94 | 93,Male,48,60,49 95 | 94,Female,40,60,40 96 | 95,Female,32,60,42 97 | 96,Male,24,60,52 98 | 97,Female,47,60,47 99 | 98,Female,27,60,50 100 | 99,Male,48,61,42 101 | 100,Male,20,61,49 102 | 101,Female,23,62,41 103 | 102,Female,49,62,48 104 | 103,Male,67,62,59 105 | 104,Male,26,62,55 106 | 105,Male,49,62,56 107 | 106,Female,21,62,42 108 | 107,Female,66,63,50 109 | 108,Male,54,63,46 110 | 109,Male,68,63,43 111 | 110,Male,66,63,48 112 | 111,Male,65,63,52 113 | 112,Female,19,63,54 114 | 113,Female,38,64,42 115 | 114,Male,19,64,46 116 | 115,Female,18,65,48 117 | 116,Female,19,65,50 118 | 117,Female,63,65,43 119 | 118,Female,49,65,59 120 | 119,Female,51,67,43 121 | 120,Female,50,67,57 122 | 121,Male,27,67,56 123 | 122,Female,38,67,40 124 | 123,Female,40,69,58 125 | 124,Male,39,69,91 126 | 125,Female,23,70,29 127 | 126,Female,31,70,77 128 | 127,Male,43,71,35 129 | 128,Male,40,71,95 130 | 129,Male,59,71,11 131 | 130,Male,38,71,75 132 | 131,Male,47,71,9 133 | 132,Male,39,71,75 134 | 133,Female,25,72,34 135 | 134,Female,31,72,71 136 | 135,Male,20,73,5 137 | 136,Female,29,73,88 138 | 137,Female,44,73,7 139 | 138,Male,32,73,73 140 | 139,Male,19,74,10 141 | 140,Female,35,74,72 142 | 141,Female,57,75,5 143 | 142,Male,32,75,93 144 | 143,Female,28,76,40 145 | 144,Female,32,76,87 146 | 145,Male,25,77,12 147 | 
146,Male,28,77,97 148 | 147,Male,48,77,36 149 | 148,Female,32,77,74 150 | 149,Female,34,78,22 151 | 150,Male,34,78,90 152 | 151,Male,43,78,17 153 | 152,Male,39,78,88 154 | 153,Female,44,78,20 155 | 154,Female,38,78,76 156 | 155,Female,47,78,16 157 | 156,Female,27,78,89 158 | 157,Male,37,78,1 159 | 158,Female,30,78,78 160 | 159,Male,34,78,1 161 | 160,Female,30,78,73 162 | 161,Female,56,79,35 163 | 162,Female,29,79,83 164 | 163,Male,19,81,5 165 | 164,Female,31,81,93 166 | 165,Male,50,85,26 167 | 166,Female,36,85,75 168 | 167,Male,42,86,20 169 | 168,Female,33,86,95 170 | 169,Female,36,87,27 171 | 170,Male,32,87,63 172 | 171,Male,40,87,13 173 | 172,Male,28,87,75 174 | 173,Male,36,87,10 175 | 174,Male,36,87,92 176 | 175,Female,52,88,13 177 | 176,Female,30,88,86 178 | 177,Male,58,88,15 179 | 178,Male,27,88,69 180 | 179,Male,59,93,14 181 | 180,Male,35,93,90 182 | 181,Female,37,97,32 183 | 182,Female,32,97,86 184 | 183,Male,46,98,15 185 | 184,Female,29,98,88 186 | 185,Female,41,99,39 187 | 186,Male,30,99,97 188 | 187,Female,54,101,24 189 | 188,Male,28,101,68 190 | 189,Female,41,103,17 191 | 190,Female,36,103,85 192 | 191,Female,34,103,23 193 | 192,Female,32,103,69 194 | 193,Male,33,113,8 195 | 194,Female,38,113,91 196 | 195,Female,47,120,16 197 | 196,Female,35,120,79 198 | 197,Female,45,126,28 199 | 198,Male,32,126,74 200 | 199,Male,32,137,18 201 | 200,Male,30,137,83 202 | -------------------------------------------------------------------------------- /notebooks/Clustering/Clustering.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## ML.NET Clustering\n", 8 | "\n", 9 | "Divides shopping mall customers in 5 clusters, based on annual income and spending score." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "### NuGet package installation" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 3, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "data": { 26 | "text/html": [ 27 | "Installing package Microsoft.ML, version 1.4.0..done!" 28 | ] 29 | }, 30 | "metadata": {}, 31 | "output_type": "display_data" 32 | }, 33 | { 34 | "data": { 35 | "text/html": [ 36 | "Successfully added reference to package Microsoft.ML, version 1.4.0" 37 | ] 38 | }, 39 | "metadata": {}, 40 | "output_type": "display_data" 41 | }, 42 | { 43 | "data": { 44 | "text/html": [ 45 | "Installing package XPlot.Plotly, version 3.0.1..done!" 46 | ] 47 | }, 48 | "metadata": {}, 49 | "output_type": "display_data" 50 | }, 51 | { 52 | "data": { 53 | "text/html": [ 54 | "Successfully added reference to package XPlot.Plotly, version 3.0.1" 55 | ] 56 | }, 57 | "metadata": {}, 58 | "output_type": "display_data" 59 | } 60 | ], 61 | "source": [ 62 | "#r \"nuget:Microsoft.ML, 1.4.0\"\n", 63 | "#r \"nuget:XPlot.Plotly, 3.0.1\"" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "### Namespaces" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 4, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "using Microsoft.ML;\n", 80 | "using Microsoft.ML.Data;\n", 81 | "using Microsoft.ML.Trainers;\n", 82 | "using XPlot.Plotly;" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "### Input class definition" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 5, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "public class ClusteringData\n", 99 | "{\n", 100 | " public float AnnualIncome;\n", 101 | "\n", 102 | " public float SpendingScore;\n", 103 | "}" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "### 
Output class definition" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 6, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "public class ClusteringPrediction\n", 120 | "{\n", 121 | " [ColumnName(\"PredictedLabel\")]\n", 122 | " public uint PredictedCluster;\n", 123 | "\n", 124 | " [ColumnName(\"Score\")]\n", 125 | " public float[] Distances;\n", 126 | "\n", 127 | " public float AnnualIncome;\n", 128 | "\n", 129 | " public float SpendingScore;\n", 130 | "}" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "### Read the raw data" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 7, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "var mlContext = new MLContext(seed: null);\n", 147 | "\n", 148 | "var readerOptions = new TextLoader.Options()\n", 149 | "{\n", 150 | " Separators = new[] { ',' },\n", 151 | " HasHeader = true,\n", 152 | " Columns = new[]\n", 153 | " {\n", 154 | " new TextLoader.Column(\"AnnualIncome\", DataKind.Single, 3),\n", 155 | " new TextLoader.Column(\"SpendingScore\", DataKind.Single, 4),\n", 156 | " }\n", 157 | "};\n", 158 | "\n", 159 | "var dataView = mlContext.Data.LoadFromTextFile(\"./Mall_Customers.csv\", readerOptions);" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "### Visualize the raw data" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 8, 172 | "metadata": {}, 173 | "outputs": [ 174 | { 175 | "data": { 176 | "text/html": [ 177 | "

Some data

" 178 | ] 179 | }, 180 | "metadata": {}, 181 | "output_type": "display_data" 182 | }, 183 | { 184 | "data": { 185 | "text/html": [ 186 | "
indexAnnualIncomeSpendingScore
01539
11581
2166
31677
41740
" 187 | ] 188 | }, 189 | "metadata": {}, 190 | "output_type": "display_data" 191 | }, 192 | { 193 | "data": { 194 | "text/html": [ 195 | "
" 218 | ] 219 | }, 220 | "metadata": {}, 221 | "output_type": "display_data" 222 | } 223 | ], 224 | "source": [ 225 | "// Table\n", 226 | "display(h4(\"Some data\"));\n", 227 | "var rawData = mlContext.Data.CreateEnumerable<ClusteringData>(dataView, false);\n", 228 | "display(rawData.Take(5).ToList());\n", 229 | "\n", 230 | "// Plot\n", 231 | "var rawChart = Chart.Plot(new Graph.Scatter() \n", 232 | " {\n", 233 | " x = rawData.Select(r => r.SpendingScore), \n", 234 | " y = rawData.Select(r => r.AnnualIncome),\n", 235 | " mode = \"markers\"\n", 236 | " });\n", 237 | "var layout = new Layout.Layout() { title = \"Raw data\"};\n", 238 | "rawChart.WithLayout(layout);\n", 239 | "rawChart.Width = 500;\n", 240 | "rawChart.Height = 500;\n", 241 | "rawChart.WithYTitle(\"Income\");\n", 242 | "rawChart.WithXTitle(\"Spending Score\");\n", 243 | "display(rawChart);" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "### Prepare the data" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 9, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "var pipeline = mlContext.Transforms.Concatenate(\"Features\", new[] { \"AnnualIncome\", \"SpendingScore\" })\n", 260 | " .Append(mlContext.Clustering.Trainers.KMeans(\n", 261 | " featureColumnName: \"Features\",\n", 262 | " numberOfClusters: 5));" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "### Train the model" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 10, 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [ 278 | "var model = pipeline.Fit(dataView);" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": {}, 284 | "source": [ 285 | "### Calculate clusters for the training data" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": 11, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "var clusters = 
model.Transform(dataView);" 295 | ] 296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": {}, 300 | "source": [ 301 | "### Visualize the clusters" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": 12, 307 | "metadata": { 308 | "scrolled": true 309 | }, 310 | "outputs": [ 311 | { 312 | "data": { 313 | "text/html": [ 314 | "
" 337 | ] 338 | }, 339 | "metadata": {}, 340 | "output_type": "display_data" 341 | } 342 | ], 343 | "source": [ 344 | "var clusterData = mlContext.Data.CreateEnumerable<ClusteringPrediction>(clusters, false);\n", 345 | "var clusterScatter = new Graph.Scattergl() \n", 346 | " {\n", 347 | " x = clusterData.Select(r => r.SpendingScore), \n", 348 | " y = clusterData.Select(r => r.AnnualIncome),\n", 349 | " mode = \"markers\",\n", 350 | " marker = new Graph.Marker() \n", 351 | " { \n", 352 | " color = clusterData.Select(r => r.PredictedCluster), \n", 353 | " colorscale = \"Jet\",\n", 354 | " cmin = 1,\n", 355 | " cmax = 5\n", 356 | " }\n", 357 | " };\n", 358 | "var clusterChart = Chart.Plot(clusterScatter);\n", 359 | "clusterChart.WithLayout(new Layout.Layout() { title = \"Clusters\"});\n", 360 | "clusterChart.Width = 500;\n", 361 | "clusterChart.Height = 500;\n", 362 | "clusterChart.WithYTitle(\"Income\");\n", 363 | "clusterChart.WithXTitle(\"Spending Score\");\n", 364 | "clusterChart.WithLegend(false);\n", 365 | "display(clusterChart);" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "### Predict cluster" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": 13, 378 | "metadata": {}, 379 | "outputs": [ 380 | { 381 | "data": { 382 | "text/html": [ 383 | "

Prediction

" 384 | ] 385 | }, 386 | "metadata": {}, 387 | "output_type": "display_data" 388 | }, 389 | { 390 | "data": { 391 | "text/html": [ 392 | "
PredictedClusterDistancesAnnualIncomeSpendingScore
5[ 635.68945, 2047.7524, 3128.1382, 4318.84, 420.61523 ]7070
" 393 | ] 394 | }, 395 | "metadata": {}, 396 | "output_type": "display_data" 397 | } 398 | ], 399 | "source": [ 400 | "var predictionEngine = mlContext.Model.CreatePredictionEngine<ClusteringData, ClusteringPrediction>(model);\n", 401 | "var clusteringData = new ClusteringData\n", 402 | "{\n", 403 | " AnnualIncome = 70,\n", 404 | " SpendingScore = 70\n", 405 | "};\n", 406 | "var result = predictionEngine.Predict(clusteringData);\n", 407 | "\n", 408 | "display (h4(\"Prediction\"));\n", 409 | "display(result);" 410 | ] 411 | }, 412 | { 413 | "cell_type": "markdown", 414 | "metadata": {}, 415 | "source": [ 416 | "### Visualize predicted cluster" 417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": 14, 422 | "metadata": {}, 423 | "outputs": [ 424 | { 425 | "data": { 426 | "text/html": [ 427 | "
PredictedClusterDistancesAnnualIncomeSpendingScore
5[ 635.68945, 2047.7524, 3128.1382, 4318.84, 420.61523 ]7070
" 428 | ] 429 | }, 430 | "metadata": {}, 431 | "output_type": "display_data" 432 | }, 433 | { 434 | "data": { 435 | "text/html": [ 436 | "
" 459 | ] 460 | }, 461 | "metadata": {}, 462 | "output_type": "display_data" 463 | } 464 | ], 465 | "source": [ 466 | "display(result);\n", 467 | "var results = new List<ClusteringPrediction> { result };\n", 468 | "var predictionScatter = new Graph.Scattergl\n", 469 | " {\n", 470 | " x = results.Select(r => r.SpendingScore), \n", 471 | " y = results.Select(r => r.AnnualIncome),\n", 472 | " //mode = \"markers\",\n", 473 | " marker = new Graph.Marker() \n", 474 | " { \n", 475 | " color = results.Select(r => r.PredictedCluster), \n", 476 | " colorscale = \"Jet\", \n", 477 | " size = 20, \n", 478 | " symbol = 22, // Star Diamond\n", 479 | " cmin = 1,\n", 480 | " cmax = 5\n", 481 | " }\n", 482 | " };\n", 483 | "\n", 484 | "var scatters = new List<Graph.Scattergl> { predictionScatter, clusterScatter };\n", 485 | "\n", 486 | "var clusterChart = Chart.Plot(scatters);\n", 487 | "clusterChart.WithLayout(new Layout.Layout() { title = \"Prediction\"});\n", 488 | "clusterChart.Width = 500;\n", 489 | "clusterChart.Height = 500;\n", 490 | "clusterChart.WithYTitle(\"Income\");\n", 491 | "clusterChart.WithXTitle(\"Spending Score\");\n", 492 | "clusterChart.WithLegend(false);\n", 493 | "display(clusterChart);" 494 | ] 495 | } 496 | ], 497 | "metadata": { 498 | "kernelspec": { 499 | "display_name": ".NET (C#)", 500 | "language": "C#", 501 | "name": ".net-csharp" 502 | }, 503 | "language_info": { 504 | "file_extension": ".cs", 505 | "mimetype": "text/x-csharp", 506 | "name": "C#", 507 | "pygments_lexer": "csharp", 508 | "version": "8.0" 509 | } 510 | }, 511 | "nbformat": 4, 512 | "nbformat_minor": 2 513 | } 514 | -------------------------------------------------------------------------------- /notebooks/Clustering/Mall_Customers.csv: -------------------------------------------------------------------------------- 1 | CustomerID,Gender,Age,Annual Income (k$),Spending Score (1-100) 2 | 1,Male,19,15,39 3 | 2,Male,21,15,81 4 | 3,Female,20,16,6 5 | 4,Female,23,16,77 6 | 5,Female,31,17,40 7 | 6,Female,22,17,76 8 | 
7,Female,35,18,6 9 | 8,Female,23,18,94 10 | 9,Male,64,19,3 11 | 10,Female,30,19,72 12 | 11,Male,67,19,14 13 | 12,Female,35,19,99 14 | 13,Female,58,20,15 15 | 14,Female,24,20,77 16 | 15,Male,37,20,13 17 | 16,Male,22,20,79 18 | 17,Female,35,21,35 19 | 18,Male,20,21,66 20 | 19,Male,52,23,29 21 | 20,Female,35,23,98 22 | 21,Male,35,24,35 23 | 22,Male,25,24,73 24 | 23,Female,46,25,5 25 | 24,Male,31,25,73 26 | 25,Female,54,28,14 27 | 26,Male,29,28,82 28 | 27,Female,45,28,32 29 | 28,Male,35,28,61 30 | 29,Female,40,29,31 31 | 30,Female,23,29,87 32 | 31,Male,60,30,4 33 | 32,Female,21,30,73 34 | 33,Male,53,33,4 35 | 34,Male,18,33,92 36 | 35,Female,49,33,14 37 | 36,Female,21,33,81 38 | 37,Female,42,34,17 39 | 38,Female,30,34,73 40 | 39,Female,36,37,26 41 | 40,Female,20,37,75 42 | 41,Female,65,38,35 43 | 42,Male,24,38,92 44 | 43,Male,48,39,36 45 | 44,Female,31,39,61 46 | 45,Female,49,39,28 47 | 46,Female,24,39,65 48 | 47,Female,50,40,55 49 | 48,Female,27,40,47 50 | 49,Female,29,40,42 51 | 50,Female,31,40,42 52 | 51,Female,49,42,52 53 | 52,Male,33,42,60 54 | 53,Female,31,43,54 55 | 54,Male,59,43,60 56 | 55,Female,50,43,45 57 | 56,Male,47,43,41 58 | 57,Female,51,44,50 59 | 58,Male,69,44,46 60 | 59,Female,27,46,51 61 | 60,Male,53,46,46 62 | 61,Male,70,46,56 63 | 62,Male,19,46,55 64 | 63,Female,67,47,52 65 | 64,Female,54,47,59 66 | 65,Male,63,48,51 67 | 66,Male,18,48,59 68 | 67,Female,43,48,50 69 | 68,Female,68,48,48 70 | 69,Male,19,48,59 71 | 70,Female,32,48,47 72 | 71,Male,70,49,55 73 | 72,Female,47,49,42 74 | 73,Female,60,50,49 75 | 74,Female,60,50,56 76 | 75,Male,59,54,47 77 | 76,Male,26,54,54 78 | 77,Female,45,54,53 79 | 78,Male,40,54,48 80 | 79,Female,23,54,52 81 | 80,Female,49,54,42 82 | 81,Male,57,54,51 83 | 82,Male,38,54,55 84 | 83,Male,67,54,41 85 | 84,Female,46,54,44 86 | 85,Female,21,54,57 87 | 86,Male,48,54,46 88 | 87,Female,55,57,58 89 | 88,Female,22,57,55 90 | 89,Female,34,58,60 91 | 90,Female,50,58,46 92 | 91,Female,68,59,55 93 | 92,Male,18,59,41 94 | 
93,Male,48,60,49 95 | 94,Female,40,60,40 96 | 95,Female,32,60,42 97 | 96,Male,24,60,52 98 | 97,Female,47,60,47 99 | 98,Female,27,60,50 100 | 99,Male,48,61,42 101 | 100,Male,20,61,49 102 | 101,Female,23,62,41 103 | 102,Female,49,62,48 104 | 103,Male,67,62,59 105 | 104,Male,26,62,55 106 | 105,Male,49,62,56 107 | 106,Female,21,62,42 108 | 107,Female,66,63,50 109 | 108,Male,54,63,46 110 | 109,Male,68,63,43 111 | 110,Male,66,63,48 112 | 111,Male,65,63,52 113 | 112,Female,19,63,54 114 | 113,Female,38,64,42 115 | 114,Male,19,64,46 116 | 115,Female,18,65,48 117 | 116,Female,19,65,50 118 | 117,Female,63,65,43 119 | 118,Female,49,65,59 120 | 119,Female,51,67,43 121 | 120,Female,50,67,57 122 | 121,Male,27,67,56 123 | 122,Female,38,67,40 124 | 123,Female,40,69,58 125 | 124,Male,39,69,91 126 | 125,Female,23,70,29 127 | 126,Female,31,70,77 128 | 127,Male,43,71,35 129 | 128,Male,40,71,95 130 | 129,Male,59,71,11 131 | 130,Male,38,71,75 132 | 131,Male,47,71,9 133 | 132,Male,39,71,75 134 | 133,Female,25,72,34 135 | 134,Female,31,72,71 136 | 135,Male,20,73,5 137 | 136,Female,29,73,88 138 | 137,Female,44,73,7 139 | 138,Male,32,73,73 140 | 139,Male,19,74,10 141 | 140,Female,35,74,72 142 | 141,Female,57,75,5 143 | 142,Male,32,75,93 144 | 143,Female,28,76,40 145 | 144,Female,32,76,87 146 | 145,Male,25,77,12 147 | 146,Male,28,77,97 148 | 147,Male,48,77,36 149 | 148,Female,32,77,74 150 | 149,Female,34,78,22 151 | 150,Male,34,78,90 152 | 151,Male,43,78,17 153 | 152,Male,39,78,88 154 | 153,Female,44,78,20 155 | 154,Female,38,78,76 156 | 155,Female,47,78,16 157 | 156,Female,27,78,89 158 | 157,Male,37,78,1 159 | 158,Female,30,78,78 160 | 159,Male,34,78,1 161 | 160,Female,30,78,73 162 | 161,Female,56,79,35 163 | 162,Female,29,79,83 164 | 163,Male,19,81,5 165 | 164,Female,31,81,93 166 | 165,Male,50,85,26 167 | 166,Female,36,85,75 168 | 167,Male,42,86,20 169 | 168,Female,33,86,95 170 | 169,Female,36,87,27 171 | 170,Male,32,87,63 172 | 171,Male,40,87,13 173 | 172,Male,28,87,75 174 | 
173,Male,36,87,10 175 | 174,Male,36,87,92 176 | 175,Female,52,88,13 177 | 176,Female,30,88,86 178 | 177,Male,58,88,15 179 | 178,Male,27,88,69 180 | 179,Male,59,93,14 181 | 180,Male,35,93,90 182 | 181,Female,37,97,32 183 | 182,Female,32,97,86 184 | 183,Male,46,98,15 185 | 184,Female,29,98,88 186 | 185,Female,41,99,39 187 | 186,Male,30,99,97 188 | 187,Female,54,101,24 189 | 188,Male,28,101,68 190 | 189,Female,41,103,17 191 | 190,Female,36,103,85 192 | 191,Female,34,103,23 193 | 192,Female,32,103,69 194 | 193,Male,33,113,8 195 | 194,Female,38,113,91 196 | 195,Female,47,120,16 197 | 196,Female,35,120,79 198 | 197,Female,45,126,28 199 | 198,Male,32,126,74 200 | 199,Male,32,137,18 201 | 200,Male,30,137,83 202 | -------------------------------------------------------------------------------- /notebooks/Data Correlation Chart/Data Correlation Chart.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Draws a Correlation Chart or Heatmap on the Titanic data set" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### NuGet package installation" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "data": { 24 | "text/html": [ 25 | "Installing package MathNet.Numerics, version 4.9.0.....................................done!" 26 | ] 27 | }, 28 | "metadata": {}, 29 | "output_type": "display_data" 30 | }, 31 | { 32 | "data": { 33 | "text/html": [ 34 | "Successfully added reference to package MathNet.Numerics, version 4.9.0" 35 | ] 36 | }, 37 | "metadata": {}, 38 | "output_type": "display_data" 39 | }, 40 | { 41 | "data": { 42 | "text/html": [ 43 | "Installing package Microsoft.ML, version 1.4.0.........done!" 
44 | ] 45 | }, 46 | "metadata": {}, 47 | "output_type": "display_data" 48 | }, 49 | { 50 | "data": { 51 | "text/html": [ 52 | "Successfully added reference to package Microsoft.ML, version 1.4.0" 53 | ] 54 | }, 55 | "metadata": {}, 56 | "output_type": "display_data" 57 | }, 58 | { 59 | "data": { 60 | "text/html": [ 61 | "Installing package XPlot.Plotly, version 3.0.1........done!" 62 | ] 63 | }, 64 | "metadata": {}, 65 | "output_type": "display_data" 66 | }, 67 | { 68 | "data": { 69 | "text/html": [ 70 | "Successfully added reference to package XPlot.Plotly, version 3.0.1" 71 | ] 72 | }, 73 | "metadata": {}, 74 | "output_type": "display_data" 75 | } 76 | ], 77 | "source": [ 78 | "#r \"nuget:MathNet.Numerics, 4.9.0\"\n", 79 | "#r \"nuget:Microsoft.ML, 1.4.0\"\n", 80 | "#r \"nuget:XPlot.Plotly, 3.0.1\"" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "### Namespaces" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 2, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "using Microsoft.ML;\n", 97 | "using Microsoft.ML.Data;\n", 98 | "using XPlot.Plotly;\n", 99 | "using MathNet.Numerics.Statistics;" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "## Simple heatmap sample\n", 107 | "Warming up ..." 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 3, 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "data": { 117 | "text/html": [ 118 | "
" 141 | ] 142 | }, 143 | "metadata": {}, 144 | "output_type": "display_data" 145 | } 146 | ], 147 | "source": [ 148 | "var graph = new Graph.Heatmap()\n", 149 | "{\n", 150 | " x = new [] { \"one\", \"two\", \"three\"},\n", 151 | " y = new [] { \"three\", \"two\", \"one\" },\n", 152 | " z = new List<List<double>> \n", 153 | " { \n", 154 | " new List<double> { 0, -.75, 1 }, \n", 155 | " new List<double> { .75, 1, -.75 }, \n", 156 | " new List<double> { 1, .75, 0 }\n", 157 | " },\n", 158 | " zmin = -1,\n", 159 | " zmax = 1\n", 160 | "};\n", 161 | "\n", 162 | "var chart = Chart.Plot(graph);\n", 163 | "\n", 164 | "var layout = new Layout.Layout(){ title=\"Sample Correlation Chart\" };\n", 165 | "chart.WithLayout(layout);\n", 166 | "\n", 167 | "display(chart);" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "## And now for the real thing" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "### Read the data" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 4, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "var mlContext = new MLContext(seed: null);\n", 191 | "\n", 192 | "var readerOptions = new TextLoader.Options()\n", 193 | "{\n", 194 | " Separators = new[] { ',' },\n", 195 | " HasHeader = true,\n", 196 | " AllowQuoting = true,\n", 197 | " Columns = new[]\n", 198 | " {\n", 199 | " new TextLoader.Column(\"Survived\", DataKind.Single, 1),\n", 200 | " new TextLoader.Column(\"PClass\", DataKind.Single, 2),\n", 201 | " new TextLoader.Column(\"Age\", DataKind.Single, 5),\n", 202 | " new TextLoader.Column(\"SibSp\", DataKind.Single, 6),\n", 203 | " new TextLoader.Column(\"Parch\", DataKind.Single, 7),\n", 204 | " new TextLoader.Column(\"Fare\", DataKind.Single, 9)\n", 205 | " }\n", 206 | "};\n", 207 | "\n", 208 | "var dataView = mlContext.Data.LoadFromTextFile(\"./titanic.csv\", readerOptions);" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | 
"metadata": {}, 214 | "source": [ 215 | "### Enter the Matrix\n", 216 | "\n", 217 | "ML.NET prefers singles (float), Math.NET prefers doubles ..." 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 5, 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [ 226 | "var matrix = new List<List<double>>();\n", 227 | "for (int i = 0; i < dataView.Schema.Count; i++)\n", 228 | "{\n", 229 | " var column = dataView.Schema[i];\n", 230 | " matrix.Add(dataView.GetColumn<float>(column).Select(f => (double)f).ToList());\n", 231 | "}\n", 232 | "\n", 233 | "var data = new double[6, 6];\n", 234 | "for (int x = 0; x < 6; ++x)\n", 235 | "{\n", 236 | " for (int y = 0; y < 5 - x; ++y)\n", 237 | " {\n", 238 | " var seriesA = matrix[x];\n", 239 | " var seriesB = matrix[5 - y];\n", 240 | "\n", 241 | " var value = Correlation.Pearson(seriesA, seriesB);\n", 242 | "\n", 243 | " data[x, y] = value;\n", 244 | " data[5 - y, 5 - x] = value;\n", 245 | " }\n", 246 | "\n", 247 | " data[x, 5 - x] = 1;\n", 248 | "}" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "### Draw" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 6, 261 | "metadata": { 262 | "scrolled": true 263 | }, 264 | "outputs": [ 265 | { 266 | "data": { 267 | "text/html": [ 268 | "
" 291 | ] 292 | }, 293 | "metadata": {}, 294 | "output_type": "display_data" 295 | } 296 | ], 297 | "source": [ 298 | "var graph = new Graph.Heatmap()\n", 299 | "{\n", 300 | " y = new [] { \"Survived\", \"Class\", \"Age\", \"Sib / Sp\", \"Par / Chi\", \"Fare\" },\n", 301 | " x = new [] { \"Fare\", \"Parents / Children\", \"Siblings / Spouses\", \"Age\", \"Class\", \"Survived\" },\n", 302 | " z = data,\n", 303 | " zmin = -1,\n", 304 | " zmax = 1\n", 305 | "};\n", 306 | "\n", 307 | "var chart = Chart.Plot(graph);\n", 308 | "\n", 309 | "var layout = new Layout.Layout(){ title=\"Titanic Survival Correlation Chart\" };\n", 310 | "chart.WithLayout(layout);\n", 311 | "\n", 312 | "display(chart);" 313 | ] 314 | } 315 | ], 316 | "metadata": { 317 | "kernelspec": { 318 | "display_name": ".NET (C#)", 319 | "language": "C#", 320 | "name": ".net-csharp" 321 | }, 322 | "language_info": { 323 | "file_extension": ".cs", 324 | "mimetype": "text/x-csharp", 325 | "name": "C#", 326 | "pygments_lexer": "csharp", 327 | "version": "8.0" 328 | } 329 | }, 330 | "nbformat": 4, 331 | "nbformat_minor": 2 332 | } 333 | -------------------------------------------------------------------------------- /notebooks/Introduction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Introduction" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "#### Regular C# # " 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "name": "stdout", 24 | "output_type": "stream", 25 | "text": [ 26 | "Hello World\r\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "Console.WriteLine(\"Hello World\");" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "#### New functions" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | 
"execution_count": 2, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "data": { 48 | "text/plain": [ 49 | "Hello World" 50 | ] 51 | }, 52 | "metadata": {}, 53 | "output_type": "display_data" 54 | } 55 | ], 56 | "source": [ 57 | "display(\"Hello World\");" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "#### Read-Eval-Print Loop" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 3, 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "data": { 74 | "text/plain": [ 75 | "Hello World" 76 | ] 77 | }, 78 | "execution_count": 3, 79 | "metadata": {}, 80 | "output_type": "execute_result" 81 | } 82 | ], 83 | "source": [ 84 | "\"Hello World\"" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "### NuGet Packages and Namespaces" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 4, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/html": [ 102 | "Installing package XPlot.Plotly.........done!" 103 | ] 104 | }, 105 | "metadata": {}, 106 | "output_type": "display_data" 107 | }, 108 | { 109 | "data": { 110 | "text/html": [ 111 | "Failed to add reference to package XPlot.Plotly" 112 | ] 113 | }, 114 | "metadata": {}, 115 | "output_type": "display_data" 116 | }, 117 | { 118 | "data": { 119 | "text/html": [ 120 | "
indexvalue
0error: Unable to load the service index for source https://api.nuget.org/v3/index.json.
1error: No such host is known.
" 121 | ] 122 | }, 123 | "metadata": {}, 124 | "output_type": "display_data" 125 | } 126 | ], 127 | "source": [ 128 | "#r \"nuget:XPlot.Plotly\"\n", 129 | "using XPlot.Plotly;" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 5, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "data": { 139 | "text/html": [ 140 | "
" 163 | ] 164 | }, 165 | "metadata": {}, 166 | "output_type": "display_data" 167 | } 168 | ], 169 | "source": [ 170 | "var openSeries = new Graph.Scatter\n", 171 | "{\n", 172 | " name = \"Open\",\n", 173 | " x = new[] {1, 2, 3, 4}, \n", 174 | " y = new[] {10, 15, 13, 17}\n", 175 | "};\n", 176 | "\n", 177 | "var closeSeries = new Graph.Scatter\n", 178 | "{\n", 179 | " name = \"Close\",\n", 180 | " x = new[] { 2,3,4,5 },\n", 181 | " y = new[] { 16, 5, 11, 9 }\n", 182 | "};\n", 183 | "\n", 184 | "var chart = Chart.Plot(new[] {openSeries, closeSeries});\n", 185 | "chart.WithTitle(\"Open vs Close\");\n", 186 | "display(chart);" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 6, 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "data": { 196 | "text/html": [ 197 | "
" 220 | ] 221 | }, 222 | "metadata": {}, 223 | "output_type": "display_data" 224 | } 225 | ], 226 | "source": [ 227 | "var chart = Chart.Plot(new List<Graph.Box> \n", 228 | "{ \n", 229 | " new Graph.Box() { y = new int [] { -40, 24, 36, 52, 67, 99, 26, 28, 20 }, name = \"Age\" }, \n", 230 | " new Graph.Box() { y = new int [] { 12, 42, 70, 100, 46, 20, 24, 24, 20 }, name = \"Annual Income\" }, \n", 231 | " new Graph.Box() { y = new int [] { 0, 30, 100, 50, 60, 80, 75, 75, 20 }, name = \"Spending Score\" } \n", 232 | "});\n", 233 | "\n", 234 | "var layout = new Layout.Layout() \n", 235 | "{ \n", 236 | " title=\"Shopping Mall Customers Data Distribution\", \n", 237 | " showlegend = false \n", 238 | "};\n", 239 | "chart.WithLayout(layout);\n", 240 | "\n", 241 | "display(chart);" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": {}, 247 | "source": [ 248 | "### PocketView API" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 7, 254 | "metadata": {}, 255 | "outputs": [ 256 | { 257 | "data": { 258 | "text/html": [ 259 | "
indexvalue
0_
1a
2area
3aside
4b
5body
6br
7button
8caption
9center
10code
11colgroup
12dd
13details
14div
15dl
16dt
17em
18figure
19font
20form
21h1
22h2
23h3
24h4
25h5
26h6
27head
28header
29hgroup
30hr
31html
32i
33iframe
34img
35input
36label
37li
38link
39main
40menu
41menuitem
42meta
43meter
44nav
45ol
46optgroup
47option
48p
49pre
50progress
51q
52script
53section
54select
55small
56source
57span
58strike
59style
60strong
61sub
62sup
63svg
64table
65tbody
66td
67textarea
68tfoot
69th
70thead
71title
72tr
73u
74ul
75video
" 260 | ] 261 | }, 262 | "metadata": {}, 263 | "output_type": "display_data" 264 | } 265 | ], 266 | "source": [ 267 | "var pocketViewTagMethods = typeof(PocketViewTags)\n", 268 | " .GetProperties()\n", 269 | " .Select(m => m.Name);\n", 270 | "\n", 271 | "display(pocketViewTagMethods);" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 8, 277 | "metadata": {}, 278 | "outputs": [ 279 | { 280 | "data": { 281 | "text/html": [ 282 | "
Hello!
" 283 | ] 284 | }, 285 | "metadata": {}, 286 | "output_type": "display_data" 287 | } 288 | ], 289 | "source": [ 290 | "var pocketView = table[style: \"width: 100%\"](tr(td[style:\"border: 1px solid black\"](\"Hello!\")));\n", 291 | "\n", 292 | "display(pocketView);" 293 | ] 294 | } 295 | ], 296 | "metadata": { 297 | "kernelspec": { 298 | "display_name": ".NET (C#)", 299 | "language": "C#", 300 | "name": ".net-csharp" 301 | }, 302 | "language_info": { 303 | "file_extension": ".cs", 304 | "mimetype": "text/x-csharp", 305 | "name": "C#", 306 | "pygments_lexer": "csharp", 307 | "version": "8.0" 308 | } 309 | }, 310 | "nbformat": 4, 311 | "nbformat_minor": 2 312 | } 313 | -------------------------------------------------------------------------------- /notebooks/Multiclass Classification/Multiclass Classification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### ML.NET Multiclass Classification\n", 8 | "\n", 9 | "Recognizes whether a piece of text is written in German, English, Italian, Romanian, French or Spanish." 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "### NuGet package installation" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "data": { 26 | "text/html": [ 27 | "Installing package Microsoft.ML, version 1.4.0......................................................done!" 28 | ] 29 | }, 30 | "metadata": {}, 31 | "output_type": "display_data" 32 | }, 33 | { 34 | "data": { 35 | "text/html": [ 36 | "Successfully added reference to package Microsoft.ML, version 1.4.0" 37 | ] 38 | }, 39 | "metadata": {}, 40 | "output_type": "display_data" 41 | }, 42 | { 43 | "data": { 44 | "text/html": [ 45 | "Installing package XPlot.Plotly, version 3.0.1.......done!" 
46 | ] 47 | }, 48 | "metadata": {}, 49 | "output_type": "display_data" 50 | }, 51 | { 52 | "data": { 53 | "text/html": [ 54 | "Successfully added reference to package XPlot.Plotly, version 3.0.1" 55 | ] 56 | }, 57 | "metadata": {}, 58 | "output_type": "display_data" 59 | } 60 | ], 61 | "source": [ 62 | "#r \"nuget:Microsoft.ML, 1.4.0\"\n", 63 | "#r \"nuget:XPlot.Plotly, 3.0.1\"" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "### Namespaces" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 2, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "using Microsoft.ML;\n", 80 | "using Microsoft.ML.Data;\n", 81 | "using Microsoft.ML.Trainers;\n", 82 | "using XPlot.Plotly;" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "### Input class definition" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 3, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "public class MulticlassClassificationData\n", 99 | "{\n", 100 | " [LoadColumn(0), ColumnName(\"Label\")]\n", 101 | " public float LanguageClass;\n", 102 | "\n", 103 | " [LoadColumn(1)]\n", 104 | " public string Text;\n", 105 | "\n", 106 | " public MulticlassClassificationData(string text)\n", 107 | " {\n", 108 | " Text = text;\n", 109 | " }\n", 110 | "}" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "### Output class definition" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 4, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "public class MulticlassClassificationPrediction\n", 127 | "{\n", 128 | " private readonly string[] classNames = { \"German\", \"English\", \"French\", \"Italian\", \"Romanian\", \"Spanish\" };\n", 129 | "\n", 130 | " public string Text;\n", 131 | "\n", 132 | " [ColumnName(\"PredictedLabel\")]\n", 133 | " public float Class;\n", 134 | "\n", 
135 | " [ColumnName(\"Score\")]\n", 136 | " public float[] Confidences;\n", 137 | "\n", 138 | " public string PredictedLanguage => classNames[(int)Class];\n", 139 | "\n", 140 | " public int Confidence => (int)(Confidences[(int)Class] * 100);\n", 141 | "}" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "### Read the raw data" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 5, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "var mlContext = new MLContext(seed: null);\n", 158 | "\n", 159 | "var trainingData = mlContext.Data.LoadFromTextFile(\"./Sentences_Training.tsv\");" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "### Prepare the data" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 6, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "var pipeline = mlContext.Transforms.Conversion.MapValueToKey(\"Label\")\n", 176 | " .Append(mlContext.Transforms.Text.FeaturizeText(\"Features\", \"Text\"))\n", 177 | " .Append(mlContext.MulticlassClassification.Trainers.LbfgsMaximumEntropy())\n", 178 | " .Append(mlContext.Transforms.Conversion.MapKeyToValue(\"PredictedLabel\"));" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "### Train the model" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 7, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "var model = pipeline.Fit(trainingData);" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "### Evaluate the model" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 8, 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/html": [ 212 | "
LogLossLogLossReductionMacroAccuracyMicroAccuracyTopKAccuracyTopKPredictionCountPerClassLogLossConfusionMatrix
0.106136727973620550.94076389299214790.98248939885727540.982486573039330200[ 0.17754890146426386, 0.05881043090926731, 0.07523981505497156, 0.1630929051034134, 0.09504758946231957, 0.06692201731154722 ]{ Microsoft.ML.Data.ConfusionMatrix: PerClassPrecision: [ 0.9950525664811379, 0.9913896676011213, 0.9905069682892345, 0.9410080183276059, 0.9838, 0.9957515678737608 ], PerClassRecall: [ 0.9654, 0.99000199960008, 0.9861250754071988, 0.9856028794241152, 0.9836032793441312, 0.9842031593681264 ], Counts: [ [ 4827, 34, 15, 111, 12, 1 ], [ 9, 4951, 5, 32, 2, 2 ], [ 7, 7, 4904, 32, 13, 10 ], [ 4, 0, 16, 4929, 46, 6 ], [ 1, 1, 4, 74, 4919, 2 ], [ 3, 1, 7, 60, 8, 4922 ] ], NumberOfClasses: 6 }
" 213 | ] 214 | }, 215 | "metadata": {}, 216 | "output_type": "display_data" 217 | } 218 | ], 219 | "source": [ 220 | "var testData = mlContext.Data.LoadFromTextFile(\"./Sentences_Test.tsv\");\n", 221 | "var scoredData = model.Transform(testData);\n", 222 | "var qualityMetrics = mlContext.MulticlassClassification.Evaluate(scoredData);\n", 223 | "display(qualityMetrics);" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": {}, 229 | "source": [ 230 | "### Visualize logarithmic loss per class" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 9, 236 | "metadata": {}, 237 | "outputs": [ 238 | { 239 | "data": { 240 | "text/html": [ 241 | "
" 264 | ] 265 | }, 266 | "metadata": {}, 267 | "output_type": "display_data" 268 | } 269 | ], 270 | "source": [ 271 | "string[] classNames = { \"German\", \"English\", \"French\", \"Italian\", \"Romanian\", \"Spanish\" };\n", 272 | "\n", 273 | "var graph = new Graph.Bar()\n", 274 | "{\n", 275 | " y = qualityMetrics.PerClassLogLoss,\n", 276 | " x = classNames\n", 277 | "};\n", 278 | "\n", 279 | "var chart = Chart.Plot(graph);\n", 280 | "\n", 281 | "var layout = new Layout.Layout(){ title=\"Logarithmic Loss per Language (less is better)\" };\n", 282 | "chart.WithLayout(layout);\n", 283 | "\n", 284 | "display(chart);" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "### Predict language" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": 10, 297 | "metadata": {}, 298 | "outputs": [ 299 | { 300 | "data": { 301 | "text/html": [ 302 | "
PredictedLanguageConfidenceTextClassConfidences
French86Ceci n'est pas une pipe2[ 0.031047437, 0.027249021, 0.86063504, 0.030808335, 0.03533128, 0.014928936 ]
" 303 | ] 304 | }, 305 | "metadata": {}, 306 | "output_type": "display_data" 307 | }, 308 | { 309 | "data": { 310 | "text/html": [ 311 | "
PredictedLanguageConfidenceTextClassConfidences
German81Guten Morgen liebe Freunde0[ 0.8122419, 0.05640031, 0.022422126, 0.051359497, 0.027108163, 0.030468086 ]
" 312 | ] 313 | }, 314 | "metadata": {}, 315 | "output_type": "display_data" 316 | } 317 | ], 318 | "source": [ 319 | "var predictionEngine = mlContext.Model.CreatePredictionEngine\n", 320 | " \n", 321 | " (model);\n", 322 | " \n", 323 | "var prediction = predictionEngine.Predict(new MulticlassClassificationData(\"Ceci n'est pas une pipe\"));\n", 324 | "display(prediction);\n", 325 | "\n", 326 | "prediction = predictionEngine.Predict(new MulticlassClassificationData(\"Guten Morgen liebe Freunde\"));\n", 327 | "display(prediction);" 328 | ] 329 | } 330 | ], 331 | "metadata": { 332 | "kernelspec": { 333 | "display_name": ".NET (C#)", 334 | "language": "C#", 335 | "name": ".net-csharp" 336 | }, 337 | "language_info": { 338 | "file_extension": ".cs", 339 | "mimetype": "text/x-csharp", 340 | "name": "C#", 341 | "pygments_lexer": "csharp", 342 | "version": "8.0" 343 | } 344 | }, 345 | "nbformat": 4, 346 | "nbformat_minor": 2 347 | } 348 | -------------------------------------------------------------------------------- /notebooks/Recommendation using FFM/Recommendation using Field-aware Factorization Machine .ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Recommendation using FFM" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Uses a Field-Aware Factorization Machine to recommend hotels on the Las Vegas Strip for the selected traveler type and season, based on 2015 TripAdvisor ratings." 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "### NuGet Packages" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 1, 27 | "metadata": {}, 28 | "outputs": [ 29 | { 30 | "data": { 31 | "text/html": [ 32 | "Installing package Microsoft.ML, version 1.4.0..........................done!" 
33 | ] 34 | }, 35 | "metadata": {}, 36 | "output_type": "display_data" 37 | }, 38 | { 39 | "data": { 40 | "text/html": [ 41 | "Successfully added reference to package Microsoft.ML, version 1.4.0" 42 | ] 43 | }, 44 | "metadata": {}, 45 | "output_type": "display_data" 46 | }, 47 | { 48 | "data": { 49 | "text/html": [ 50 | "Installing package Microsoft.ML.Recommender, version 0.16.0........done!" 51 | ] 52 | }, 53 | "metadata": {}, 54 | "output_type": "display_data" 55 | }, 56 | { 57 | "data": { 58 | "text/html": [ 59 | "Successfully added reference to package Microsoft.ML.Recommender, version 0.16.0" 60 | ] 61 | }, 62 | "metadata": {}, 63 | "output_type": "display_data" 64 | }, 65 | { 66 | "data": { 67 | "text/html": [ 68 | "Installing package XPlot.Plotly, version 3.0.1.......done!" 69 | ] 70 | }, 71 | "metadata": {}, 72 | "output_type": "display_data" 73 | }, 74 | { 75 | "data": { 76 | "text/html": [ 77 | "Successfully added reference to package XPlot.Plotly, version 3.0.1" 78 | ] 79 | }, 80 | "metadata": {}, 81 | "output_type": "display_data" 82 | } 83 | ], 84 | "source": [ 85 | "#r \"nuget:Microsoft.ML, 1.4.0\"\n", 86 | "#r \"nuget:Microsoft.ML.Recommender, 0.16.0\"\n", 87 | "#r \"nuget:XPlot.Plotly, 3.0.1\"" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "### Namespaces" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 2, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "using Microsoft.ML;\n", 104 | "using Microsoft.ML.Data;\n", 105 | "using Microsoft.ML.Trainers;\n", 106 | "using XPlot.Plotly;\n", 107 | "using System.IO;\n", 108 | "using System.Linq;" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "### Input class definition" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 3, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "public class FfmRecommendationData\n", 125 | 
"{\n", 126 | " public bool Label;\n", 127 | "\n", 128 | " public string TravelerType;\n", 129 | "\n", 130 | " public string Season;\n", 131 | "\n", 132 | " public string Hotel;\n", 133 | "}" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "### Output class definition" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 4, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "public class FfmRecommendationPrediction\n", 150 | "{\n", 151 | " public bool PredictedLabel;\n", 152 | "\n", 153 | " public float Probability;\n", 154 | "\n", 155 | " public string TravelerType;\n", 156 | "\n", 157 | " public string Season;\n", 158 | "\n", 159 | " public string Hotel;\n", 160 | "}" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "### Read the raw data" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 5, 173 | "metadata": {}, 174 | "outputs": [], 175 | "source": [ 176 | "var mlContext = new MLContext(seed: null);\n", 177 | "\n", 178 | "var ratingTreshold = 3;\n", 179 | "\n", 180 | "// Populating an IDataView from an IEnumerable.\n", 181 | "var data = File.ReadAllLines(\"./LasVegasTripAdvisorReviews.csv\")\n", 182 | " .Skip(1)\n", 183 | " .Select(x => x.Split(';'))\n", 184 | " .Select(x => new FfmRecommendationData\n", 185 | " {\n", 186 | " Label = double.Parse(x[4]) > ratingTreshold,\n", 187 | " Season = x[5],\n", 188 | " TravelerType = x[6],\n", 189 | " Hotel = x[13]\n", 190 | " });\n", 191 | "\n", 192 | "var allData = mlContext.Data.LoadFromEnumerable(data);\n", 193 | "\n", 194 | "// return _mlContext.Data.CreateEnumerable(_allData, reuseRowObject: false);\n" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "### Prepare the data" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 6, 207 | "metadata": {}, 208 | "outputs": [], 
209 | "source": [ 210 | "var pipeline = mlContext.Transforms.Categorical.OneHotEncoding(\"TravelerTypeOneHot\", \"TravelerType\")\n", 211 | " .Append(mlContext.Transforms.Categorical.OneHotEncoding(\"SeasonOneHot\", \"Season\"))\n", 212 | " .Append(mlContext.Transforms.Categorical.OneHotEncoding(\"HotelOneHot\", \"Hotel\"))\n", 213 | " .Append(mlContext.Transforms.Concatenate(\"Features\", \"TravelerTypeOneHot\", \"SeasonOneHot\", \"HotelOneHot\"))\n", 214 | " .Append(mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine(new string[] { \"Features\" }));" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": [ 221 | "### Train the model" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 7, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "var trainingData = mlContext.Data.ShuffleRows(allData);\n", 231 | "trainingData = mlContext.Data.TakeRows(trainingData, 450);\n", 232 | "\n", 233 | "var model = pipeline.Fit(trainingData);" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "### Evaluate the model" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 8, 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/html": [ 251 | "
LogLossLogLossReductionEntropyAreaUnderRocCurveAccuracyPositivePrecisionPositiveRecallNegativePrecisionNegativeRecallF1ScoreAreaUnderPrecisionRecallCurveConfusionMatrix
0.8696357994517928-0.016581887253545370.85545081056013070.57713293650793650.720.721000.83720930232558130.8140226340547256{ Microsoft.ML.Data.ConfusionMatrix: PerClassPrecision: [ 0.72, 0 ], PerClassRecall: [ 1, 0 ], Counts: [ [ 72, 0 ], [ 28, 0 ] ], NumberOfClasses: 2 }
" 252 | ] 253 | }, 254 | "metadata": {}, 255 | "output_type": "display_data" 256 | } 257 | ], 258 | "source": [ 259 | "var testData = mlContext.Data.ShuffleRows(allData);\n", 260 | "testData = mlContext.Data.TakeRows(testData, 100);\n", 261 | "\n", 262 | "var scoredData = model.Transform(testData);\n", 263 | "var metrics = mlContext.BinaryClassification.Evaluate(\n", 264 | " data: scoredData,\n", 265 | " labelColumnName: \"Label\",\n", 266 | " scoreColumnName: \"Probability\",\n", 267 | " predictedLabelColumnName: \"PredictedLabel\");\n", 268 | " \n", 269 | "display(metrics);" 270 | ] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": {}, 275 | "source": [ 276 | "### Values" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 9, 282 | "metadata": {}, 283 | "outputs": [ 284 | { 285 | "data": { 286 | "text/html": [ 287 | "
indexvalue
0Bellagio Las Vegas
1Caesars Palace
2Circus Circus Hotel & Casino Las Vegas
3Encore at wynn Las Vegas
4Excalibur Hotel & Casino
5Hilton Grand Vacations at the Flamingo
6Hilton Grand Vacations on the Boulevard
7Marriott's Grand Chateau
8Monte Carlo Resort&Casino
9Paris Las Vegas
10The Cosmopolitan Las Vegas
11The Cromwell
12The Palazzo Resort Hotel Casino
13The Venetian Las Vegas Hotel
14The Westin las Vegas Hotel Casino & Spa
15Treasure Island- TI Hotel & Casino
16Tropicana Las Vegas - A Double Tree by Hilton Hotel
17Trump International Hotel Las Vegas
18Tuscany Las Vegas Suites & Casino
19Wyndham Grand Desert
20Wynn Las Vegas
" 288 | ] 289 | }, 290 | "metadata": {}, 291 | "output_type": "display_data" 292 | }, 293 | { 294 | "data": { 295 | "text/html": [ 296 | "
indexvalue
0Business
1Couples
2Families
3Friends
4Solo
" 297 | ] 298 | }, 299 | "metadata": {}, 300 | "output_type": "display_data" 301 | }, 302 | { 303 | "data": { 304 | "text/html": [ 305 | "
indexvalue
0Dec-Feb
1Jun-Aug
2Mar-May
3Sep-Nov
" 306 | ] 307 | }, 308 | "metadata": {}, 309 | "output_type": "display_data" 310 | } 311 | ], 312 | "source": [ 313 | "var hotels = data.Select(r => r.Hotel).Distinct().ToList();\n", 314 | "hotels.Sort();\n", 315 | "display (hotels);\n", 316 | "\n", 317 | "var travelerTypes = data.Select(r => r.TravelerType).Distinct().ToList();\n", 318 | "travelerTypes.Sort();\n", 319 | "display (travelerTypes);\n", 320 | "\n", 321 | "var seasons = data.Select(r => r.Season).Distinct().ToList();\n", 322 | "seasons.Sort();\n", 323 | "display (seasons);" 324 | ] 325 | }, 326 | { 327 | "cell_type": "markdown", 328 | "metadata": {}, 329 | "source": [ 330 | "### Single predicition" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 10, 336 | "metadata": {}, 337 | "outputs": [ 338 | { 339 | "data": { 340 | "text/html": [ 341 | "
PredictedLabelProbabilityTravelerTypeSeasonHotel
True0.6717184CouplesMar-MayParis Las Vegas
" 342 | ] 343 | }, 344 | "metadata": {}, 345 | "output_type": "display_data" 346 | } 347 | ], 348 | "source": [ 349 | "var predictionEngine = mlContext.Model.CreatePredictionEngine(model);\n", 350 | "var prediction = predictionEngine.Predict(new FfmRecommendationData\n", 351 | " { \n", 352 | " TravelerType = \"Couples\",\n", 353 | " Hotel = \"Paris Las Vegas\",\n", 354 | " Season = \"Mar-May\"\n", 355 | " });\n", 356 | "\n", 357 | "display(prediction);" 358 | ] 359 | }, 360 | { 361 | "cell_type": "markdown", 362 | "metadata": {}, 363 | "source": [ 364 | "### Bulk prediction" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 11, 370 | "metadata": {}, 371 | "outputs": [ 372 | { 373 | "data": { 374 | "text/html": [ 375 | "
indexPredictedLabelProbabilityTravelerTypeSeasonHotel
0True0.9625945FriendsJun-AugMarriott's Grand Chateau
1True0.95631635FriendsJun-AugWynn Las Vegas
2True0.95459354FriendsJun-AugThe Venetian Las Vegas Hotel
3True0.9434606FriendsJun-AugEncore at wynn Las Vegas
4True0.94161034FriendsJun-AugBellagio Las Vegas
5True0.9288549FriendsJun-AugTrump International Hotel Las Vegas
6True0.9242633FriendsJun-AugHilton Grand Vacations on the Boulevard
7True0.9128056FriendsJun-AugWyndham Grand Desert
8True0.90588105FriendsJun-AugCaesars Palace
9True0.89246196FriendsJun-AugThe Palazzo Resort Hotel Casino
10True0.88921124FriendsJun-AugThe Cosmopolitan Las Vegas
11True0.87842625FriendsJun-AugTreasure Island- TI Hotel & Casino
12True0.8734917FriendsJun-AugTropicana Las Vegas - A Double Tree by Hilton Hotel
13True0.8609912FriendsJun-AugThe Cromwell
14True0.82868594FriendsJun-AugThe Westin las Vegas Hotel Casino & Spa
15True0.82397866FriendsJun-AugTuscany Las Vegas Suites & Casino
16True0.82361853FriendsJun-AugHilton Grand Vacations at the Flamingo
17True0.82218105FriendsJun-AugParis Las Vegas
18True0.8100095FriendsJun-AugExcalibur Hotel & Casino
19True0.68517226FriendsJun-AugMonte Carlo Resort&Casino
20True0.53452903FriendsJun-AugCircus Circus Hotel & Casino Las Vegas
" 376 | ] 377 | }, 378 | "metadata": {}, 379 | "output_type": "display_data" 380 | } 381 | ], 382 | "source": [ 383 | "var input = hotels.Select(h => new FfmRecommendationData\n", 384 | " {\n", 385 | " TravelerType = \"Friends\",\n", 386 | " Season = \"Jun-Aug\",\n", 387 | " Hotel = h\n", 388 | " }).ToList();\n", 389 | "var predictions = model.Transform(mlContext.Data.LoadFromEnumerable(input));\n", 390 | "var result = mlContext.Data.CreateEnumerable(predictions, reuseRowObject: false);\n", 391 | "result = result.Where(r => r.PredictedLabel == true).OrderByDescending(r => r.Probability);\n", 392 | "\n", 393 | "display(result);" 394 | ] 395 | }, 396 | { 397 | "cell_type": "markdown", 398 | "metadata": {}, 399 | "source": [ 400 | "### Visualization" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": 12, 406 | "metadata": {}, 407 | "outputs": [ 408 | { 409 | "data": { 410 | "text/html": [ 411 | "
" 434 | ] 435 | }, 436 | "metadata": {}, 437 | "output_type": "display_data" 438 | } 439 | ], 440 | "source": [ 441 | "var graph = new Graph.Bar()\n", 442 | "{\n", 443 | " y = result.Select(r => r.Probability),\n", 444 | " x = result.Select(r => r.Hotel),\n", 445 | " marker = new Graph.Marker { color = \"darkred\" }\n", 446 | "};\n", 447 | "\n", 448 | "var chart = Chart.Plot(graph);\n", 449 | "\n", 450 | "var layout = new Layout.Layout(){ title=\"Recommended Hotels for Friends in Summer\" };\n", 451 | "chart.WithLayout(layout);\n", 452 | "\n", 453 | "display(chart);" 454 | ] 455 | } 456 | ], 457 | "metadata": { 458 | "kernelspec": { 459 | "display_name": ".NET (C#)", 460 | "language": "C#", 461 | "name": ".net-csharp" 462 | }, 463 | "language_info": { 464 | "file_extension": ".cs", 465 | "mimetype": "text/x-csharp", 466 | "name": "C#", 467 | "pygments_lexer": "csharp", 468 | "version": "8.0" 469 | } 470 | }, 471 | "nbformat": 4, 472 | "nbformat_minor": 2 473 | } 474 | -------------------------------------------------------------------------------- /notebooks/Recommendation/Recommendation using Matrix Factorization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Recommendation using Matrix Factorization" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Recommends hotels on the Las Vegas Strip for the selected traveler type, based on 2015 TripAdvisor ratings." 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "### NuGet packages" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 1, 27 | "metadata": {}, 28 | "outputs": [ 29 | { 30 | "data": { 31 | "text/html": [ 32 | "Installing package Microsoft.ML, version 1.4.0........................done!" 
33 | ] 34 | }, 35 | "metadata": {}, 36 | "output_type": "display_data" 37 | }, 38 | { 39 | "data": { 40 | "text/html": [ 41 | "Successfully added reference to package Microsoft.ML, version 1.4.0" 42 | ] 43 | }, 44 | "metadata": {}, 45 | "output_type": "display_data" 46 | }, 47 | { 48 | "data": { 49 | "text/html": [ 50 | "Installing package Microsoft.ML.Recommender, version 0.16.0........done!" 51 | ] 52 | }, 53 | "metadata": {}, 54 | "output_type": "display_data" 55 | }, 56 | { 57 | "data": { 58 | "text/html": [ 59 | "Successfully added reference to package Microsoft.ML.Recommender, version 0.16.0" 60 | ] 61 | }, 62 | "metadata": {}, 63 | "output_type": "display_data" 64 | }, 65 | { 66 | "data": { 67 | "text/html": [ 68 | "Installing package XPlot.Plotly, version 3.0.1.........done!" 69 | ] 70 | }, 71 | "metadata": {}, 72 | "output_type": "display_data" 73 | }, 74 | { 75 | "data": { 76 | "text/html": [ 77 | "Successfully added reference to package XPlot.Plotly, version 3.0.1" 78 | ] 79 | }, 80 | "metadata": {}, 81 | "output_type": "display_data" 82 | } 83 | ], 84 | "source": [ 85 | "#r \"nuget:Microsoft.ML, 1.4.0\"\n", 86 | "#r \"nuget:Microsoft.ML.Recommender, 0.16.0\"\n", 87 | "#r \"nuget:XPlot.Plotly, 3.0.1\"" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "### Namespaces" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 2, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "using Microsoft.ML;\n", 104 | "using Microsoft.ML.Data;\n", 105 | "using Microsoft.ML.Trainers;\n", 106 | "using XPlot.Plotly;\n", 107 | "using System.IO;\n", 108 | "using System.Linq;" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "### Input class definition" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 3, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "public class RecommendationData\n", 125 | 
"{\n", 126 | " public float Label;\n", 127 | "\n", 128 | " public string TravelerType;\n", 129 | "\n", 130 | " public string Hotel;\n", 131 | "}" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "### Output class definition" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 4, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "public class RecommendationPrediction\n", 148 | "{\n", 149 | " public float Score;\n", 150 | "\n", 151 | " public string TravelerType;\n", 152 | "\n", 153 | " public string Hotel;\n", 154 | "}" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "### Read the raw data" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 5, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "var mlContext = new MLContext(seed: null);\n", 171 | "\n", 172 | "// Populating an IDataView from an IEnumerable.\n", 173 | "var allData = File.ReadAllLines(\"./LasVegasTripAdvisorReviews.csv\")\n", 174 | " .Skip(1)\n", 175 | " .Select(x => x.Split(';'))\n", 176 | " .Select(x => new RecommendationData\n", 177 | " {\n", 178 | " Label = uint.Parse(x[4]),\n", 179 | " TravelerType = x[6],\n", 180 | " Hotel = x[13]\n", 181 | " });\n", 182 | " \n", 183 | "var data = allData\n", 184 | " .OrderBy(x => (x.GetHashCode())) // Cheap Randomization.\n", 185 | " .Take(400);\n", 186 | "\n", 187 | "var trainingData = mlContext.Data.LoadFromEnumerable(data);\n", 188 | "\n", 189 | "// Keep in memory.\n", 190 | "trainingData = mlContext.Data.Cache(trainingData);" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "### Prepare the data" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 6, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "var pipeline = mlContext.Transforms.Conversion.MapValueToKey(\"Hotel\")\n", 
207 | " .Append(mlContext.Transforms.Conversion.MapValueToKey(\"TravelerType\"))\n", 208 | " .Append(mlContext.Recommendation().Trainers.MatrixFactorization(\n", 209 | " labelColumnName: \"Label\",\n", 210 | " matrixColumnIndexColumnName: \"Hotel\",\n", 211 | " matrixRowIndexColumnName: \"TravelerType\",\n", 212 | " // Optional fine tuning:\n", 213 | " numberOfIterations: 20,\n", 214 | " approximationRank: 8,\n", 215 | " learningRate: 0.4))\n", 216 | " .Append(mlContext.Transforms.Conversion.MapKeyToValue(\"Hotel\"))\n", 217 | " .Append(mlContext.Transforms.Conversion.MapKeyToValue(\"TravelerType\"));" 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "metadata": {}, 223 | "source": [ 224 | "### Train the model" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 7, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "var model = pipeline.Fit(trainingData);" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "### Evaluate the model" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 13, 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/html": [ 251 | "
MeanAbsoluteErrorMeanSquaredErrorRootMeanSquaredErrorLossFunctionRSquared
0.87103003263473511.09795861759227641.04783520536021141.097958618457756-0.14017354302269291
" 252 | ] 253 | }, 254 | "execution_count": 13, 255 | "metadata": {}, 256 | "output_type": "execute_result" 257 | } 258 | ], 259 | "source": [ 260 | "data = allData\n", 261 | " .OrderBy(x => (x.GetHashCode())) // Cheap Randomization.\n", 262 | " .TakeLast(200);\n", 263 | "\n", 264 | "var testData = mlContext.Data.LoadFromEnumerable(data);\n", 265 | "var scoredData = model.Transform(testData);\n", 266 | "var metrics = mlContext.Recommendation().Evaluate(scoredData);\n", 267 | "\n", 268 | "// REPL: Read-Evaluate-Print loop\n", 269 | "metrics" 270 | ] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": {}, 275 | "source": [ 276 | "### Values" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 9, 282 | "metadata": {}, 283 | "outputs": [ 284 | { 285 | "data": { 286 | "text/html": [ 287 | "
indexvalue
0Bellagio Las Vegas
1Caesars Palace
2Circus Circus Hotel & Casino Las Vegas
3Encore at wynn Las Vegas
4Excalibur Hotel & Casino
5Hilton Grand Vacations at the Flamingo
6Hilton Grand Vacations on the Boulevard
7Marriott's Grand Chateau
8Monte Carlo Resort&Casino
9Paris Las Vegas
10The Cosmopolitan Las Vegas
11The Cromwell
12The Palazzo Resort Hotel Casino
13The Venetian Las Vegas Hotel
14The Westin las Vegas Hotel Casino & Spa
15Treasure Island- TI Hotel & Casino
16Tropicana Las Vegas - A Double Tree by Hilton Hotel
17Trump International Hotel Las Vegas
18Tuscany Las Vegas Suites & Casino
19Wyndham Grand Desert
20Wynn Las Vegas
" 288 | ] 289 | }, 290 | "metadata": {}, 291 | "output_type": "display_data" 292 | }, 293 | { 294 | "data": { 295 | "text/html": [ 296 | "
indexvalue
0Business
1Couples
2Families
3Friends
4Solo
" 297 | ] 298 | }, 299 | "metadata": {}, 300 | "output_type": "display_data" 301 | } 302 | ], 303 | "source": [ 304 | "var hotels = allData.Select(r => r.Hotel).Distinct().ToList();\n", 305 | "hotels.Sort();\n", 306 | "display (hotels);\n", 307 | "\n", 308 | "var travelerTypes = allData.Select(r => r.TravelerType).Distinct().ToList();\n", 309 | "travelerTypes.Sort();\n", 310 | "display (travelerTypes);" 311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": {}, 316 | "source": [ 317 | "### Single prediction" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 10, 323 | "metadata": {}, 324 | "outputs": [ 325 | { 326 | "data": { 327 | "text/html": [ 328 | "
ScoreTravelerTypeHotel
4.091547BusinessThe Cromwell
" 329 | ] 330 | }, 331 | "metadata": {}, 332 | "output_type": "display_data" 333 | } 334 | ], 335 | "source": [ 336 | "var predictionEngine = mlContext.Model.CreatePredictionEngine(model);\n", 337 | "\n", 338 | "var prediction = predictionEngine.Predict(new RecommendationData\n", 339 | " { \n", 340 | " TravelerType = \"Business\",\n", 341 | " Hotel = \"The Cromwell\"\n", 342 | " });\n", 343 | "display(prediction);" 344 | ] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "metadata": {}, 349 | "source": [ 350 | "### Bulk prediction\n", 351 | "\n", 352 | "Predicts the score for all hotels, for one specific traveler type." 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": 11, 358 | "metadata": {}, 359 | "outputs": [ 360 | { 361 | "data": { 362 | "text/html": [ 363 | "
indexScoreTravelerTypeHotel
04.5782423FamiliesHilton Grand Vacations at the Flamingo
14.521886FamiliesEncore at wynn Las Vegas
24.52161FamiliesThe Palazzo Resort Hotel Casino
34.4301143FamiliesParis Las Vegas
44.408109FamiliesThe Westin las Vegas Hotel Casino & Spa
54.4050274FamiliesMonte Carlo Resort&Casino
64.387952FamiliesThe Cromwell
74.357065FamiliesTrump International Hotel Las Vegas
84.3004317FamiliesBellagio Las Vegas
94.286673FamiliesThe Venetian Las Vegas Hotel
104.2342777FamiliesTreasure Island- TI Hotel & Casino
114.2272773FamiliesThe Cosmopolitan Las Vegas
124.133292FamiliesTropicana Las Vegas - A Double Tree by Hilton Hotel
134.132224FamiliesMarriott's Grand Chateau
143.9717076FamiliesHilton Grand Vacations on the Boulevard
153.8964734FamiliesCaesars Palace
163.8790998FamiliesCircus Circus Hotel & Casino Las Vegas
173.793819FamiliesWyndham Grand Desert
183.742117FamiliesTuscany Las Vegas Suites & Casino
193.7183032FamiliesWynn Las Vegas
203.681215FamiliesExcalibur Hotel & Casino
" 364 | ] 365 | }, 366 | "metadata": {}, 367 | "output_type": "display_data" 368 | } 369 | ], 370 | "source": [ 371 | "var input = hotels.Select(h => new RecommendationData\n", 372 | " {\n", 373 | " TravelerType = \"Families\",\n", 374 | " Hotel = h\n", 375 | " }).ToList();\n", 376 | "var predictions = model.Transform(mlContext.Data.LoadFromEnumerable(input));\n", 377 | "var result = mlContext.Data.CreateEnumerable(predictions, reuseRowObject: false);\n", 378 | "result = result.OrderByDescending(r => r.Score);\n", 379 | "display(result);" 380 | ] 381 | }, 382 | { 383 | "cell_type": "markdown", 384 | "metadata": {}, 385 | "source": [ 386 | "### Visualization" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": 16, 392 | "metadata": {}, 393 | "outputs": [ 394 | { 395 | "data": { 396 | "text/html": [ 397 | "
" 420 | ] 421 | }, 422 | "metadata": {}, 423 | "output_type": "display_data" 424 | } 425 | ], 426 | "source": [ 427 | "var graph = new Graph.Bar()\n", 428 | "{\n", 429 | " y = result.Select(r => r.Score),\n", 430 | " x = result.Select(r => r.Hotel),\n", 431 | " marker = new Graph.Marker { color = \"darkred\" }\n", 432 | "};\n", 433 | "\n", 434 | "var chart = Chart.Plot(graph);\n", 435 | "\n", 436 | "var layout = new Layout.Layout(){ title=\"Recommended Hotels for Families\" };\n", 437 | "chart.WithLayout(layout);\n", 438 | "\n", 439 | "display(chart);" 440 | ] 441 | } 442 | ], 443 | "metadata": { 444 | "kernelspec": { 445 | "display_name": ".NET (C#)", 446 | "language": "C#", 447 | "name": ".net-csharp" 448 | }, 449 | "language_info": { 450 | "file_extension": ".cs", 451 | "mimetype": "text/x-csharp", 452 | "name": "C#", 453 | "pygments_lexer": "csharp", 454 | "version": "8.0" 455 | } 456 | }, 457 | "nbformat": 4, 458 | "nbformat_minor": 2 459 | } 460 | -------------------------------------------------------------------------------- /notebooks/Regression/Regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## ML.NET Regression\n", 8 | "Creates a regression model to predict the quality of wine using 11 physicochemical features" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "### NuGet package installation" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 1, 21 | "metadata": {}, 22 | "outputs": [ 23 | { 24 | "data": { 25 | "text/html": [ 26 | "Installing package Microsoft.ML, version 1.4.0..............done!" 
27 | ] 28 | }, 29 | "metadata": {}, 30 | "output_type": "display_data" 31 | }, 32 | { 33 | "data": { 34 | "text/html": [ 35 | "Successfully added reference to package Microsoft.ML, version 1.4.0" 36 | ] 37 | }, 38 | "metadata": {}, 39 | "output_type": "display_data" 40 | }, 41 | { 42 | "data": { 43 | "text/html": [ 44 | "Installing package XPlot.Plotly, version 3.0.1......done!" 45 | ] 46 | }, 47 | "metadata": {}, 48 | "output_type": "display_data" 49 | }, 50 | { 51 | "data": { 52 | "text/html": [ 53 | "Successfully added reference to package XPlot.Plotly, version 3.0.1" 54 | ] 55 | }, 56 | "metadata": {}, 57 | "output_type": "display_data" 58 | } 59 | ], 60 | "source": [ 61 | "#r \"nuget:Microsoft.ML, 1.4.0\"\n", 62 | "#r \"nuget:XPlot.Plotly, 3.0.1\"" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "### Namespaces" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 2, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "using Microsoft.ML;\n", 79 | "using Microsoft.ML.Data;\n", 80 | "using Microsoft.ML.Trainers;\n", 81 | "using Microsoft.ML.Transforms;\n", 82 | "using XPlot.Plotly;" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "### Input Class Definition" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 3, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "public class RegressionData\n", 99 | "{\n", 100 | " [LoadColumn(0)]\n", 101 | " public float FixedAcidity;\n", 102 | "\n", 103 | " [LoadColumn(1)]\n", 104 | " public float VolatileAcidity;\n", 105 | "\n", 106 | " [LoadColumn(2)]\n", 107 | " public float CitricAcid;\n", 108 | "\n", 109 | " [LoadColumn(3)]\n", 110 | " public float ResidualSugar;\n", 111 | "\n", 112 | " [LoadColumn(4)]\n", 113 | " public float Chlorides;\n", 114 | "\n", 115 | " [LoadColumn(5)]\n", 116 | " public float FreeSulfurDioxide;\n", 117 | "\n", 118 | " [LoadColumn(6)]\n", 119 | 
" public float TotalSulfurDioxide;\n", 120 | "\n", 121 | " [LoadColumn(7)]\n", 122 | " public float Density;\n", 123 | "\n", 124 | " [LoadColumn(8)]\n", 125 | " public float Ph;\n", 126 | "\n", 127 | " [LoadColumn(9)]\n", 128 | " public float Sulphates;\n", 129 | "\n", 130 | " [LoadColumn(10)]\n", 131 | " public float Alcohol;\n", 132 | "\n", 133 | " [LoadColumn(11)]\n", 134 | " public float Label;\n", 135 | "}\n" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "### Output Class Definition" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 4, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "public class RegressionPrediction\n", 152 | "{\n", 153 | " [ColumnName(\"Label\")]\n", 154 | " public float Label;\n", 155 | "\n", 156 | " [ColumnName(\"Score\")]\n", 157 | " public float PredictedLabel;\n", 158 | "}" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "### Helpers " 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 5, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "string[] featureNames = \n", 175 | " { \n", 176 | " \"FixedAcidity\",\n", 177 | " \"VolatileAcidity\",\n", 178 | " \"CitricAcid\",\n", 179 | " \"ResidualSugar\",\n", 180 | " \"Chlorides\",\n", 181 | " \"FreeSulfurDioxide\",\n", 182 | " \"TotalSulfurDioxide\",\n", 183 | " \"Density\",\n", 184 | " \"Ph\",\n", 185 | " \"Sulphates\",\n", 186 | " \"Alcohol\"\n", 187 | " };\n", 188 | " \n", 189 | "string[] metricNames = \n", 190 | " { \n", 191 | " \"Mean Absolute Error\", \n", 192 | " \"Mean Squared Error\", \n", 193 | " \"Root Mean Squared Error\", \n", 194 | " \"Loss Function\", \n", 195 | " \"R Squared\"\n", 196 | " };" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "### Read the Raw Data" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | 
"execution_count": 6, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "var mlContext = new MLContext(seed: null);\n", 213 | "\n", 214 | "var trainingData = mlContext.Data.LoadFromTextFile<RegressionData>(\n", 215 | " \"./winequality_white_train.csv\", \n", 216 | " separatorChar: ';',\n", 217 | " hasHeader: true);" 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "metadata": {}, 223 | "source": [ 224 | "### Prepare the Data" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 7, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "var pipeline =\n", 234 | " mlContext.Transforms.ReplaceMissingValues(\n", 235 | " outputColumnName: \"PreparedFixedAcidity\",\n", 236 | " inputColumnName: \"FixedAcidity\",\n", 237 | " replacementMode: MissingValueReplacingEstimator.ReplacementMode.Mean)\n", 238 | " .Append(mlContext.Transforms.DropColumns(\"FixedAcidity\"))\n", 239 | " .Append(mlContext.Transforms.Concatenate(\"Features\",\n", 240 | " new[]\n", 241 | " {\n", 242 | " \"PreparedFixedAcidity\",\n", 243 | " \"VolatileAcidity\",\n", 244 | " \"CitricAcid\",\n", 245 | " \"ResidualSugar\",\n", 246 | " \"Chlorides\",\n", 247 | " \"FreeSulfurDioxide\",\n", 248 | " \"TotalSulfurDioxide\",\n", 249 | " \"Density\",\n", 250 | " \"Ph\",\n", 251 | " \"Sulphates\",\n", 252 | " \"Alcohol\"\n", 253 | " }))\n", 254 | " .Append(mlContext.Transforms.NormalizeMeanVariance(\"Features\"))\n", 255 | " .Append(mlContext.Regression.Trainers.Sdca());" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": {}, 261 | "source": [ 262 | "### Train the Model" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 8, 268 | "metadata": {}, 269 | "outputs": [], 270 | "source": [ 271 | "var model = pipeline.Fit(trainingData);" 272 | ] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "metadata": {}, 277 | "source": [ 278 | "### Evaluate the Model" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | 
"execution_count": 9, 284 | "metadata": {}, 285 | "outputs": [ 286 | { 287 | "data": { 288 | "text/html": [ 289 | "
MeanAbsoluteErrorMeanSquaredErrorRootMeanSquaredErrorLossFunctionRSquared
0.54162366091293660.50253049627572070.70889385402591880.5025304960861520.16448854936302226
" 290 | ] 291 | }, 292 | "metadata": {}, 293 | "output_type": "display_data" 294 | } 295 | ], 296 | "source": [ 297 | "// Load the raw test data.\n", 298 | "var testData = mlContext.Data.LoadFromTextFile<RegressionData>(\n", 299 | " \"./winequality_white_test.csv\", \n", 300 | " separatorChar: ';',\n", 301 | " hasHeader: true);\n", 302 | "\n", 303 | "// Score the test data and calculate the metrics.\n", 304 | "var scoredData = model.Transform(testData);\n", 305 | "var qualityMetrics = mlContext.Regression.Evaluate(scoredData);\n", 306 | "display(qualityMetrics);" 307 | ] 308 | }, 309 | { 310 | "cell_type": "markdown", 311 | "metadata": {}, 312 | "source": [ 313 | "### Visualize the Quality Metrics" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": 10, 319 | "metadata": {}, 320 | "outputs": [ 321 | { 322 | "data": { 323 | "text/html": [ 324 | "
" 347 | ] 348 | }, 349 | "metadata": {}, 350 | "output_type": "display_data" 351 | } 352 | ], 353 | "source": [ 354 | "double[] metricValues = \n", 355 | " { \n", 356 | " qualityMetrics.MeanAbsoluteError, \n", 357 | " qualityMetrics.MeanSquaredError, \n", 358 | " qualityMetrics.RootMeanSquaredError, \n", 359 | " qualityMetrics.LossFunction, \n", 360 | " qualityMetrics.RSquared\n", 361 | " };\n", 362 | "\n", 363 | "var graph = new Graph.Bar()\n", 364 | "{\n", 365 | " x = metricValues, y = metricNames,\n", 366 | " orientation = \"h\", marker = new Graph.Marker { color = \"darkred\" }\n", 367 | "};\n", 368 | "\n", 369 | "var chart = Chart.Plot(graph);\n", 370 | "\n", 371 | "var layout = new Layout.Layout(){ title=\"Quality Metrics\" };\n", 372 | "chart.WithLayout(layout);\n", 373 | "\n", 374 | "display(chart);" 375 | ] 376 | }, 377 | { 378 | "cell_type": "markdown", 379 | "metadata": {}, 380 | "source": [ 381 | "### Calculate and Visualize the Feature Contributions" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": 11, 387 | "metadata": {}, 388 | "outputs": [ 389 | { 390 | "data": { 391 | "text/html": [ 392 | "
" 415 | ] 416 | }, 417 | "metadata": {}, 418 | "output_type": "display_data" 419 | } 420 | ], 421 | "source": [ 422 | "var regressionModel = model.Last() as RegressionPredictionTransformer<LinearRegressionModelParameters>;\n", 423 | "var contributions = regressionModel.Model.Weights;\n", 424 | "\n", 425 | "var graph2 = new Graph.Bar()\n", 426 | "{\n", 427 | " x = contributions,\n", 428 | " y = featureNames,\n", 429 | " orientation = \"h\",\n", 430 | " marker = new Graph.Marker { color = \"darkblue\" }\n", 431 | "};\n", 432 | "\n", 433 | "var chart2 = Chart.Plot(graph2);\n", 434 | "\n", 435 | "var layout2 = new Layout.Layout(){ title=\"Feature Contributions\" };\n", 436 | "chart2.WithLayout(layout2);\n", 437 | "\n", 438 | "display(chart2);" 439 | ] 440 | }, 441 | { 442 | "cell_type": "markdown", 443 | "metadata": {}, 444 | "source": [ 445 | "### Create a prediction engine and use it on a random sample" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": 12, 451 | "metadata": {}, 452 | "outputs": [ 453 | { 454 | "data": { 455 | "text/html": [ 456 | "
FixedAcidityVolatileAcidityCitricAcidResidualSugarChloridesFreeSulfurDioxideTotalSulfurDioxideDensityPhSulphatesAlcoholLabel
6.40.330.281.10.038301100.99173.120.4210.56
" 457 | ] 458 | }, 459 | "metadata": {}, 460 | "output_type": "display_data" 461 | }, 462 | { 463 | "data": { 464 | "text/html": [ 465 | "
LabelPredictedLabel
65.665756
" 466 | ] 467 | }, 468 | "metadata": {}, 469 | "output_type": "display_data" 470 | } 471 | ], 472 | "source": [ 473 | "// Create prediction engine\n", 474 | "var predictionEngine = mlContext.Model.CreatePredictionEngine<RegressionData, RegressionPrediction>(model);\n", 475 | "\n", 476 | "// Get a random data sample\n", 477 | "var shuffledData = mlContext.Data.ShuffleRows(trainingData);\n", 478 | "var rawSample = mlContext.Data.TakeRows(shuffledData, 1);\n", 479 | "var sample = mlContext.Data.CreateEnumerable<RegressionData>(rawSample, false).First();\n", 480 | "display(sample);\n", 481 | "\n", 482 | "// Predict quality of sample\n", 483 | "var prediction = predictionEngine.Predict(sample);\n", 484 | "display(prediction);" 485 | ] 486 | } 487 | ], 488 | "metadata": { 489 | "kernelspec": { 490 | "display_name": ".NET (C#)", 491 | "language": "C#", 492 | "name": ".net-csharp" 493 | }, 494 | "language_info": { 495 | "file_extension": ".cs", 496 | "mimetype": "text/x-csharp", 497 | "name": "C#", 498 | "pygments_lexer": "csharp", 499 | "version": "8.0" 500 | } 501 | }, 502 | "nbformat": 4, 503 | "nbformat_minor": 2 504 | } 505 | --------------------------------------------------------------------------------