"""Build (but do not train) a multi-output linear-regression graph and export it.

The script only defines the TensorFlow graph, initialises its variables and
writes the result to ``NYCTaxi/model``. Training is driven from the ML.Net
program (Program.cs), which looks the following graph nodes up *by name*:

* ``X``                 - feature placeholder (InputColumns)
* ``Y``                 - label placeholder (TensorFlowLabel)
* ``RegScores``         - prediction tensor (OutputColumns)
* ``Loss``              - scalar loss (LossOperation)
* ``MomentumOptimizer`` - training op (OptimizationOperation)
* ``learning_rate``     - learning-rate placeholder (LearningRateOperation)

Do not rename these nodes without updating Program.cs accordingly.
"""
import tensorflow as tf

f_size = 15  # Number of features passed from ML.Net
num_output = 2  # Number of outputs
tf.set_random_seed(1)

# Placeholders fed by ML.Net; the first dimension is the (variable) batch size.
X = tf.placeholder('float', [None, f_size], name="X")
Y = tf.placeholder('float', [None, num_output], name="Y")
lr = tf.placeholder(tf.float32, name="learning_rate")

# Set model weights
W = tf.Variable(tf.random_normal([f_size, num_output], stddev=0.1), name='W')
b = tf.Variable(tf.zeros([num_output]), name='b')

# Regularisation strengths; both are 0, i.e. regularisation is disabled.
l1 = 0
l2 = 0

# Linear model: one score per output, shape [batch, num_output].
RegScores = tf.add(tf.matmul(X, W), b, name='RegScores')

# Half mean-squared-error plus the (currently disabled) L2 / L1 penalties.
# RegScores already has the same [batch, num_output] shape as Y, so it is
# subtracted directly. Squeezing it first is a no-op for num_output = 2, but
# for num_output = 1 it would drop the output axis and make the subtraction
# broadcast to [batch, batch], silently producing a wrong loss.
loss = (tf.reduce_mean(tf.square(Y - RegScores)) / 2
        + l2 * tf.nn.l2_loss(W)
        + l1 * tf.reduce_sum(tf.abs(W)))
loss = tf.identity(loss, name="Loss")

# The optimizer name is what ML.Net refers to as the optimization operation.
optimizer = tf.train.MomentumOptimizer(lr, momentum=0.9, name='MomentumOptimizer').minimize(loss)

init = tf.global_variables_initializer()
# Launch the graph.
with tf.Session() as sess:
    sess.run(init)
    # NOTE(review): simple_save is expected to refuse an export directory that
    # already exists - confirm, and remove stale output before re-running.
    tf.saved_model.simple_save(sess, r'NYCTaxi/model', inputs={'X': X, 'Y': Y}, outputs={'RegScores': RegScores})
using System;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Transforms;
using static Microsoft.ML.Transforms.Normalizers.NormalizingEstimator;

namespace NYCTaxiMultiOutputRegression
{
    /// <summary>
    /// Console sample that retrains a TensorFlow multi-output linear-regression graph
    /// from an ML.Net pipeline and predicts <c>FareAmount</c> and <c>TipAmount</c> together.
    /// </summary>
    class Program
    {
        /// <summary>
        /// One input row. Field names match the columns declared for the text loader in <see cref="Main"/>.
        /// </summary>
        public class TaxiTrip
        {
            public string VendorId;
            public string RateCode;
            public float PassengerCount;
            public float TripTime;
            public float TripDistance;
            public string PaymentType;
            public float FareAmount;
            public float TipAmount;
        }

        /// <summary>
        /// Prediction output. The field name matches the <c>RegScores</c> output column of the TensorFlow step.
        /// </summary>
        public class TaxiTripFarePrediction
        {
            [VectorType(2)]
            public float[] RegScores; // A vector because this is a multi-output regression problem.
        }

        /// <summary>
        /// Loads the data, builds the pipeline, retrains the TensorFlow model, and prints one sample prediction.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            const string TrainDataPath = "NYCTaxi/train.csv";
            const string TestDataPath = "NYCTaxi/test.csv";

            // Create ML Context with seed for repeatable/deterministic results
            MLContext mlContext = new MLContext(seed: 0);

            // STEP 1: Common data loading configuration
            TextLoader textLoader = mlContext.Data.CreateTextReader(new[]
                {
                    new TextLoader.Column("VendorId", DataKind.Text, 0),
                    new TextLoader.Column("RateCode", DataKind.Text, 1),
                    new TextLoader.Column("PassengerCount", DataKind.R4, 2),
                    new TextLoader.Column("TripTime", DataKind.R4, 3),
                    new TextLoader.Column("TripDistance", DataKind.R4, 4),
                    new TextLoader.Column("PaymentType", DataKind.Text, 5),
                    new TextLoader.Column("FareAmount", DataKind.R4, 6),
                    new TextLoader.Column("TipAmount", DataKind.R4, 7)
                },
                hasHeader: true,
                separatorChar: ','
            );

            IDataView baseTrainingDataView = textLoader.Read(TrainDataPath);
            IDataView testDataView = textLoader.Read(TestDataPath);

            // Sample code for removing extreme data ("outliers"): FareAmounts higher than $150
            // or lower than $1, which can be erroneous data.
            // GetColumn needs the element type spelled out; it cannot be inferred from the arguments.
            // NOTE(review): the filtered view and both counts are illustrative only - the model
            // below is fitted on the unfiltered view. Confirm that is intended before switching:
            // filtering could change the one-hot feature count, which the TensorFlow graph
            // fixes at f_size = 15 (see TF_MultiOutputLR.py).
            var cnt = baseTrainingDataView.GetColumn<float>(mlContext, "FareAmount").Count();
            IDataView trainingDataView = mlContext.Data.FilterByColumn(baseTrainingDataView, "FareAmount", lowerBound: 1, upperBound: 150);
            var cnt2 = trainingDataView.GetColumn<float>(mlContext, "FareAmount").Count();

            // STEP 2: Common data process configuration with pipeline data transformations.
            // "Y" packs both targets into one vector label; "X" is the concatenated feature vector.
            var dataProcessPipeline = mlContext.Transforms.Concatenate("Y", "FareAmount", "TipAmount")
                .Append(mlContext.Transforms.Categorical.OneHotEncoding("VendorId", "VendorIdEncoded"))
                .Append(mlContext.Transforms.Categorical.OneHotEncoding("RateCode", "RateCodeEncoded"))
                .Append(mlContext.Transforms.Categorical.OneHotEncoding("PaymentType", "PaymentTypeEncoded"))
                .Append(mlContext.Transforms.Normalize(inputName: "PassengerCount", mode: NormalizerMode.MeanVariance))
                .Append(mlContext.Transforms.Normalize(inputName: "TripTime", mode: NormalizerMode.MeanVariance))
                .Append(mlContext.Transforms.Normalize(inputName: "TripDistance", mode: NormalizerMode.MeanVariance))
                .Append(mlContext.Transforms.Concatenate("X", "VendorIdEncoded", "RateCodeEncoded", "PaymentTypeEncoded", "PassengerCount", "TripTime", "TripDistance"))
                .Append(new TensorFlowEstimator(mlContext, new TensorFlowTransform.Arguments()
                {
                    ModelLocation = "NYCTaxi/model", // Model is created with this script: DeepLearningWithMLdotNet\NYCTaxiMultiOutputRegression\TF_MultiOutputLR.py
                    InputColumns = new[] { "X" },
                    OutputColumns = new[] { "RegScores" },
                    LabelColumn = "Y",
                    TensorFlowLabel = "Y",
                    OptimizationOperation = "MomentumOptimizer",
                    LossOperation = "Loss",
                    Epoch = 10,
                    LearningRateOperation = "learning_rate",
                    LearningRate = 0.01f,
                    BatchSize = 20,
                    ReTrain = true
                }));

            var trainedModel = dataProcessPipeline.Fit(baseTrainingDataView);

            // The evaluation does not work. It requires the score to be scalar.
            // However, for multi-output regression it is a vector.
            // var predicted = trainedModel.Transform(testDataView);
            // var metrics = mlContext.Regression.Evaluate(predicted, "Y", "RegScores");

            // Create prediction function and test prediction.
            // The input and output row types must be given explicitly as type arguments.
            var predictFunction = trainedModel.CreatePredictionEngine<TaxiTrip, TaxiTripFarePrediction>(mlContext);

            var oneSample = new TaxiTrip()
            {
                VendorId = "CMT",
                RateCode = "1",
                PassengerCount = 2,
                TripTime = 1405,
                TripDistance = 10.3f,
                PaymentType = "CRD",
                FareAmount = 0, // To predict. Actual/Observed = 31.0
                TipAmount = 0 // To predict. Actual/Observed = 7.36
            };

            var prediction = predictFunction.Predict(oneSample);
            string scores = string.Join(", ", prediction.RegScores);
            Console.WriteLine($"[FareAmount, TipAmount] = [{scores}]");
            Console.WriteLine("Press any key to exit..");
            Console.ReadLine();
        }
    }
}
79 | *.tmp_proj 80 | *.log 81 | *.vspscc 82 | *.vssscc 83 | .builds 84 | *.pidb 85 | *.svclog 86 | *.scc 87 | 88 | # Chutzpah Test files 89 | _Chutzpah* 90 | 91 | # Visual C++ cache files 92 | ipch/ 93 | *.aps 94 | *.ncb 95 | *.opendb 96 | *.opensdf 97 | *.sdf 98 | *.cachefile 99 | *.VC.db 100 | *.VC.VC.opendb 101 | 102 | # Visual Studio profiler 103 | *.psess 104 | *.vsp 105 | *.vspx 106 | *.sap 107 | 108 | # Visual Studio Trace Files 109 | *.e2e 110 | 111 | # TFS 2012 Local Workspace 112 | $tf/ 113 | 114 | # Guidance Automation Toolkit 115 | *.gpState 116 | 117 | # ReSharper is a .NET coding add-in 118 | _ReSharper*/ 119 | *.[Rr]e[Ss]harper 120 | *.DotSettings.user 121 | 122 | # JustCode is a .NET coding add-in 123 | .JustCode 124 | 125 | # TeamCity is a build add-in 126 | _TeamCity* 127 | 128 | # DotCover is a Code Coverage Tool 129 | *.dotCover 130 | 131 | # AxoCover is a Code Coverage Tool 132 | .axoCover/* 133 | !.axoCover/settings.json 134 | 135 | # Visual Studio code coverage results 136 | *.coverage 137 | *.coveragexml 138 | 139 | # NCrunch 140 | _NCrunch_* 141 | .*crunch*.local.xml 142 | nCrunchTemp_* 143 | 144 | # MightyMoose 145 | *.mm.* 146 | AutoTest.Net/ 147 | 148 | # Web workbench (sass) 149 | .sass-cache/ 150 | 151 | # Installshield output folder 152 | [Ee]xpress/ 153 | 154 | # DocProject is a documentation generator add-in 155 | DocProject/buildhelp/ 156 | DocProject/Help/*.HxT 157 | DocProject/Help/*.HxC 158 | DocProject/Help/*.hhc 159 | DocProject/Help/*.hhk 160 | DocProject/Help/*.hhp 161 | DocProject/Help/Html2 162 | DocProject/Help/html 163 | 164 | # Click-Once directory 165 | publish/ 166 | 167 | # Publish Web Output 168 | *.[Pp]ublish.xml 169 | *.azurePubxml 170 | # Note: Comment the next line if you want to checkin your web deploy settings, 171 | # but database connection strings (with potential passwords) will be unencrypted 172 | *.pubxml 173 | *.publishproj 174 | 175 | # Microsoft Azure Web App publish settings. 
Comment the next line if you want to 176 | # checkin your Azure Web App publish settings, but sensitive information contained 177 | # in these scripts will be unencrypted 178 | PublishScripts/ 179 | 180 | # NuGet Packages 181 | *.nupkg 182 | # The packages folder can be ignored because of Package Restore 183 | **/[Pp]ackages/* 184 | # except build/, which is used as an MSBuild target. 185 | !**/[Pp]ackages/build/ 186 | # Uncomment if necessary however generally it will be regenerated when needed 187 | #!**/[Pp]ackages/repositories.config 188 | # NuGet v3's project.json files produces more ignorable files 189 | *.nuget.props 190 | *.nuget.targets 191 | 192 | # Microsoft Azure Build Output 193 | csx/ 194 | *.build.csdef 195 | 196 | # Microsoft Azure Emulator 197 | ecf/ 198 | rcf/ 199 | 200 | # Windows Store app package directories and files 201 | AppPackages/ 202 | BundleArtifacts/ 203 | Package.StoreAssociation.xml 204 | _pkginfo.txt 205 | *.appx 206 | 207 | # Visual Studio cache files 208 | # files ending in .cache can be ignored 209 | *.[Cc]ache 210 | # but keep track of directories ending in .cache 211 | !*.[Cc]ache/ 212 | 213 | # Others 214 | ClientBin/ 215 | ~$* 216 | *~ 217 | *.dbmdl 218 | *.dbproj.schemaview 219 | *.jfm 220 | *.pfx 221 | *.publishsettings 222 | orleans.codegen.cs 223 | 224 | # Including strong name files can present a security risk 225 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 226 | #*.snk 227 | 228 | # Since there are multiple workflows, uncomment next line to ignore bower_components 229 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 230 | #bower_components/ 231 | 232 | # RIA/Silverlight projects 233 | Generated_Code/ 234 | 235 | # Backup & report files from converting an old project file 236 | # to a newer Visual Studio version. 
Backup files are not needed, 237 | # because we have git ;-) 238 | _UpgradeReport_Files/ 239 | Backup*/ 240 | UpgradeLog*.XML 241 | UpgradeLog*.htm 242 | ServiceFabricBackup/ 243 | *.rptproj.bak 244 | 245 | # SQL Server files 246 | *.mdf 247 | *.ldf 248 | *.ndf 249 | 250 | # Business Intelligence projects 251 | *.rdl.data 252 | *.bim.layout 253 | *.bim_*.settings 254 | *.rptproj.rsuser 255 | 256 | # Microsoft Fakes 257 | FakesAssemblies/ 258 | 259 | # GhostDoc plugin setting file 260 | *.GhostDoc.xml 261 | 262 | # Node.js Tools for Visual Studio 263 | .ntvs_analysis.dat 264 | node_modules/ 265 | 266 | # Visual Studio 6 build log 267 | *.plg 268 | 269 | # Visual Studio 6 workspace options file 270 | *.opt 271 | 272 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 273 | *.vbw 274 | 275 | # Visual Studio LightSwitch build output 276 | **/*.HTMLClient/GeneratedArtifacts 277 | **/*.DesktopClient/GeneratedArtifacts 278 | **/*.DesktopClient/ModelManifest.xml 279 | **/*.Server/GeneratedArtifacts 280 | **/*.Server/ModelManifest.xml 281 | _Pvt_Extensions 282 | 283 | # Paket dependency manager 284 | .paket/paket.exe 285 | paket-files/ 286 | 287 | # FAKE - F# Make 288 | .fake/ 289 | 290 | # JetBrains Rider 291 | .idea/ 292 | *.sln.iml 293 | 294 | # CodeRush 295 | .cr/ 296 | 297 | # Python Tools for Visual Studio (PTVS) 298 | __pycache__/ 299 | *.pyc 300 | 301 | # Cake - Uncomment if you are using it 302 | # tools/** 303 | # !tools/packages.config 304 | 305 | # Tabs Studio 306 | *.tss 307 | 308 | # Telerik's JustMock configuration file 309 | *.jmconfig 310 | 311 | # BizTalk build output 312 | *.btp.cs 313 | *.btm.cs 314 | *.odx.cs 315 | *.xsd.cs 316 | 317 | # OpenCover UI analysis results 318 | OpenCover/ 319 | 320 | # Azure Stream Analytics local run output 321 | ASALocalRun/ 322 | 323 | # MSBuild Binary and Structured Log 324 | *.binlog 325 | 326 | # NVidia Nsight GPU debugger configuration file 327 | *.nvuser 328 | 329 | # MFractors 
(Xamarin productivity tool) working folder 330 | .mfractor/ 331 | 332 | # Data files and models 333 | model/ 334 | images/ -------------------------------------------------------------------------------- /NYCTaxiMultiOutputRegression/README.md: -------------------------------------------------------------------------------- 1 | # Multi-Output Regression with ML.Net and TensorFlow 2 | 3 | This sample describes how to create a multi-output regression model using ML.Net and TensorFlow. Multi-output regression, also known as multi-target, multi-variate, or multi-response regression, aims to simultaneously predict multiple real-valued output/target variables. 4 | 5 | In ML.Net, this regression task can be modelled using ML.Net's TensorFlow scoring and training component. 6 | 7 | ## TensorFlow Scoring in ML.Net 8 | ML.Net has components called [TensorFlowTransformer](https://docs.microsoft.com/en-us/dotnet/api/microsoft.ml.transforms.tensorflowtransform?view=ml-dotnet) and [TensorFlowEstimator](https://docs.microsoft.com/en-us/dotnet/api/microsoft.ml.transforms.tensorflowestimator?view=ml-dotnet) that can be used for: 9 | 10 | * Scoring with a pre-trained TensorFlow model, where the `TensorFlowTransformer` extracts hidden layers' values from a pre-trained TensorFlow model and uses the outputs as features in an ML.Net pipeline. 11 | 12 | * Retraining of a TensorFlow model, where the `TensorFlowEstimator` retrains a TensorFlow model using the user data passed through an ML.Net pipeline. Once the model is trained, its outputs can be used as features for scoring. 13 | 14 | ## Multi-Output Regression Model 15 | 16 | Let's create a multi-output regression model in TensorFlow and use it in ML.Net. The following script creates the TensorFlow graph. Note that this script only creates the graph and does not do any training. The actual training will be done in the ML.Net program. 
17 | 18 | ```python 19 | import tensorflow as tf 20 | 21 | f_size = 15 # Number of features passed from ML.Net 22 | num_output = 2 # Number of outputs 23 | tf.set_random_seed(1) 24 | X = tf.placeholder('float', [None, f_size], name="X") 25 | Y = tf.placeholder('float', [None, num_output], name="Y") 26 | lr = tf.placeholder(tf.float32, name = "learning_rate") 27 | 28 | 29 | # Set model weights 30 | W = tf.Variable(tf.random_normal([f_size,num_output], stddev=0.1), name = 'W') 31 | b = tf.Variable(tf.zeros([num_output]), name = 'b') 32 | 33 | l1 = 0 34 | l2 = 0 35 | RegScores = tf.add(tf.matmul(X, W), b, name='RegScores') 36 | loss = tf.reduce_mean(tf.square(Y-tf.squeeze(RegScores))) / 2 + l2 * tf.nn.l2_loss(W) + l1 * tf.reduce_sum(tf.abs(W)) 37 | loss = tf.identity(loss, name="Loss") 38 | optimizer = tf.train.MomentumOptimizer(lr, momentum=0.9, name='MomentumOptimizer').minimize(loss) 39 | 40 | init = tf.global_variables_initializer() 41 | # Launch the graph. 42 | with tf.Session() as sess: 43 | sess.run(init) 44 | tf.saved_model.simple_save(sess, r'NYCTaxi/model', inputs={'X': X, 'Y': Y}, outputs={'RegScores': RegScores} ) 45 | ``` 46 | 47 | Here, 48 | 49 | * `X` and `Y` are the input feature vector and label placeholders. 50 | * `W` and `b` are the parameters of the model. 51 | * `RegScores` is the predicted value. It is a vector of length 2 in this case. 52 | * `learning_rate` is a placeholder that is dynamically set during training from ML.Net. 53 | * `MomentumOptimizer` is the name of the optimization operator in the graph that will be used for training. 54 | 55 | Upon execution, this Python script will create a SavedModel directory called `NYCTaxi/model`. 56 | 57 | ## Training and Prediction with Multi-Output LR model in ML.Net 58 | 59 | The sample uses the dataset from [ML.Net's TaxiFarePrediction](https://github.com/dotnet/machinelearning-samples/tree/master/samples/csharp/getting-started/Regression_TaxiFarePrediction) getting-started tutorial. 
The tutorial explains how to use ML.Net for predicting a single real-valued target variable, i.e. predicting the `FareAmount`. However, this sample tries to predict `FareAmount` together with `TipAmount`. 60 | 61 | ### 1. Loading Data 62 | This sample loads 8 columns from the dataset instead of the 7 used in [ML.Net's TaxiFarePrediction](https://github.com/dotnet/machinelearning-samples/tree/master/samples/csharp/getting-started/Regression_TaxiFarePrediction) getting-started tutorial. The additional column `TipAmount` is used as the second target variable. 63 | ``` csharp 64 | string TrainDataPath = "NYCTaxi/train.csv"; 65 | string TestDataPath = "NYCTaxi/test.csv"; 66 | 67 | //Create ML Context with seed for repeatable/deterministic results 68 | MLContext mlContext = new MLContext(seed: 0); 69 | 70 | // STEP 1: Common data loading configuration 71 | TextLoader textLoader = mlContext.Data.CreateTextReader(new[] 72 | { 73 | new TextLoader.Column("VendorId", DataKind.Text, 0), 74 | new TextLoader.Column("RateCode", DataKind.Text, 1), 75 | new TextLoader.Column("PassengerCount", DataKind.R4, 2), 76 | new TextLoader.Column("TripTime", DataKind.R4, 3), 77 | new TextLoader.Column("TripDistance", DataKind.R4, 4), 78 | new TextLoader.Column("PaymentType", DataKind.Text, 5), 79 | new TextLoader.Column("FareAmount", DataKind.R4, 6), 80 | new TextLoader.Column("TipAmount", DataKind.R4, 7) 81 | }, 82 | hasHeader: true, 83 | separatorChar: ',' 84 | ); 85 | 86 | IDataView baseTrainingDataView = textLoader.Read(TrainDataPath); 87 | IDataView testDataView = textLoader.Read(TestDataPath); 88 | ``` 89 | 90 | ### 2. Building the Pipeline 91 | The major difference here is in building the pipeline, where `TensorFlowEstimator` is used to estimate the parameters of the model created with the TensorFlow script above. 
92 | ``` csharp 93 | var dataProcessPipeline = mlContext.Transforms.Concatenate("Y", "FareAmount", "TipAmount") 94 | .Append(mlContext.Transforms.Categorical.OneHotEncoding("VendorId", "VendorIdEncoded")) 95 | .Append(mlContext.Transforms.Categorical.OneHotEncoding("RateCode", "RateCodeEncoded")) 96 | .Append(mlContext.Transforms.Categorical.OneHotEncoding("PaymentType", "PaymentTypeEncoded")) 97 | .Append(mlContext.Transforms.Normalize(inputName: "PassengerCount", mode: NormalizerMode.MeanVariance)) 98 | .Append(mlContext.Transforms.Normalize(inputName: "TripTime", mode: NormalizerMode.MeanVariance)) 99 | .Append(mlContext.Transforms.Normalize(inputName: "TripDistance", mode: NormalizerMode.MeanVariance)) 100 | .Append(mlContext.Transforms.Concatenate("X", "VendorIdEncoded", "RateCodeEncoded", "PaymentTypeEncoded", "PassengerCount", "TripTime", "TripDistance")) 101 | .Append(new TensorFlowEstimator(mlContext, new TensorFlowTransform.Arguments() 102 | { 103 | ModelLocation = "NYCTaxi/model", // Model is created with this script: DeepLearningWithMLdotNet\NYCTaxiMultiOutputRegression\TF_MultiOutputLR.py 104 | InputColumns = new[] { "X" }, 105 | OutputColumns = new[] { "RegScores" }, 106 | LabelColumn = "Y", 107 | TensorFlowLabel = "Y", 108 | OptimizationOperation = "MomentumOptimizer", 109 | LossOperation = "Loss", 110 | Epoch = 10, 111 | LearningRateOperation = "learning_rate", 112 | LearningRate = 0.01f, 113 | BatchSize = 20, 114 | ReTrain = true 115 | })); 116 | ``` 117 | 118 | In this ML.Net pipeline, the `FareAmount` and `TipAmount` are combined into a vector-valued column called `Y`. `TensorFlowEstimator` uses the `X` (input), `RegScores` (output), `Y` (Label) and optimization related operator names (`MomentumOptimizer`, `Loss`, `learning_rate` etc.) for retraining of model created with TensorFlow script above. 119 | 120 | ### 3. 
Predicting with Trained Model 121 | Once the model is trained with 122 | ```csharp 123 | var trainedModel = dataProcessPipeline.Fit(baseTrainingDataView); 124 | ``` 125 | , it can be used for prediction like any other ML.Net model. 126 | ```csharp 127 | // Create prediction function and test prediction 128 | var predictFunction = trainedModel.CreatePredictionEngine<TaxiTrip, TaxiTripFarePrediction>(mlContext); 129 | 130 | var oneSample = new TaxiTrip() 131 | { 132 | VendorId = "CMT", 133 | RateCode = "1", 134 | PassengerCount = 2, 135 | TripTime = 1405, 136 | TripDistance = 10.3f, 137 | PaymentType = "CRD", 138 | FareAmount = 0, // To predict. Actual/Observed = 31.0 139 | TipAmount = 0 // To predict. Actual/Observed = 7.36 140 | }; 141 | 142 | var prediction = predictFunction.Predict(oneSample); 143 | Console.WriteLine("[FareAmount, TipAmount] = [{0}]", string.Join(", ", prediction.RegScores)); 144 | ``` 145 | Note that the `RegScores` (the output) is a vector type instead of the scalar type used in [ML.Net's TaxiFarePrediction](https://github.com/dotnet/machinelearning-samples/tree/master/samples/csharp/getting-started/Regression_TaxiFarePrediction) getting-started tutorial. 146 | ```csharp 147 | public class TaxiTripFarePrediction 148 | { 149 | [VectorType(2)] 150 | public float[] RegScores; // This is a vector because it's a multi-output regression problem. 151 | } 152 | ``` 153 | 154 | ### 4. Evaluating Model 155 | 156 | Currently, evaluation (i.e. computing metrics such as Root Mean Square Error (RMSE), log-loss etc.) on the test data does not work because ML.Net's regression evaluator does not support vector-valued scores. 157 | ```csharp 158 | // The evaluation does not work. It requires the score to be scalar. 159 | // However, for multi-output regression it is a vector 160 | // var predicted = trainedModel.Transform(testDataView); 161 | // var metrics = mlContext.Regression.Evaluate(predicted, "Y", "RegScores"); 162 | ``` --------------------------------------------------------------------------------