├── README.md
├── NYCTaxiMultiOutputRegression
├── TF_MultiOutputLR.py
├── NYCTaxiMultiOutputRegression.csproj
├── Program.cs
└── README.md
├── DeepLearningWithMLdotNet.sln
└── .gitignore
/README.md:
--------------------------------------------------------------------------------
1 | # Deep Learning with ML.Net
2 |
--------------------------------------------------------------------------------
/NYCTaxiMultiOutputRegression/TF_MultiOutputLR.py:
--------------------------------------------------------------------------------
1 |
# Builds (but does not train) a multi-output linear regression graph and saves
# it with tf.saved_model.simple_save. The names given to the placeholders and
# operations below ("X", "Y", "learning_rate", "RegScores", "Loss",
# "MomentumOptimizer") are the strings Program.cs passes to ML.Net's
# TensorFlowEstimator, so renaming any of them here requires the same change
# on the C# side.
import tensorflow as tf

f_size = 15  # Number of features passed from ML.Net
num_output = 2  # Number of outputs
tf.set_random_seed(1)

# Inputs fed per batch: features, labels and the learning rate.
X = tf.placeholder('float', [None, f_size], name="X")
Y = tf.placeholder('float', [None, num_output], name="Y")
lr = tf.placeholder(tf.float32, name="learning_rate")

# Set model weights
W = tf.Variable(tf.random_normal([f_size, num_output], stddev=0.1), name='W')
b = tf.Variable(tf.zeros([num_output]), name='b')

# Regularisation strengths; both are 0, so the penalty terms are disabled.
l1 = 0
l2 = 0

RegScores = tf.add(tf.matmul(X, W), b, name='RegScores')

# RegScores already has the same [batch, num_output] shape as Y, so the
# residual is taken directly. The previous tf.squeeze(RegScores) removed every
# size-1 dimension: with num_output = 1 the result would broadcast against Y
# into a [batch, batch] matrix and silently corrupt the loss.
residual = Y - RegScores
loss = (tf.reduce_mean(tf.square(residual)) / 2
        + l2 * tf.nn.l2_loss(W)
        + l1 * tf.reduce_sum(tf.abs(W)))
loss = tf.identity(loss, name="Loss")
optimizer = tf.train.MomentumOptimizer(lr, momentum=0.9, name='MomentumOptimizer').minimize(loss)

init = tf.global_variables_initializer()
# Launch the graph: initialise the variables and export the untrained model.
with tf.Session() as sess:
    sess.run(init)
    tf.saved_model.simple_save(sess, r'NYCTaxi/model', inputs={'X': X, 'Y': Y}, outputs={'RegScores': RegScores})
--------------------------------------------------------------------------------
/NYCTaxiMultiOutputRegression/NYCTaxiMultiOutputRegression.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | netcoreapp2.1
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 | PreserveNewest
16 |
17 |
18 | PreserveNewest
19 |
20 |
21 | PreserveNewest
22 |
23 |
24 | PreserveNewest
25 |
26 |
27 | PreserveNewest
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/DeepLearningWithMLdotNet.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio 15
4 | VisualStudioVersion = 15.0.27703.2000
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NYCTaxiMultiOutputRegression", "NYCTaxiMultiOutputRegression\NYCTaxiMultiOutputRegression.csproj", "{28AABA70-A6FC-4B57-9DF3-26E4C2821BA9}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|Any CPU = Debug|Any CPU
11 | Release|Any CPU = Release|Any CPU
12 | EndGlobalSection
13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
14 | {28AABA70-A6FC-4B57-9DF3-26E4C2821BA9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
15 | {28AABA70-A6FC-4B57-9DF3-26E4C2821BA9}.Debug|Any CPU.Build.0 = Debug|Any CPU
16 | {28AABA70-A6FC-4B57-9DF3-26E4C2821BA9}.Release|Any CPU.ActiveCfg = Release|Any CPU
17 | {28AABA70-A6FC-4B57-9DF3-26E4C2821BA9}.Release|Any CPU.Build.0 = Release|Any CPU
18 | EndGlobalSection
19 | GlobalSection(SolutionProperties) = preSolution
20 | HideSolutionNode = FALSE
21 | EndGlobalSection
22 | GlobalSection(ExtensibilityGlobals) = postSolution
23 | SolutionGuid = {8CC2B3C6-752E-484E-937E-213C210F32DF}
24 | EndGlobalSection
25 | EndGlobal
26 |
--------------------------------------------------------------------------------
/NYCTaxiMultiOutputRegression/Program.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Linq;
3 | using Microsoft.ML;
4 | using Microsoft.ML.Data;
5 | using Microsoft.ML.Transforms;
6 | using static Microsoft.ML.Transforms.Normalizers.NormalizingEstimator;
7 |
8 | namespace NYCTaxiMultiOutputRegression
9 | {
/// <summary>
/// Console sample that feeds taxi-trip data through an ML.Net pipeline ending in a
/// <c>TensorFlowEstimator</c>, retrains the TensorFlow graph stored under
/// <c>NYCTaxi/model</c>, and prints a two-valued prediction for a single trip.
/// </summary>
class Program
{
    /// <summary>
    /// One taxi trip. The field names match the column names declared for the
    /// text loader in <see cref="Main"/>.
    /// </summary>
    public class TaxiTrip
    {
        public string VendorId;
        public string RateCode;
        public float PassengerCount;
        public float TripTime;
        public float TripDistance;
        public string PaymentType;
        public float FareAmount;
        public float TipAmount;
    }

    /// <summary>
    /// Prediction output read from the pipeline's <c>RegScores</c> column.
    /// </summary>
    public class TaxiTripFarePrediction
    {
        [VectorType(2)]
        public float[] RegScores; // This is a vector because it's a multi-output regression problem.
    }

    /// <summary>
    /// Loads the train/test CSV files, builds and fits the pipeline, then prints
    /// the prediction for one hard-coded sample trip and waits for input.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        string trainDataPath = "NYCTaxi/train.csv";
        string testDataPath = "NYCTaxi/test.csv";

        // Create ML Context with seed for repeatable/deterministic results
        MLContext mlContext = new MLContext(seed: 0);

        // STEP 1: Common data loading configuration
        TextLoader textLoader = mlContext.Data.CreateTextReader(new[]
            {
                new TextLoader.Column("VendorId", DataKind.Text, 0),
                new TextLoader.Column("RateCode", DataKind.Text, 1),
                new TextLoader.Column("PassengerCount", DataKind.R4, 2),
                new TextLoader.Column("TripTime", DataKind.R4, 3),
                new TextLoader.Column("TripDistance", DataKind.R4, 4),
                new TextLoader.Column("PaymentType", DataKind.Text, 5),
                new TextLoader.Column("FareAmount", DataKind.R4, 6),
                new TextLoader.Column("TipAmount", DataKind.R4, 7)
            },
            hasHeader: true,
            separatorChar: ','
        );

        IDataView baseTrainingDataView = textLoader.Read(trainDataPath);

        // Only referenced by the commented-out evaluation code further down.
        IDataView testDataView = textLoader.Read(testDataPath);

        // Sample code of removing extreme data like "outliers" for FareAmounts higher than $150
        // and lower than $1, which can be error-data.
        // NOTE(review): this filtered view is never passed to Fit below; training runs on
        // baseTrainingDataView. Confirm whether that is intended before switching -- filtering
        // might change the width of the one-hot encoded "X" vector that the TensorFlow graph expects.
        IDataView trainingDataView = mlContext.Data.FilterByColumn(baseTrainingDataView, "FareAmount", lowerBound: 1, upperBound: 150);

        // STEP 2: Common data process configuration with pipeline data transformations
        // "Y" holds both targets; "X" holds the encoded and normalized features.
        var dataProcessPipeline = mlContext.Transforms.Concatenate("Y", "FareAmount", "TipAmount")
            .Append(mlContext.Transforms.Categorical.OneHotEncoding("VendorId", "VendorIdEncoded"))
            .Append(mlContext.Transforms.Categorical.OneHotEncoding("RateCode", "RateCodeEncoded"))
            .Append(mlContext.Transforms.Categorical.OneHotEncoding("PaymentType", "PaymentTypeEncoded"))
            .Append(mlContext.Transforms.Normalize(inputName: "PassengerCount", mode: NormalizerMode.MeanVariance))
            .Append(mlContext.Transforms.Normalize(inputName: "TripTime", mode: NormalizerMode.MeanVariance))
            .Append(mlContext.Transforms.Normalize(inputName: "TripDistance", mode: NormalizerMode.MeanVariance))
            .Append(mlContext.Transforms.Concatenate("X", "VendorIdEncoded", "RateCodeEncoded", "PaymentTypeEncoded", "PassengerCount", "TripTime", "TripDistance"))
            .Append(new TensorFlowEstimator(mlContext, new TensorFlowTransform.Arguments()
            {
                ModelLocation = "NYCTaxi/model", // Model is created with this script: DeepLearningWithMLdotNet\NYCTaxiMultiOutputRegression\TF_MultiOutputLR.py
                InputColumns = new[] { "X" },
                OutputColumns = new[] { "RegScores" },
                LabelColumn = "Y",
                TensorFlowLabel = "Y",
                OptimizationOperation = "MomentumOptimizer",
                LossOperation = "Loss",
                Epoch = 10,
                LearningRateOperation = "learning_rate",
                LearningRate = 0.01f,
                BatchSize = 20,
                ReTrain = true
            }));

        var trainedModel = dataProcessPipeline.Fit(baseTrainingDataView);

        // The evaluation does not work. It requires the score to be scalar.
        // However, for multi-output regression it is a vector.
        // var predicted = trainedModel.Transform(testDataView);
        // var metrics = mlContext.Regression.Evaluate(predicted, "Y", "RegScores");

        // Create prediction function and test prediction.
        // The input/output types must be given explicitly; they cannot be inferred from the argument.
        var predictFunction = trainedModel.CreatePredictionEngine<TaxiTrip, TaxiTripFarePrediction>(mlContext);

        var oneSample = new TaxiTrip()
        {
            VendorId = "CMT",
            RateCode = "1",
            PassengerCount = 2,
            TripTime = 1405,
            TripDistance = 10.3f,
            PaymentType = "CRD",
            FareAmount = 0, // To predict. Actual/Observed = 31.0
            TipAmount = 0 // To predict. Actual/Observed = 7.36
        };

        var prediction = predictFunction.Predict(oneSample);
        Console.WriteLine("[FareAmount, TipAmount] = [{0}]", string.Join(", ", prediction.RegScores));
        Console.WriteLine("Press any key to exit..");
        Console.ReadLine();
    }
}
117 | }
118 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 | ##
4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
5 |
6 | # User-specific files
7 | *.suo
8 | *.user
9 | *.userosscache
10 | *.sln.docstates
11 |
12 | # User-specific files (MonoDevelop/Xamarin Studio)
13 | *.userprefs
14 |
15 | # Build results
16 | [Dd]ebug/
17 | [Dd]ebugPublic/
18 | [Rr]elease/
19 | [Rr]eleases/
20 | x64/
21 | x86/
22 | bld/
23 | [Bb]in/
24 | [Oo]bj/
25 | [Ll]og/
26 |
27 | # Visual Studio 2015/2017 cache/options directory
28 | .vs/
29 | # Uncomment if you have tasks that create the project's static files in wwwroot
30 | #wwwroot/
31 |
32 | # Visual Studio 2017 auto generated files
33 | Generated\ Files/
34 |
35 | # MSTest test Results
36 | [Tt]est[Rr]esult*/
37 | [Bb]uild[Ll]og.*
38 |
39 | # NUNIT
40 | *.VisualState.xml
41 | TestResult.xml
42 |
43 | # Build Results of an ATL Project
44 | [Dd]ebugPS/
45 | [Rr]eleasePS/
46 | dlldata.c
47 |
48 | # Benchmark Results
49 | BenchmarkDotNet.Artifacts/
50 |
51 | # .NET Core
52 | project.lock.json
53 | project.fragment.lock.json
54 | artifacts/
55 | **/Properties/launchSettings.json
56 |
57 | # StyleCop
58 | StyleCopReport.xml
59 |
60 | # Files built by Visual Studio
61 | *_i.c
62 | *_p.c
63 | *_i.h
64 | *.ilk
65 | *.meta
66 | *.obj
67 | *.iobj
68 | *.pch
69 | *.pdb
70 | *.ipdb
71 | *.pgc
72 | *.pgd
73 | *.rsp
74 | *.sbr
75 | *.tlb
76 | *.tli
77 | *.tlh
78 | *.tmp
79 | *.tmp_proj
80 | *.log
81 | *.vspscc
82 | *.vssscc
83 | .builds
84 | *.pidb
85 | *.svclog
86 | *.scc
87 |
88 | # Chutzpah Test files
89 | _Chutzpah*
90 |
91 | # Visual C++ cache files
92 | ipch/
93 | *.aps
94 | *.ncb
95 | *.opendb
96 | *.opensdf
97 | *.sdf
98 | *.cachefile
99 | *.VC.db
100 | *.VC.VC.opendb
101 |
102 | # Visual Studio profiler
103 | *.psess
104 | *.vsp
105 | *.vspx
106 | *.sap
107 |
108 | # Visual Studio Trace Files
109 | *.e2e
110 |
111 | # TFS 2012 Local Workspace
112 | $tf/
113 |
114 | # Guidance Automation Toolkit
115 | *.gpState
116 |
117 | # ReSharper is a .NET coding add-in
118 | _ReSharper*/
119 | *.[Rr]e[Ss]harper
120 | *.DotSettings.user
121 |
122 | # JustCode is a .NET coding add-in
123 | .JustCode
124 |
125 | # TeamCity is a build add-in
126 | _TeamCity*
127 |
128 | # DotCover is a Code Coverage Tool
129 | *.dotCover
130 |
131 | # AxoCover is a Code Coverage Tool
132 | .axoCover/*
133 | !.axoCover/settings.json
134 |
135 | # Visual Studio code coverage results
136 | *.coverage
137 | *.coveragexml
138 |
139 | # NCrunch
140 | _NCrunch_*
141 | .*crunch*.local.xml
142 | nCrunchTemp_*
143 |
144 | # MightyMoose
145 | *.mm.*
146 | AutoTest.Net/
147 |
148 | # Web workbench (sass)
149 | .sass-cache/
150 |
151 | # Installshield output folder
152 | [Ee]xpress/
153 |
154 | # DocProject is a documentation generator add-in
155 | DocProject/buildhelp/
156 | DocProject/Help/*.HxT
157 | DocProject/Help/*.HxC
158 | DocProject/Help/*.hhc
159 | DocProject/Help/*.hhk
160 | DocProject/Help/*.hhp
161 | DocProject/Help/Html2
162 | DocProject/Help/html
163 |
164 | # Click-Once directory
165 | publish/
166 |
167 | # Publish Web Output
168 | *.[Pp]ublish.xml
169 | *.azurePubxml
170 | # Note: Comment the next line if you want to checkin your web deploy settings,
171 | # but database connection strings (with potential passwords) will be unencrypted
172 | *.pubxml
173 | *.publishproj
174 |
175 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
176 | # checkin your Azure Web App publish settings, but sensitive information contained
177 | # in these scripts will be unencrypted
178 | PublishScripts/
179 |
180 | # NuGet Packages
181 | *.nupkg
182 | # The packages folder can be ignored because of Package Restore
183 | **/[Pp]ackages/*
184 | # except build/, which is used as an MSBuild target.
185 | !**/[Pp]ackages/build/
186 | # Uncomment if necessary however generally it will be regenerated when needed
187 | #!**/[Pp]ackages/repositories.config
188 | # NuGet v3's project.json files produces more ignorable files
189 | *.nuget.props
190 | *.nuget.targets
191 |
192 | # Microsoft Azure Build Output
193 | csx/
194 | *.build.csdef
195 |
196 | # Microsoft Azure Emulator
197 | ecf/
198 | rcf/
199 |
200 | # Windows Store app package directories and files
201 | AppPackages/
202 | BundleArtifacts/
203 | Package.StoreAssociation.xml
204 | _pkginfo.txt
205 | *.appx
206 |
207 | # Visual Studio cache files
208 | # files ending in .cache can be ignored
209 | *.[Cc]ache
210 | # but keep track of directories ending in .cache
211 | !*.[Cc]ache/
212 |
213 | # Others
214 | ClientBin/
215 | ~$*
216 | *~
217 | *.dbmdl
218 | *.dbproj.schemaview
219 | *.jfm
220 | *.pfx
221 | *.publishsettings
222 | orleans.codegen.cs
223 |
224 | # Including strong name files can present a security risk
225 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
226 | #*.snk
227 |
228 | # Since there are multiple workflows, uncomment next line to ignore bower_components
229 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
230 | #bower_components/
231 |
232 | # RIA/Silverlight projects
233 | Generated_Code/
234 |
235 | # Backup & report files from converting an old project file
236 | # to a newer Visual Studio version. Backup files are not needed,
237 | # because we have git ;-)
238 | _UpgradeReport_Files/
239 | Backup*/
240 | UpgradeLog*.XML
241 | UpgradeLog*.htm
242 | ServiceFabricBackup/
243 | *.rptproj.bak
244 |
245 | # SQL Server files
246 | *.mdf
247 | *.ldf
248 | *.ndf
249 |
250 | # Business Intelligence projects
251 | *.rdl.data
252 | *.bim.layout
253 | *.bim_*.settings
254 | *.rptproj.rsuser
255 |
256 | # Microsoft Fakes
257 | FakesAssemblies/
258 |
259 | # GhostDoc plugin setting file
260 | *.GhostDoc.xml
261 |
262 | # Node.js Tools for Visual Studio
263 | .ntvs_analysis.dat
264 | node_modules/
265 |
266 | # Visual Studio 6 build log
267 | *.plg
268 |
269 | # Visual Studio 6 workspace options file
270 | *.opt
271 |
272 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
273 | *.vbw
274 |
275 | # Visual Studio LightSwitch build output
276 | **/*.HTMLClient/GeneratedArtifacts
277 | **/*.DesktopClient/GeneratedArtifacts
278 | **/*.DesktopClient/ModelManifest.xml
279 | **/*.Server/GeneratedArtifacts
280 | **/*.Server/ModelManifest.xml
281 | _Pvt_Extensions
282 |
283 | # Paket dependency manager
284 | .paket/paket.exe
285 | paket-files/
286 |
287 | # FAKE - F# Make
288 | .fake/
289 |
290 | # JetBrains Rider
291 | .idea/
292 | *.sln.iml
293 |
294 | # CodeRush
295 | .cr/
296 |
297 | # Python Tools for Visual Studio (PTVS)
298 | __pycache__/
299 | *.pyc
300 |
301 | # Cake - Uncomment if you are using it
302 | # tools/**
303 | # !tools/packages.config
304 |
305 | # Tabs Studio
306 | *.tss
307 |
308 | # Telerik's JustMock configuration file
309 | *.jmconfig
310 |
311 | # BizTalk build output
312 | *.btp.cs
313 | *.btm.cs
314 | *.odx.cs
315 | *.xsd.cs
316 |
317 | # OpenCover UI analysis results
318 | OpenCover/
319 |
320 | # Azure Stream Analytics local run output
321 | ASALocalRun/
322 |
323 | # MSBuild Binary and Structured Log
324 | *.binlog
325 |
326 | # NVidia Nsight GPU debugger configuration file
327 | *.nvuser
328 |
329 | # MFractors (Xamarin productivity tool) working folder
330 | .mfractor/
331 |
332 | # Data files and models
333 | model/
334 | images/
--------------------------------------------------------------------------------
/NYCTaxiMultiOutputRegression/README.md:
--------------------------------------------------------------------------------
1 | # Multi-Output Regression with ML.Net and TensorFlow
2 |
3 | This sample describes how to create a multi-output regression model using ML.Net and TensorFlow. Multi-output regression, also known as multi-target, multi-variate, or multi-response regression, aims to simultaneously predict multiple real-valued output/target variables.
4 |
5 | In ML.Net, this regression task can be modelled using ML.Net's TensorFlow scoring and training component.
6 |
7 | ## TensorFlow Scoring in ML.Net
8 | ML.Net has components called [TensorFlowTransformer](https://docs.microsoft.com/en-us/dotnet/api/microsoft.ml.transforms.tensorflowtransform?view=ml-dotnet) and [TensorFlowEstimator](https://docs.microsoft.com/en-us/dotnet/api/microsoft.ml.transforms.tensorflowestimator?view=ml-dotnet) that can be used for:
9 |
10 | * Scoring with a pretrained TensorFlow model, where the `TensorFlowTransformer` extracts hidden layers' values from a pre-trained TensorFlow model and uses the outputs as features in an ML.Net pipeline.
11 |
12 | * Retraining of a TensorFlow model, where the `TensorFlowEstimator` retrains a TensorFlow model using the user data passed through the ML.Net pipeline. Once the model is trained, its outputs can be used as features for scoring.
13 |
14 | ## Multi-Output Regression Model
15 |
16 | Let's create a multi-output regression model in TensorFlow and use it in ML.Net. The following script creates the TensorFlow graph. Note that this script only creates the graph and does not do any training. The actual training will be done in the ML.Net program.
17 |
18 | ```python
19 | import tensorflow as tf
20 |
21 | f_size = 15 # Number of features passed from ML.Net
22 | num_output = 2 # Number of outputs
23 | tf.set_random_seed(1)
24 | X = tf.placeholder('float', [None, f_size], name="X")
25 | Y = tf.placeholder('float', [None, num_output], name="Y")
26 | lr = tf.placeholder(tf.float32, name = "learning_rate")
27 |
28 |
29 | # Set model weights
30 | W = tf.Variable(tf.random_normal([f_size,num_output], stddev=0.1), name = 'W')
31 | b = tf.Variable(tf.zeros([num_output]), name = 'b')
32 |
33 | l1 = 0
34 | l2 = 0
35 | RegScores = tf.add(tf.matmul(X, W), b, name='RegScores')
36 | loss = tf.reduce_mean(tf.square(Y-tf.squeeze(RegScores))) / 2 + l2 * tf.nn.l2_loss(W) + l1 * tf.reduce_sum(tf.abs(W))
37 | loss = tf.identity(loss, name="Loss")
38 | optimizer = tf.train.MomentumOptimizer(lr, momentum=0.9, name='MomentumOptimizer').minimize(loss)
39 |
40 | init = tf.global_variables_initializer()
41 | # Launch the graph.
42 | with tf.Session() as sess:
43 | sess.run(init)
44 | tf.saved_model.simple_save(sess, r'NYCTaxi/model', inputs={'X': X, 'Y': Y}, outputs={'RegScores': RegScores} )
45 | ```
46 |
47 | Here,
48 |
49 | * `X` and `Y` are the input feature vector and label placeholders.
50 | * `W` and `b` are the parameters of the model.
51 | * `RegScores` is the predicted value. It is a vector of length 2 in this case.
52 | * `learning_rate` is a placeholder that is dynamically set during training from ML.Net.
53 | * `MomentumOptimizer` is the name of optimization operator in the graph that will be used for training.
54 |
55 | When executed, this Python script creates a SavedModel directory called `NYCTaxi/model`.
56 |
57 | ## Training and Prediction with Multi-Output LR model in ML.Net
58 |
59 | The sample uses the dataset from [ML.Net's TaxiFarePrediction](https://github.com/dotnet/machinelearning-samples/tree/master/samples/csharp/getting-started/Regression_TaxiFarePrediction) getting-started tutorial. The tutorial explains how to use ML.Net for predicting single real-valued target variable i.e. predicting the `FareAmount`. However, this sample tries to predict `FareAmount` together with `TipAmount`.
60 |
61 | ### 1. Loading Data
62 | This sample loads 8 columns from the dataset instead of 7 in [ML.Net's TaxiFarePrediction](https://github.com/dotnet/machinelearning-samples/tree/master/samples/csharp/getting-started/Regression_TaxiFarePrediction) getting-started tutorial. The additional column, `TipAmount`, is used as the second target variable.
63 | ``` csharp
64 | string TrainDataPath = "NYCTaxi/train.csv";
65 | string TestDataPath = "NYCTaxi/test.csv";
66 |
67 | //Create ML Context with seed for repeteable/deterministic results
68 | MLContext mlContext = new MLContext(seed: 0);
69 |
70 | // STEP 1: Common data loading configuration
71 | TextLoader textLoader = mlContext.Data.CreateTextReader(new[]
72 | {
73 | new TextLoader.Column("VendorId", DataKind.Text, 0),
74 | new TextLoader.Column("RateCode", DataKind.Text, 1),
75 | new TextLoader.Column("PassengerCount", DataKind.R4, 2),
76 | new TextLoader.Column("TripTime", DataKind.R4, 3),
77 | new TextLoader.Column("TripDistance", DataKind.R4, 4),
78 | new TextLoader.Column("PaymentType", DataKind.Text, 5),
79 | new TextLoader.Column("FareAmount", DataKind.R4, 6),
80 | new TextLoader.Column("TipAmount", DataKind.R4, 7)
81 | },
82 | hasHeader: true,
83 | separatorChar: ','
84 | );
85 |
86 | IDataView baseTrainingDataView = textLoader.Read(TrainDataPath);
87 | IDataView testDataView = textLoader.Read(TestDataPath);
88 | ```
89 |
90 | ### 2. Building the Pipeline
91 | The major difference here is in building the pipeline, where `TensorFlowEstimator` is used to estimate the parameters of the model created with the TensorFlow script above.
92 | ``` csharp
93 | var dataProcessPipeline = mlContext.Transforms.Concatenate("Y", "FareAmount", "TipAmount")
94 | .Append(mlContext.Transforms.Categorical.OneHotEncoding("VendorId", "VendorIdEncoded"))
95 | .Append(mlContext.Transforms.Categorical.OneHotEncoding("RateCode", "RateCodeEncoded"))
96 | .Append(mlContext.Transforms.Categorical.OneHotEncoding("PaymentType", "PaymentTypeEncoded"))
97 | .Append(mlContext.Transforms.Normalize(inputName: "PassengerCount", mode: NormalizerMode.MeanVariance))
98 | .Append(mlContext.Transforms.Normalize(inputName: "TripTime", mode: NormalizerMode.MeanVariance))
99 | .Append(mlContext.Transforms.Normalize(inputName: "TripDistance", mode: NormalizerMode.MeanVariance))
100 | .Append(mlContext.Transforms.Concatenate("X", "VendorIdEncoded", "RateCodeEncoded", "PaymentTypeEncoded", "PassengerCount", "TripTime", "TripDistance"))
101 | .Append(new TensorFlowEstimator(mlContext, new TensorFlowTransform.Arguments()
102 | {
103 | ModelLocation = "NYCTaxi/model", // Model is created with this script: DeepLearningWithMLdotNet\NYCTaxiMultiOutputRegression\TF_MultiOutputLR.py
104 | InputColumns = new[] { "X" },
105 | OutputColumns = new[] { "RegScores" },
106 | LabelColumn = "Y",
107 | TensorFlowLabel = "Y",
108 | OptimizationOperation = "MomentumOptimizer",
109 | LossOperation = "Loss",
110 | Epoch = 10,
111 | LearningRateOperation = "learning_rate",
112 | LearningRate = 0.01f,
113 | BatchSize = 20,
114 | ReTrain = true
115 | }));
116 | ```
117 |
118 | In this ML.Net pipeline, the `FareAmount` and `TipAmount` are combined into a vector-valued column called `Y`. `TensorFlowEstimator` uses the `X` (input), `RegScores` (output), `Y` (Label) and optimization related operator names (`MomentumOptimizer`, `Loss`, `learning_rate` etc.) for retraining of model created with TensorFlow script above.
119 |
120 | ### 3. Predicting with Trained Model
121 | Once the model is trained with
122 | ```csharp
123 | var trainedModel = dataProcessPipeline.Fit(baseTrainingDataView);
124 | ```
125 | , it can be used for prediction as any other ML.Net model.
126 | ```csharp
127 | // Create prediction function and test prediction
128 | var predictFunction = trainedModel.CreatePredictionEngine(mlContext);
129 |
130 | var oneSample = new TaxiTrip()
131 | {
132 | VendorId = "CMT",
133 | RateCode = "1",
134 | PassengerCount = 2,
135 | TripTime = 1405,
136 | TripDistance = 10.3f,
137 | PaymentType = "CRD",
138 | FareAmount = 0, // To predict. Actual/Observed = 31.0
139 | TipAmount = 0 // To predict. Actual/Observed = 7.36
140 | };
141 |
142 | var prediction = predictFunction.Predict(oneSample);
143 | Console.WriteLine("[FareAmount, TipAmount] = [{0}]", string.Join(", ", prediction.RegScores));
144 | ```
145 | Note that, the `RegScores` (the output) is vector type instead of scalar type as used in [ML.Net's TaxiFarePrediction](https://github.com/dotnet/machinelearning-samples/tree/master/samples/csharp/getting-started/Regression_TaxiFarePrediction) getting-started tutorial.
146 | ```csharp
147 | public class TaxiTripFarePrediction
148 | {
149 | [VectorType(2)]
150 | public float[] RegScores; // This is vector because its a MultiOuput regression problem.
151 | }
152 | ```
153 |
154 | ### 4. Evaluating Model
155 |
156 | Currently, evaluation (i.e. computing metrics such as Root Mean Square Error (RMSE), log-loss etc.) on the test data does not work because ML.Net's regression evaluator requires a scalar score and does not support vector-valued scores.
157 | ```csharp
158 | // The evaluation does not work. It requires the score to be scalar.
159 | // However, for multi-output regression its vector
160 | // var predicted = trainedModel.Transform(testDataView);
161 | // var metrics = mlContext.Regression.Evaluate(predicted, "Y", "RegScores");
162 | ```
--------------------------------------------------------------------------------