Skip to content

Commit 223d87f

Browse files
kevmal authored and CESARDELATORRE committed
F# migrate to v1.0.0-preview (dotnet#354)
* F# - migrate bike sharing sample * F# - migrate spam sample * F# - migrate fraud sample * F# - migrate customer seg sample * F# - migrate iris classification sample * F# - migrate iris cluster sample * F# migrate sentiment sample * F# migrate taxi sample * rename solution * F# migrate tensorflow image classification sample * Label 'Count' column as 'Label' * print full trainer name * print absolute path * sentiment data update, some formatting updates * update tensorflow model * F# migrate issue labeler * update readme * update readme
1 parent d6d3302 commit 223d87f

File tree

38 files changed

+40274
-780
lines changed

38 files changed

+40274
-780
lines changed

samples/fsharp/Directory.Build.props

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<Project>
22

33
<PropertyGroup>
4-
<MicrosoftMLVersion>0.11.0</MicrosoftMLVersion>
4+
<MicrosoftMLVersion>1.0.0-preview</MicrosoftMLVersion>
55
</PropertyGroup>
66

77
</Project>

samples/fsharp/common/ConsoleHelper.fs

+16-17
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ module ConsoleHelper =
66
open Microsoft.ML.Data
77
//open Microsoft.ML.Api
88
open System.Reflection
9-
open Microsoft.Data.DataView
109

1110
let printPrediction prediction =
1211
printfn "*************************************************"
@@ -23,20 +22,20 @@ module ConsoleHelper =
2322
printfn "*************************************************"
2423
printfn "* Metrics for %s regression model " name
2524
printfn "*------------------------------------------------"
26-
printfn "* LossFn: %.2f" metrics.LossFn
25+
printfn "* LossFn: %.2f" metrics.LossFunction
2726
printfn "* R2 Score: %.2f" metrics.RSquared
28-
printfn "* Absolute loss: %.2f" metrics.L1
29-
printfn "* Squared loss: %.2f" metrics.L2
30-
printfn "* RMS loss: %.2f" metrics.Rms
27+
printfn "* Absolute loss: %.2f" metrics.MeanAbsoluteError
28+
printfn "* Squared loss: %.2f" metrics.MeanSquaredError
29+
printfn "* RMS loss: %.2f" metrics.RootMeanSquaredError
3130
printfn "*************************************************"
3231

3332
let printBinaryClassificationMetrics name (metrics : CalibratedBinaryClassificationMetrics) =
3433
printfn"************************************************************"
3534
printfn"* Metrics for %s binary classification model " name
3635
printfn"*-----------------------------------------------------------"
3736
printfn"* Accuracy: %.2f%%" (metrics.Accuracy * 100.)
38-
printfn"* Auc: %.2f%%" (metrics.Auc * 100.)
39-
printfn"* Auprc: %.2f%%" (metrics.Auprc * 100.)
37+
printfn"* Area Under Curve: %.2f%%" (metrics.AreaUnderRocCurve * 100.)
38+
printfn"* Area under Precision recall Curve: %.2f%%" (metrics.AreaUnderPrecisionRecallCurve * 100.)
4039
printfn"* F1Score: %.2f%%" (metrics.F1Score * 100.)
4140

4241
// LogLoss is printed as a raw value: unlike the percentage lines above it is not scaled by 100, so no '%' suffix.
printfn"* LogLoss: %.2f" (metrics.LogLoss)
@@ -47,12 +46,12 @@ module ConsoleHelper =
4746
printfn"* NegativeRecall: %.2f" (metrics.NegativeRecall)
4847
printfn"************************************************************"
4948

50-
let printMultiClassClassificationMetrics name (metrics : MultiClassClassifierMetrics) =
49+
let printMultiClassClassificationMetrics name (metrics : MulticlassClassificationMetrics) =
5150
printfn "************************************************************"
5251
printfn "* Metrics for %s multi-class classification model " name
5352
printfn "*-----------------------------------------------------------"
54-
printfn " AccuracyMacro = %.4f, a value between 0 and 1, the closer to 1, the better" metrics.AccuracyMacro
55-
printfn " AccuracyMicro = %.4f, a value between 0 and 1, the closer to 1, the better" metrics.AccuracyMicro
53+
printfn " AccuracyMacro = %.4f, a value between 0 and 1, the closer to 1, the better" metrics.MacroAccuracy
54+
// Micro-averaged accuracy; the preceding line already reports the macro-averaged value.
printfn " AccuracyMicro = %.4f, a value between 0 and 1, the closer to 1, the better" metrics.MicroAccuracy
5655
printfn " LogLoss = %.4f, the closer to 0, the better" metrics.LogLoss
5756
printfn " LogLoss for class 1 = %.4f, the closer to 0, the better" metrics.PerClassLogLoss.[0]
5857
printfn " LogLoss for class 2 = %.4f, the closer to 0, the better" metrics.PerClassLogLoss.[1]
@@ -70,16 +69,16 @@ module ConsoleHelper =
7069
let confidenceInterval95 = 1.96 * calculateStandardDeviation(values) / Math.Sqrt(float (values.Length-1));
7170
confidenceInterval95
7271

73-
let printMulticlassClassificationFoldsAverageMetrics algorithmName (crossValResults : (MultiClassClassifierMetrics * ITransformer * IDataView) array) =
72+
let printMulticlassClassificationFoldsAverageMetrics algorithmName (crossValResults : TrainCatalogBase.CrossValidationResult<MulticlassClassificationMetrics>[]) =
7473

75-
let metricsInMultipleFolds = crossValResults |> Array.map(fun (metrics, model, scoredTestData) -> metrics)
74+
let metricsInMultipleFolds = crossValResults |> Array.map(fun r -> r.Metrics)
7675

77-
let microAccuracyValues = metricsInMultipleFolds |> Array.map(fun m -> m.AccuracyMicro)
76+
let microAccuracyValues = metricsInMultipleFolds |> Array.map(fun m -> m.MicroAccuracy)
7877
let microAccuracyAverage = microAccuracyValues |> Array.average
7978
let microAccuraciesStdDeviation = calculateStandardDeviation microAccuracyValues
8079
let microAccuraciesConfidenceInterval95 = calculateConfidenceInterval95 microAccuracyValues
8180

82-
let macroAccuracyValues = metricsInMultipleFolds |> Array.map(fun m -> m.AccuracyMacro)
81+
// Per-fold macro accuracy; must read MacroAccuracy so the macro statistics below do not duplicate the micro ones.
let macroAccuracyValues = metricsInMultipleFolds |> Array.map(fun m -> m.MacroAccuracy)
8382
let macroAccuracyAverage = macroAccuracyValues |> Array.average
8483
let macroAccuraciesStdDeviation = calculateStandardDeviation macroAccuracyValues
8584
let macroAccuraciesConfidenceInterval95 = calculateConfidenceInterval95 macroAccuracyValues
@@ -107,8 +106,8 @@ module ConsoleHelper =
107106
printfn "*************************************************"
108107
printfn "* Metrics for %s clustering model " name
109108
printfn "*------------------------------------------------"
110-
printfn "* AvgMinScore: %.15f" metrics.AvgMinScore
111-
printfn "* DBI is: %.15f" metrics.Dbi
109+
printfn "* Average Distance: %.15f" metrics.AverageDistance
110+
printfn "* Davies Bouldin Index is: %.15f" metrics.DaviesBouldinIndex
112111
printfn "*************************************************"
113112

114113
let consoleWriteHeader line =
@@ -157,7 +156,7 @@ module ConsoleHelper =
157156

158157
// Extract the 'Features' column.
159158
let someColumnData =
160-
transformedData.GetColumn<float32[]>(mlContext, columnName)
159+
transformedData.GetColumn<float32[]>(columnName)
161160
|> Seq.take numberOfRows
162161
|> Seq.toList
163162

samples/fsharp/end-to-end-apps/MulticlassClassification-GitHubLabeler/GitHubLabeler/GitHubLabelerConsoleApp/GitHubLabeler.fsproj

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
<ItemGroup>
2424
<PackageReference Include="Microsoft.Extensions.Configuration" Version="2.1.1" />
2525
<PackageReference Include="Microsoft.Extensions.Configuration.Json" Version="2.1.1" />
26-
<PackageReference Include="Microsoft.ML" Version="0.11.0" />
26+
<PackageReference Include="Microsoft.ML" Version="$(MicrosoftMLVersion)" />
2727
<PackageReference Include="Octokit" Version="0.29.0" />
2828
</ItemGroup>
2929

samples/fsharp/end-to-end-apps/MulticlassClassification-GitHubLabeler/GitHubLabeler/GitHubLabelerConsoleApp/Labeler.fs

+2-2
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,14 @@ type GitHubClientFacade =
3737
let initialise modelPath repoOwner repoName accessToken =
3838
let mlContext = MLContext(seed = Nullable 1)
3939

40-
let trainedModel =
40+
let trainedModel, inputSchema =
4141
use f = IO.File.OpenRead(modelPath)
4242
mlContext.Model.Load(f)
4343

4444
let productInformation = ProductHeaderValue "MLGitHubLabeler"
4545
let client = GitHubClient(productInformation, Credentials = Credentials(accessToken))
4646
let gitHubClient = GitHubClientFacade.init client repoOwner repoName
47-
let predictionEngine = trainedModel.CreatePredictionEngine<GitHubIssue, GitHubIssuePrediction>(mlContext)
47+
let predictionEngine = mlContext.Model.CreatePredictionEngine<GitHubIssue, GitHubIssuePrediction>(trainedModel)
4848
predictionEngine, gitHubClient
4949

5050
type FullPrediction =

samples/fsharp/end-to-end-apps/MulticlassClassification-GitHubLabeler/GitHubLabeler/GitHubLabelerConsoleApp/Program.fs

+7-13
Original file line numberDiff line numberDiff line change
@@ -76,17 +76,11 @@ let buildAndTrainModel dataSetLocation modelPath selectedStrategy =
7676
let trainer =
7777
match selectedStrategy with
7878
| MyTrainerStrategy.SdcaMultiClassTrainer ->
79-
mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(
80-
DefaultColumnNames.Label,
81-
DefaultColumnNames.Features)
82-
|> downcastPipeline
79+
mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy( "Label", "Features") |> downcastPipeline
8380

8481
| MyTrainerStrategy.OVAAveragedPerceptronTrainer ->
8582
let averagedPerceptronBinaryTrainer =
86-
mlContext.BinaryClassification.Trainers.AveragedPerceptron(
87-
DefaultColumnNames.Label,
88-
DefaultColumnNames.Features,
89-
numIterations = 10)
83+
mlContext.BinaryClassification.Trainers.AveragedPerceptron( "Label", "Features", numberOfIterations = 10)
9084

9185
let downcastTrainer (x : ITrainerEstimator<_,_>) =
9286
match x with
@@ -117,16 +111,16 @@ let buildAndTrainModel dataSetLocation modelPath selectedStrategy =
117111
let watchCrossValTime = System.Diagnostics.Stopwatch.StartNew()
118112

119113
let crossValidationResults =
120-
mlContext.MulticlassClassification.CrossValidate(data = trainingDataView, estimator = downcastPipeline modelBuilder, numFolds = 6, labelColumn = DefaultColumnNames.Label)
114+
mlContext.MulticlassClassification.CrossValidate(data = trainingDataView, estimator = downcastPipeline modelBuilder, numberOfFolds = 6, labelColumnName = "Label")
121115

122116

123117
//Stop measuring time
124118
watchCrossValTime.Stop()
125119
printfn "Time Cross-Validating: %d miliSecs" watchCrossValTime.ElapsedMilliseconds
126120

127121
crossValidationResults
128-
|> Array.map (fun x -> x.Metrics, x.Model, x.ScoredHoldOutSet) //convert struct tuple for print function
129-
|> Common.ConsoleHelper.printMulticlassClassificationFoldsAverageMetrics (trainer.ToString())
122+
|> Seq.toArray
123+
|> Common.ConsoleHelper.printMulticlassClassificationFoldsAverageMetrics (trainer.ToString())
130124

131125
// STEP 5: Train the model fitting to the DataSet
132126
printfn "=============== Training the model ==============="
@@ -140,7 +134,7 @@ let buildAndTrainModel dataSetLocation modelPath selectedStrategy =
140134
Title = "WebSockets communication is slow in my machine"
141135
Description = "The WebSockets communication used under the covers by SignalR looks like is going slow in my development machine.."
142136
}
143-
let predEngine = trainedModel.CreatePredictionEngine<GitHubIssue, GitHubIssuePrediction>(mlContext)
137+
let predEngine = mlContext.Model.CreatePredictionEngine<GitHubIssue, GitHubIssuePrediction>(trainedModel)
144138
let prediction = predEngine.Predict(issue)
145139

146140
printfn "=============== Single Prediction just-trained-model - Result: %s ===============" prediction.Area
@@ -149,7 +143,7 @@ let buildAndTrainModel dataSetLocation modelPath selectedStrategy =
149143
printfn "=============== Saving the model to a file ==============="
150144
do
151145
use f = File.Open(modelPath,FileMode.Create)
152-
mlContext.Model.Save(trainedModel, f)
146+
mlContext.Model.Save(trainedModel, trainingDataView.Schema, f)
153147

154148
Common.ConsoleHelper.consoleWriteHeader "Training process finalized"
155149

samples/fsharp/end-to-end-apps/MulticlassClassification-GitHubLabeler/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
| ML.NET version | API type | Status | App Type | Data sources | Scenario | ML Task | Algorithms |
44
|----------------|-------------------|-------------------------------|-------------|-----------|---------------------|---------------------------|-----------------------------|
5-
| v0.11 | Dynamic API | Up-to-date | Console app | .csv file and GitHub issues | Issues classification | Multi-class classification | SDCA multi-class classifier |
5+
| v1.0.0-preview | Dynamic API | Up-to-date | Console app | .csv file and GitHub issues | Issues classification | Multi-class classification | SDCA multi-class classifier |
66

77

88
This is a simple prototype application to demonstrate how to use [ML.NET](https://www.nuget.org/packages/Microsoft.ML/) APIs. The main focus is on creating, training, and using ML (Machine Learning) model that is implemented in Predictor.cs class.

samples/fsharp/getting-started/BinaryClassification_CreditCardFraudDetection/CreditCardFraudDetection/CreditCardFraudDetection.fsproj

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
<ItemGroup>
1313
<PackageReference Include="Microsoft.ML" Version="$(MicrosoftMLVersion)" />
14+
<!-- Use the shared MicrosoftMLVersion property so FastTree stays on the same version as Microsoft.ML. -->
<PackageReference Include="Microsoft.ML.FastTree" Version="$(MicrosoftMLVersion)" />
1415
</ItemGroup>
1516

1617
</Project>

samples/fsharp/getting-started/BinaryClassification_CreditCardFraudDetection/CreditCardFraudDetection/Program.fs

+10-11
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ let main _ =
9696
let data = mlContext.Data.LoadFromTextFile<TransactionObservation>(inputFile, separatorChar = ',', hasHeader = true, allowQuoting = true)
9797

9898
let trainData, testData =
99-
let y = mlContext.BinaryClassification.TrainTestSplit(data, 0.2, seed = Nullable 1u)
99+
let y = mlContext.Data.TrainTestSplit(data, 0.2, seed = Nullable 1)
100100
y.TrainSet, y.TestSet
101101

102102
// save test split
@@ -132,24 +132,23 @@ let main _ =
132132

133133
let pipeline =
134134
EstimatorChain()
135-
|> fun x -> x.Append(mlContext.Transforms.Concatenate(DefaultColumnNames.Features, featureColumnNames))
135+
|> fun x -> x.Append(mlContext.Transforms.Concatenate("Features", featureColumnNames))
136136
|> fun x -> x.Append(mlContext.Transforms.DropColumns [|"Time"|])
137137
|> fun x ->
138138
x.Append (
139-
mlContext.Transforms.Normalize (
139+
mlContext.Transforms.NormalizeMeanVariance (
140140
"FeaturesNormalizedByMeanVar",
141-
"Features",
142-
NormalizingEstimator.NormalizerMode.MeanVariance
141+
"Features"
143142
)
144143
)
145144
|> fun x ->
146145
x.Append (
147146
mlContext.BinaryClassification.Trainers.FastTree(
148147
"Label",
149148
"FeaturesNormalizedByMeanVar",
150-
numLeaves = 20,
151-
numTrees = 100,
152-
minDatapointsInLeaves = 10,
149+
numberOfLeaves = 20,
150+
numberOfTrees = 100,
151+
minimumExampleCountPerLeaf = 10,
153152
learningRate = 0.2
154153
)
155154
)
@@ -163,18 +162,18 @@ let main _ =
163162
printfn "Saving model to file"
164163
let _ =
165164
use fs = new FileStream (modelFile, FileMode.Create, FileAccess.Write, FileShare.Write)
166-
mlContext.Model.Save(model, fs)
165+
mlContext.Model.Save(model, trainData.Schema, fs)
167166

168167
(*
169168
Read the model and test data from file,
170169
and make predictions
171170
*)
172171

173172
printfn "Reading model and test data"
174-
let modelEvaluator =
173+
let modelEvaluator, inputSchema =
175174
use file = File.OpenRead modelFile
176175
mlContext.Model.Load(file)
177-
let predictionEngine = modelEvaluator.CreatePredictionEngine<TransactionObservation, TransactionFraudPrediction>(mlContext)
176+
let predictionEngine = mlContext.Model.CreatePredictionEngine<TransactionObservation, TransactionFraudPrediction>(modelEvaluator)
178177

179178
let testData = mlContext.Data.LoadFromTextFile<TransactionObservation>(testFile, hasHeader = true, separatorChar = ',')
180179

samples/fsharp/getting-started/BinaryClassification_CreditCardFraudDetection/Readme.md

+15-12
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
| ML.NET version | API type | Status | App Type | Data type | Scenario | ML Task | Algorithms |
44
|----------------|-------------------|-------------------------------|-------------|-----------|---------------------|---------------------------|-----------------------------|
5-
| v0.11 | Dynamic API | Up-to-date | Two console apps | .csv file | Fraud Detection | Two-class classification | FastTree Binary Classification |
5+
| v1.0.0-preview | Dynamic API | Up-to-date | Two console apps | .csv file | Fraud Detection | Two-class classification | FastTree Binary Classification |
66

77
In this introductory sample, you'll see how to use ML.NET to predict credit card fraud. In the world of machine learning, this type of prediction is known as binary classification.
88

@@ -69,38 +69,40 @@ The initial code is similar to the following:
6969
[...]
7070
7171
let trainData, testData =
72-
let y = mlContext.BinaryClassification.TrainTestSplit(data, 0.2, seed = Nullable 1u)
73-
y.TrainSet, y.TestSet
72+
printfn "Reading train and test data"
73+
let trainData = mlContext.Data.LoadFromTextFile<TransactionObservation>(trainFile, separatorChar = ',', hasHeader = true)
74+
let testData = mlContext.Data.LoadFromTextFile<TransactionObservation>(testFile, separatorChar = ',', hasHeader = true)
75+
trainData, testData
7476
7577
[...]
7678
7779
let featureColumnNames =
7880
trainData.Schema
7981
|> Seq.map (fun column -> column.Name)
82+
|> Seq.filter (fun name -> name <> "Time")
8083
|> Seq.filter (fun name -> name <> "Label")
81-
|> Seq.filter (fun name -> name <> "StratificationColumn")
84+
|> Seq.filter (fun name -> name <> "IdPreservationColumn")
8285
|> Seq.toArray
8386
8487
let pipeline =
8588
EstimatorChain()
86-
|> fun x -> x.Append(mlContext.Transforms.Concatenate(DefaultColumnNames.Features, featureColumnNames))
89+
|> fun x -> x.Append(mlContext.Transforms.Concatenate("Features", featureColumnNames))
8790
|> fun x -> x.Append(mlContext.Transforms.DropColumns [|"Time"|])
8891
|> fun x ->
8992
x.Append (
90-
mlContext.Transforms.Normalize (
93+
mlContext.Transforms.NormalizeMeanVariance (
9194
"FeaturesNormalizedByMeanVar",
92-
"Features",
93-
NormalizingEstimator.NormalizerMode.MeanVariance
95+
"Features"
9496
)
9597
)
9698
|> fun x ->
9799
x.Append (
98100
mlContext.BinaryClassification.Trainers.FastTree(
99101
"Label",
100102
"FeaturesNormalizedByMeanVar",
101-
numLeaves = 20,
102-
numTrees = 100,
103-
minDatapointsInLeaves = 10,
103+
numberOfLeaves = 20,
104+
numberOfTrees = 100,
105+
minimumExampleCountPerLeaf = 10,
104106
learningRate = 0.2
105107
)
106108
)
@@ -130,12 +132,13 @@ After the model is trained, you can use the `Predict()` API to predict if a tran
130132

131133
`````fsharp
132134
printfn "Making predictions"
133-
mlContext.CreateEnumerable<TransactionObservation>(testData, reuseRowObject = false)
135+
mlContext.Data.CreateEnumerable<TransactionObservation>(testData, reuseRowObject = false)
134136
|> Seq.filter (fun x -> x.Label = true)
135137
// use 5 observations from the test data
136138
|> Seq.take 5
137139
|> Seq.iter (fun testData ->
138140
let prediction = predictionEngine.Predict testData
139141
printfn "%A" prediction
140142
printfn "------"
143+
)
141144
`````

0 commit comments

Comments
 (0)