Skip to content

Commit 7cb26f3

Browse files
colbylwilliams authored and CESARDELATORRE committed
RandomizedPCA Anomaly Detection fraud detection sample (dotnet#589)
* empty console app creation * README and Third Party Notices copied from BinaryClassification_CreditCardFraudDetection * add new phase for solution to .vsts-dotnet-ci.yml * add project references to .Common and Microsoft.ML Nuget * Copy DataModels from BinaryClassification_CreditCardFraudDetection * initial go at sample... lots of copying from BinaryClassification_CreditCardFraudDetection * gitignores * console helper for anomaly detection * cleanup and tweak normalization * sln format * cleanup code comments and update README * Reference PredictedLabel issue (and some cleanup) * Update samples/csharp/getting-started/AnomalyDetection_CreditCardFraudDetection/Readme.md Co-Authored-By: Brigit Murtaugh <[email protected]> * update readme to v1.2.0
1 parent e8e3ac9 commit 7cb26f3

File tree

22 files changed

+1410
-1
lines changed

22 files changed

+1410
-1
lines changed

.vsts-dotnet-ci.yml

+8
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@ phases:
88
inputs:
99
projects: '.\samples\csharp\getting-started\BinaryClassification_CreditCardFraudDetection\CreditCardFraudDetection.sln'
1010

11+
- phase: CreditCardFraudDetection2
12+
queue: Hosted VS2017
13+
steps:
14+
- task: DotNetCoreCLI@2
15+
displayName: Build CreditCardFraudDetection (AnomalyDetection)
16+
inputs:
17+
projects: '.\samples\csharp\getting-started\AnomalyDetection_CreditCardFraudDetection\CreditCardFraudDetection.sln'
18+
1119
- phase: SentimentAnalysis
1220
queue: Hosted VS2017
1321
steps:

samples/csharp/common/ConsoleHelper.cs

+10
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,16 @@ public static void PrintBinaryClassificationMetrics(string name, CalibratedBinar
5656
Console.WriteLine($"************************************************************");
5757
}
5858

59+
/// <summary>
/// Writes a framed summary of anomaly detection metrics to the console.
/// </summary>
/// <param name="name">Name shown in the header line of the report.</param>
/// <param name="metrics">
/// Metrics to report; the area under the ROC curve (formatted as a percentage)
/// and the detection rate at false positive count are printed.
/// </param>
public static void PrintAnomalyDetectionMetrics(string name, AnomalyDetectionMetrics metrics)
{
    const string frame = "************************************************************";

    // The report is assembled first and then emitted one line at a time.
    string[] reportLines =
    {
        frame,
        $"* Metrics for {name} anomaly detection model ",
        "*-----------------------------------------------------------",
        $"* Area Under Curve: {metrics.AreaUnderRocCurve:P2}",
        $"* Detection rate at false positive count: {metrics.DetectionRateAtFalsePositiveCount}",
        frame,
    };

    foreach (string reportLine in reportLines)
    {
        Console.WriteLine(reportLine);
    }
}
68+
5969
public static void PrintMultiClassClassificationMetrics(string name, MulticlassClassificationMetrics metrics)
6070
{
6171
Console.WriteLine($"************************************************************");
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<TargetFramework>netstandard2.0</TargetFramework>
5+
</PropertyGroup>
6+
7+
<ItemGroup>
8+
<Compile Remove="Assets\**" />
9+
<EmbeddedResource Remove="Assets\**" />
10+
<None Remove="Assets\**" />
11+
</ItemGroup>
12+
13+
<ItemGroup>
14+
<PackageReference Include="Microsoft.ML" Version="$(MicrosoftMLVersion)" />
15+
</ItemGroup>
16+
17+
</Project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
using System;
2+
3+
namespace CreditCardFraudDetection.Common.DataModels
4+
{
5+
/// <summary>
/// Output of the anomaly detection model for a single transaction.
/// </summary>
public class TransactionFraudPrediction : IModelEntity
{
    // Score above which PrintToConsole reports a transaction as an anomaly.
    private const float AnomalyScoreThreshold = 0.2f;

    public float Label;

    /// <summary>
    /// The non-negative, unbounded score that was calculated by the anomaly detection model.
    /// Fraudulent transactions (anomalies) will have higher scores than normal transactions.
    /// </summary>
    public float Score;

    /// <summary>
    /// The predicted label, based on the score. A value of true indicates an anomaly.
    /// </summary>
    public bool PredictedLabel;

    /// <summary>
    /// Prints the predicted label (derived from <see cref="Score"/>) and the raw score.
    /// </summary>
    public void PrintToConsole()
    {
        // There is currently an issue where PredictedLabel is always set to true.
        // Because of that, the threshold that indicates an anomaly is chosen manually
        // here instead of printing PredictedLabel directly.
        // Issue: https://github.com/dotnet/machinelearning/issues/3990
        bool isAnomaly = Score > AnomalyScoreThreshold;

        Console.WriteLine($"Predicted Label: {isAnomaly} (Score: {Score})");
    }
}
30+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
using System;
2+
3+
using Microsoft.ML.Data;
4+
5+
namespace CreditCardFraudDetection.Common.DataModels
6+
{
7+
/// <summary>
/// Implemented by data model types that can print themselves.
/// </summary>
public interface IModelEntity
{
    /// <summary>
    /// Prints a representation of this entity.
    /// </summary>
    void PrintToConsole();
}
10+
11+
/// <summary>
/// One row of the transaction dataset. Each field is bound to a zero-based
/// column index of the input file through <c>LoadColumn</c>.
/// </summary>
public class TransactionObservation : IModelEntity
{
    // Column 0.
    [LoadColumn(0)] public float Time;

    // Columns 1-28: the V1..V28 feature values.
    [LoadColumn(1)] public float V1;
    [LoadColumn(2)] public float V2;
    [LoadColumn(3)] public float V3;
    [LoadColumn(4)] public float V4;
    [LoadColumn(5)] public float V5;
    [LoadColumn(6)] public float V6;
    [LoadColumn(7)] public float V7;
    [LoadColumn(8)] public float V8;
    [LoadColumn(9)] public float V9;
    [LoadColumn(10)] public float V10;
    [LoadColumn(11)] public float V11;
    [LoadColumn(12)] public float V12;
    [LoadColumn(13)] public float V13;
    [LoadColumn(14)] public float V14;
    [LoadColumn(15)] public float V15;
    [LoadColumn(16)] public float V16;
    [LoadColumn(17)] public float V17;
    [LoadColumn(18)] public float V18;
    [LoadColumn(19)] public float V19;
    [LoadColumn(20)] public float V20;
    [LoadColumn(21)] public float V21;
    [LoadColumn(22)] public float V22;
    [LoadColumn(23)] public float V23;
    [LoadColumn(24)] public float V24;
    [LoadColumn(25)] public float V25;
    [LoadColumn(26)] public float V26;
    [LoadColumn(27)] public float V27;
    [LoadColumn(28)] public float V28;

    // Column 29.
    [LoadColumn(29)] public float Amount;

    // Column 30.
    [LoadColumn(30)] public float Label;

    /// <summary>
    /// Prints the label followed by an abbreviated feature list
    /// (V1, V2, V3, V28 and Amount).
    /// </summary>
    public void PrintToConsole()
    {
        string labelLine = $"Label: {Label}";
        string featuresLine = $"Features: [V1] {V1} [V2] {V2} [V3] {V3} ... [V28] {V28} Amount: {Amount}";

        Console.WriteLine(labelLine);
        Console.WriteLine(featuresLine);
    }
}
111+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
using System.IO;
2+
using System.Linq;
3+
4+
using Microsoft.ML.Data;
5+
6+
namespace CreditCardFraudDetection.Common
7+
{
8+
/// <summary>
/// Helpers for building and deleting file paths relative to the directory
/// that contains this assembly.
/// </summary>
public static class LocalConsoleHelper
{
    /// <summary>
    /// Combines the directory of this assembly with the given path segments.
    /// </summary>
    /// <param name="paths">Segments appended, in order, to the assembly directory.</param>
    /// <returns>
    /// The combined path, or <c>null</c> when <paramref name="paths"/> is null or empty.
    /// The segments are joined with <see cref="Path.Combine(string[])"/>.
    /// </returns>
    public static string GetAssetsPath(params string[] paths)
    {
        var assemblyFile = new FileInfo(typeof(LocalConsoleHelper).Assembly.Location);

        bool hasSegments = paths != null && paths.Length > 0;
        if (!hasSegments)
        {
            return null;
        }

        string[] allSegments = new[] { assemblyFile.Directory.FullName }
            .Concat(paths)
            .ToArray();

        return Path.Combine(allSegments);
    }

    /// <summary>
    /// Deletes the file addressed by the given path segments, if it exists.
    /// </summary>
    /// <param name="paths">Segments passed to <see cref="GetAssetsPath"/>.</param>
    /// <returns>The resolved path (possibly <c>null</c>), whether or not a file was deleted.</returns>
    public static string DeleteAssets(params string[] paths)
    {
        string target = GetAssetsPath(paths);

        // Nothing to do when no path was produced or no file is present.
        if (string.IsNullOrWhiteSpace(target) || !File.Exists(target))
        {
            return target;
        }

        File.Delete(target);
        return target;
    }
}
34+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>netcoreapp2.1</TargetFramework>
6+
<LangVersion>7.2</LangVersion>
7+
</PropertyGroup>
8+
9+
<ItemGroup>
10+
<Folder Include="assets\input\" />
11+
</ItemGroup>
12+
13+
<ItemGroup>
14+
<None Remove="assets\input\.gitignore" />
15+
</ItemGroup>
16+
17+
<ItemGroup>
18+
<PackageReference Include="Microsoft.ML" Version="$(MicrosoftMLVersion)" />
19+
</ItemGroup>
20+
21+
<ItemGroup>
22+
<ProjectReference Include="..\CreditCardFraudDetection.Common\CreditCardFraudDetection.Common.csproj" />
23+
</ItemGroup>
24+
25+
</Project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
using System;
2+
using System.Linq;
3+
4+
using Microsoft.ML;
5+
6+
using CreditCardFraudDetection.Common.DataModels;
7+
8+
namespace CreditCardFraudDetection.Predictor
9+
{
10+
/// <summary>
/// Loads a saved model and prints predictions for transactions read from a dataset file.
/// </summary>
public class Predictor
{
    private readonly string _modelFile;
    private readonly string _datasetFile;

    /// <summary>
    /// Creates a predictor for the given model and dataset files.
    /// </summary>
    /// <param name="modelfile">Path of the saved model file.</param>
    /// <param name="dasetFile">Path of the comma-separated dataset file (with a header row).</param>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public Predictor(string modelfile, string dasetFile)
    {
        // Parameter names are kept as-is (including the "dasetFile" spelling) so that
        // callers using named arguments keep compiling.
        _modelFile = modelfile ?? throw new ArgumentNullException(nameof(modelfile));
        _datasetFile = dasetFile ?? throw new ArgumentNullException(nameof(dasetFile));
    }

    /// <summary>
    /// Prints predictions for up to <paramref name="numberOfPredictions"/> rows whose label is
    /// greater than 0, followed by up to the same number of rows whose label is less than 1.
    /// </summary>
    /// <param name="numberOfPredictions">Maximum number of rows to predict per group.</param>
    public void RunMultiplePredictions(int numberOfPredictions)
    {
        var mlContext = new MLContext();

        // Load data as input for predictions
        IDataView inputDataForPredictions = mlContext.Data.LoadFromTextFile<TransactionObservation>(_datasetFile, separatorChar: ',', hasHeader: true);

        Console.WriteLine($"Predictions from saved model:");

        // The input schema returned by Load is not needed here.
        ITransformer model = mlContext.Model.Load(_modelFile, out _);

        // The prediction engine is disposable; release it once all predictions are printed.
        using (var predictionEngine = mlContext.Model.CreatePredictionEngine<TransactionObservation, TransactionFraudPrediction>(model))
        {
            // Prints the observation and its prediction for the first rows matching the filter.
            void PrintPredictions(Func<TransactionObservation, bool> filter)
            {
                var selected = mlContext.Data
                    .CreateEnumerable<TransactionObservation>(inputDataForPredictions, reuseRowObject: false)
                    .Where(filter)
                    .Take(numberOfPredictions)
                    .ToList();

                foreach (TransactionObservation testData in selected)
                {
                    Console.WriteLine($"--- Transaction ---");
                    testData.PrintToConsole();
                    predictionEngine.Predict(testData).PrintToConsole();
                    Console.WriteLine($"-------------------");
                }
            }

            Console.WriteLine($"\n \n Test {numberOfPredictions} transactions, from the test datasource, that should be predicted as fraud (true):");
            PrintPredictions(x => x.Label > 0);

            Console.WriteLine($"\n \n Test {numberOfPredictions} transactions, from the test datasource, that should NOT be predicted as fraud (false):");
            PrintPredictions(x => x.Label < 1);
        }
    }
}
66+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
using System;
2+
using System.IO;
3+
4+
using CreditCardFraudDetection.Common;
5+
6+
namespace CreditCardFraudDetection.Predictor
7+
{
8+
/// <summary>
/// Entry point of the predictor console app: copies the trained model and test data
/// from the trainer project's output and runs a few predictions.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        string assetsPath = GetAbsolutePath(@"../../../assets");
        string trainOutput = GetAbsolutePath(@"../../../../CreditCardFraudDetection.Trainer/assets/output");

        CopyModelAndDatasetFromTrainingProject(trainOutput, assetsPath);

        var inputDatasetForPredictions = Path.Combine(assetsPath, "input", "testData.csv");
        var modelFilePath = Path.Combine(assetsPath, "input", "randomizedPca.zip");

        // Create model predictor to perform a few predictions
        var modelPredictor = new Predictor(modelFilePath, inputDatasetForPredictions);

        modelPredictor.RunMultiplePredictions(numberOfPredictions: 5);

        Console.WriteLine("=============== Press any key ===============");
        Console.ReadKey();
    }

    /// <summary>
    /// Copies every file in <paramref name="trainOutput"/> into the "input" folder under
    /// <paramref name="assetsPath"/>, overwriting files that already exist there.
    /// If the test data or the model file is missing from <paramref name="trainOutput"/>,
    /// prints a hint, waits for a key press and exits the process.
    /// </summary>
    /// <param name="trainOutput">Folder expected to contain testData.csv and randomizedPca.zip.</param>
    /// <param name="assetsPath">Assets folder of this project; files go into its "input" subfolder.</param>
    public static void CopyModelAndDatasetFromTrainingProject(string trainOutput, string assetsPath)
    {
        bool testDataExists = File.Exists(Path.Combine(trainOutput, "testData.csv"));
        bool modelExists = File.Exists(Path.Combine(trainOutput, "randomizedPca.zip"));

        if (!testDataExists || !modelExists)
        {
            Console.WriteLine("***** YOU NEED TO RUN THE TRAINING PROJECT FIRST *****");
            Console.WriteLine("=============== Press any key ===============");
            Console.ReadKey();
            Environment.Exit(0);
        }

        // Copy files from train output.
        // Create the "input" folder itself (this also creates assetsPath); creating only
        // assetsPath would make the copy fail when "input" does not exist yet.
        string inputPath = Path.Combine(assetsPath, "input");
        Directory.CreateDirectory(inputPath);

        foreach (var file in Directory.GetFiles(trainOutput))
        {
            string fileDestination = Path.Combine(inputPath, Path.GetFileName(file));

            // Overwrite replaces any copy left over from a previous run.
            File.Copy(file, fileDestination, overwrite: true);
        }
    }

    /// <summary>
    /// Resolves a path relative to the folder that contains this assembly.
    /// </summary>
    /// <param name="relativePath">Path relative to the assembly folder; may contain ".." segments.</param>
    /// <returns>The absolute path, normalized so that ".." segments are resolved.</returns>
    public static string GetAbsolutePath(string relativePath)
    {
        FileInfo assemblyFile = new FileInfo(typeof(Program).Assembly.Location);
        string assemblyFolderPath = assemblyFile.Directory.FullName;

        return Path.GetFullPath(Path.Combine(assemblyFolderPath, relativePath));
    }
}
69+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
!.gitignore
2+
3+
*.csv

0 commit comments

Comments
 (0)