-
Notifications
You must be signed in to change notification settings - Fork 329
Implement ML Features #381. SQLTransformer class and testcase #781
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
16 commits
Select commit
Hold shift + click to select a range
8629ba2
Implement ML Features #381. SQLTransformer class and testcase
27dd94b
Corrected the Test cases to remove issue reported in automated test.
fcc2840
Added TransformSchema and GetStatement in SQLTransformer class
7ca85ec
Merge branch 'master' into SQLTransformer
ramanathanv 4fcda89
Corrected the implementation of TransformSchema method
f7b31ce
1. Implemented Testcase for GetStatement(), 2. Corrected formatting s…
63dee79
Merge branch 'master' into SQLTransformer
ramanathanv dedda2c
Order of Assert comparison corrected
3f5a620
Update src/csharp/Microsoft.Spark/ML/Feature/SQLTransformer.cs
ramanathanv 00cc0d6
Update src/csharp/Microsoft.Spark/ML/Feature/SQLTransformer.cs
ramanathanv 0f5870c
Update src/csharp/Microsoft.Spark/ML/Feature/SQLTransformer.cs
ramanathanv f8d8e26
Update src/csharp/Microsoft.Spark/ML/Feature/SQLTransformer.cs
ramanathanv e3364eb
Update src/csharp/Microsoft.Spark/ML/Feature/SQLTransformer.cs
ramanathanv b5c8560
Added method summary for TestSQLTransformer method
03f72f5
Merge branch 'master' into SQLTransformer
ramanathanv 8e33424
Merge branch 'master' into SQLTransformer
imback82 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
73 changes: 73 additions & 0 deletions
73
src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/SQLTransformerTests.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
// See the LICENSE file in the project root for more information. | ||
|
||
using System.Collections.Generic; | ||
using System.IO; | ||
using Microsoft.Spark.ML.Feature; | ||
using Microsoft.Spark.Sql; | ||
using Microsoft.Spark.Sql.Types; | ||
using Microsoft.Spark.UnitTest.TestUtils; | ||
using Xunit; | ||
|
||
namespace Microsoft.Spark.E2ETest.IpcTests.ML.Feature
{
    /// <summary>
    /// End-to-end tests for <see cref="SQLTransformer"/> that run against the shared
    /// <see cref="SparkSession"/> supplied by <see cref="SparkFixture"/>.
    /// </summary>
    [Collection("Spark E2E Tests")]
    public class SQLTransformerTests : FeatureBaseTests<SQLTransformer>
    {
        private readonly SparkSession _spark;

        public SQLTransformerTests(SparkFixture fixture) : base(fixture)
        {
            _spark = fixture.Spark;
        }

        /// <summary>
        /// Create a <see cref="DataFrame"/>, create a <see cref="SQLTransformer"/> and test the
        /// available methods: SetStatement/GetStatement, Transform, TransformSchema, Uid and
        /// the Save/Load round trip.
        /// </summary>
        [Fact]
        public void TestSQLTransformer()
        {
            DataFrame input = _spark.CreateDataFrame(
                new List<GenericRow>
                {
                    new GenericRow(new object[] { 0, 1.0, 3.0 }),
                    new GenericRow(new object[] { 2, 2.0, 5.0 })
                },
                new StructType(new List<StructField>
                {
                    new StructField("id", new IntegerType()),
                    new StructField("v1", new DoubleType()),
                    new StructField("v2", new DoubleType())
                }));

            string expectedUid = "theUid";
            string inputStatement = "SELECT *, (v1 + v2) AS v3, (v1 * v2) AS v4 FROM __THIS__";

            SQLTransformer sqlTransformer = new SQLTransformer(expectedUid)
                .SetStatement(inputStatement);

            string outputStatement = sqlTransformer.GetStatement();

            DataFrame output = sqlTransformer.Transform(input);
            StructType outputSchema = sqlTransformer.TransformSchema(input.Schema());

            // Both the transformed data and the schema-only transformation must expose the
            // two columns introduced by the statement.
            Assert.Contains(output.Schema().Fields, f => f.Name == "v3");
            Assert.Contains(output.Schema().Fields, f => f.Name == "v4");
            Assert.Contains(outputSchema.Fields, f => f.Name == "v3");
            Assert.Contains(outputSchema.Fields, f => f.Name == "v4");
            Assert.Equal(inputStatement, outputStatement);
            Assert.Equal(expectedUid, sqlTransformer.Uid());

            using (var tempDirectory = new TemporaryDirectory())
            {
                string savePath = Path.Join(tempDirectory.Path, "SQLTransformer");
                sqlTransformer.Save(savePath);

                SQLTransformer loadedSqlTransformer = SQLTransformer.Load(savePath);
                Assert.Equal(sqlTransformer.Uid(), loadedSqlTransformer.Uid());

                // Checking only the UID would still pass if the statement were dropped
                // during persistence, so verify the statement param round-trips as well.
                Assert.Equal(inputStatement, loadedSqlTransformer.GetStatement());
            }
        }
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
// See the LICENSE file in the project root for more information. | ||
|
||
using Microsoft.Spark.Interop; | ||
using Microsoft.Spark.Interop.Ipc; | ||
using Microsoft.Spark.Sql; | ||
using Microsoft.Spark.Sql.Types; | ||
|
||
namespace Microsoft.Spark.ML.Feature
{
    /// <summary>
    /// <see cref="SQLTransformer"/> applies a transformation that is described by a SQL
    /// statement. Every operation is forwarded to the JVM object of class
    /// <c>org.apache.spark.ml.feature.SQLTransformer</c>.
    /// </summary>
    public class SQLTransformer : FeatureBase<SQLTransformer>, IJvmObjectReferenceProvider
    {
        private static readonly string s_sqlTransformerClassName =
            "org.apache.spark.ml.feature.SQLTransformer";

        /// <summary>
        /// Initializes a new <see cref="SQLTransformer"/> using only the JVM class name.
        /// </summary>
        public SQLTransformer()
            : base(s_sqlTransformerClassName)
        {
        }

        /// <summary>
        /// Initializes a new <see cref="SQLTransformer"/> that is identified by the given UID.
        /// </summary>
        /// <param name="uid">An immutable unique ID for the object and its derivatives.</param>
        public SQLTransformer(string uid)
            : base(s_sqlTransformerClassName, uid)
        {
        }

        /// <summary>
        /// Wraps an already existing JVM object reference.
        /// </summary>
        /// <param name="jvmObject">Reference to the JVM-side object to wrap.</param>
        internal SQLTransformer(JvmObjectReference jvmObject)
            : base(jvmObject)
        {
        }

        JvmObjectReference IJvmObjectReferenceProvider.Reference
        {
            get { return _jvmObject; }
        }

        /// <summary>
        /// Runs this <see cref="SQLTransformer"/> against the given <see cref="DataFrame"/> by
        /// invoking <c>transform</c> on the underlying JVM object.
        /// </summary>
        /// <param name="source">The <see cref="DataFrame"/> to transform.</param>
        /// <returns>
        /// A new <see cref="DataFrame"/> wrapping the result returned by the JVM call.
        /// </returns>
        public DataFrame Transform(DataFrame source)
        {
            var transformed = (JvmObjectReference)_jvmObject.Invoke("transform", source);
            return new DataFrame(transformed);
        }

        /// <summary>
        /// Runs the schema-only transformation by invoking <c>transformSchema</c> on the
        /// underlying JVM object.
        /// </summary>
        /// <param name="value">The schema to be transformed.</param>
        /// <returns>
        /// A new <see cref="StructType"/> wrapping the schema returned by the JVM call.
        /// </returns>
        public StructType TransformSchema(StructType value)
        {
            // The schema is handed to the JVM side as an object rebuilt from its JSON form.
            var jvmSchema = DataType.FromJson(_jvmObject.Jvm, value.Json);
            var transformedSchema =
                (JvmObjectReference)_jvmObject.Invoke("transformSchema", jvmSchema);

            return new StructType(transformedSchema);
        }

        /// <summary>
        /// Reads the statement by invoking <c>getStatement</c> on the underlying JVM object.
        /// </summary>
        /// <returns>The statement.</returns>
        public string GetStatement()
        {
            return (string)_jvmObject.Invoke("getStatement");
        }

        /// <summary>
        /// Assigns the statement by invoking <c>setStatement</c> on the underlying JVM object.
        /// </summary>
        /// <param name="statement">The SQL statement.</param>
        /// <returns>
        /// A <see cref="SQLTransformer"/> wrapping the reference returned by the JVM call.
        /// </returns>
        public SQLTransformer SetStatement(string statement)
        {
            var updated = (JvmObjectReference)_jvmObject.Invoke("setStatement", statement);
            return WrapAsSQLTransformer(updated);
        }

        /// <summary>
        /// Loads a <see cref="SQLTransformer"/> that was previously saved using Save, by calling
        /// the static JVM method <c>load</c>.
        /// </summary>
        /// <param name="path">The path the <see cref="SQLTransformer"/> was saved to.</param>
        /// <returns>A new <see cref="SQLTransformer"/> object, loaded from the path.</returns>
        public static SQLTransformer Load(string path)
        {
            object loaded = SparkEnvironment.JvmBridge.CallStaticJavaMethod(
                s_sqlTransformerClassName,
                "load",
                path);

            return WrapAsSQLTransformer(loaded);
        }

        // Casts the untyped JVM call result to a reference and wraps it in a new instance.
        private static SQLTransformer WrapAsSQLTransformer(object obj)
        {
            var reference = (JvmObjectReference)obj;
            return new SQLTransformer(reference);
        }
    }
}
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.