diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java b/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java index 89eab6cf16..e64a1cdad1 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java @@ -28,7 +28,7 @@ public class SPath extends UnresolvedPlan { @Nullable private final String outField; - private final String path; + @Nullable private final String path; @Override public UnresolvedPlan attach(UnresolvedPlan child) { diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 15158d067e..e7441f8b55 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -688,7 +688,50 @@ public RelNode visitParse(Parse node, CalcitePlanContext context) { @Override public RelNode visitSpath(SPath node, CalcitePlanContext context) { - return visitEval(node.rewriteAsEval(), context); + if (node.getPath() != null) { + return visitEval(node.rewriteAsEval(), context); + } else { + return spathExtractAll(node, context); + } + } + + private RelNode spathExtractAll(SPath node, CalcitePlanContext context) { + visitChildren(node, context); + + // 1. Extract all fields from JSON in `inField` and merge with existing dynamic fields. + // _MAP = MAP_APPEND(_MAP, JSON_EXTRACT_ALL(inField)) + RexNode inField = QualifiedNameResolver.resolveFieldOrThrow(1, 0, node.getInField(), context); + RexNode map = context.rexBuilder.makeCall(BuiltinFunctionName.JSON_EXTRACT_ALL, inField); + if (context.fieldBuilder.isDynamicFieldsExist()) { + map = + context.rexBuilder.makeCall( + BuiltinFunctionName.MAP_APPEND, context.fieldBuilder.getDynamicFieldsMap(), map); + } + + // 2. Merge dynamic fields with static fields. + // static_field1 = APPEND(static_field1, _MAP[static_field1]), static_attr2 = ... + List staticFieldNames = context.fieldBuilder.getStaticFieldNames(); + List fields = new ArrayList<>(); + for (String fieldName : staticFieldNames) { + RexNode field = context.fieldBuilder.staticField(fieldName); + RexNode appended = + context.rexBuilder.makeCall( + BuiltinFunctionName.INTERNAL_APPEND, + field, + context.rexBuilder.createItemAccess(map, fieldName)); + fields.add(context.relBuilder.alias(appended, fieldName)); + } + + // 3. Dedupe dynamic fields with static fields. 
+ // _MAP = MAP_REMOVE(_MAP, [static_attr1, static_attr2, ...]) + RexNode fieldNameArray = context.rexBuilder.createStringArrayLiteral(staticFieldNames); + RexNode dedupedMap = + context.rexBuilder.makeCall(BuiltinFunctionName.MAP_REMOVE, map, fieldNameArray); + fields.add(context.relBuilder.alias(dedupedMap, DynamicFieldsConstants.DYNAMIC_FIELDS_MAP)); + + context.relBuilder.project(fields); + + return context.relBuilder.peek(); } // might need to remove fields in _MAP diff --git a/core/src/main/java/org/opensearch/sql/calcite/ExtendedRexBuilder.java b/core/src/main/java/org/opensearch/sql/calcite/ExtendedRexBuilder.java index 150963376a..186f691ead 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/ExtendedRexBuilder.java +++ b/core/src/main/java/org/opensearch/sql/calcite/ExtendedRexBuilder.java @@ -6,6 +6,7 @@ package org.opensearch.sql.calcite; import com.google.common.collect.ImmutableList; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Locale; @@ -25,7 +26,9 @@ import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.exception.SemanticCheckException; +import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.PPLFuncImpTable; public class ExtendedRexBuilder extends RexBuilder { @@ -170,4 +173,25 @@ public RexNode castToString(RexNode node) { RelDataType nullableStringType = getTypeFactory().createTypeWithNullability(stringType, true); return makeCast(nullableStringType, node, true, true); } + + public RexNode createItemAccess(RexNode field, String itemName) { + return PPLFuncImpTable.INSTANCE.resolve( + this, BuiltinFunctionName.INTERNAL_ITEM, field, makeLiteral(itemName)); + } + + public RexNode makeCall(BuiltinFunctionName fn, RexNode... 
nodes) { + return PPLFuncImpTable.INSTANCE.resolve(this, fn, nodes); + } + + public RexNode createStringArrayLiteral(List values) { + RelDataType stringType = getTypeFactory().createSqlType(SqlTypeName.VARCHAR); + RelDataType arrayType = getTypeFactory().createArrayType(stringType, -1); + + List elements = new ArrayList<>(); + for (String value : values) { + elements.add(makeLiteral(value)); + } + + return makeCall(arrayType, SqlStdOperatorTable.ARRAY_VALUE_CONSTRUCTOR, elements); + } } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index ced98022ca..f467870e6a 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -66,6 +66,7 @@ public enum BuiltinFunctionName { MAP_CONCAT(FunctionName.of("map_concat"), true), MAP_REMOVE(FunctionName.of("map_remove"), true), MVAPPEND(FunctionName.of("mvappend")), + INTERNAL_APPEND(FunctionName.of("append"), true), MVJOIN(FunctionName.of("mvjoin")), FORALL(FunctionName.of("forall")), EXISTS(FunctionName.of("exists")), diff --git a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendCore.java b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/AppendCore.java similarity index 50% rename from core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendCore.java rename to core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/AppendCore.java index f9a67e4d6d..8608020bc3 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendCore.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/AppendCore.java @@ -8,14 +8,32 @@ import java.util.ArrayList; import java.util.List; -/** Core logic for `mvappend` command to collect elements from list of args */ -public class MVAppendCore { +/** + * Core logic for `mvappend` and internal `append` function to collect elements from list of args. + */ +public class AppendCore { /** * Collect non-null elements from `args`. If an item is a list, it will collect non-null elements - * of the list. See {@ref MVAppendFunctionImplTest} for detailed behavior. + * of the list. See {@ref AppendFunctionImplTest} for detailed behavior. + */ + public static Object collectElements(Object... args) { + List elements = collectElementsToList(args); + + if (elements.isEmpty()) { + return null; + } else if (elements.size() == 1) { + // return the element in case of single element + return elements.get(0); + } else { + return elements; + } + } + + /** + * Collect non-null elements from `args`. If an item is a list, it will collect non-null elements. */ - public static List collectElements(Object... args) { + public static List collectElementsToList(Object... args) { List elements = new ArrayList<>(); for (Object arg : args) { @@ -28,7 +46,7 @@ public static List collectElements(Object... args) { } } - return elements.isEmpty() ? 
null : elements; + return elements; } private static void addListElements(List list, List elements) { diff --git a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/AppendFunctionImpl.java b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/AppendFunctionImpl.java new file mode 100644 index 0000000000..cecc19a14b --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/AppendFunctionImpl.java @@ -0,0 +1,65 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.CollectionUDF; + +import java.util.List; +import org.apache.calcite.adapter.enumerable.NotNullImplementor; +import org.apache.calcite.adapter.enumerable.NullPolicy; +import org.apache.calcite.adapter.enumerable.RexToLixTranslator; +import org.apache.calcite.linq4j.tree.Expression; +import org.apache.calcite.linq4j.tree.Expressions; +import org.apache.calcite.linq4j.tree.Types; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeName; +import org.opensearch.sql.expression.function.ImplementorUDF; +import org.opensearch.sql.expression.function.UDFOperandMetadata; + +/** + * Internal append function that appends all elements from arguments to create an array. Returns + * null if there is no element. Returns the scalar value if there is single element. Otherwise, + * returns a list containing all the elements from inputs. + */ +public class AppendFunctionImpl extends ImplementorUDF { + + public AppendFunctionImpl() { + super(new AppendImplementor(), NullPolicy.ALL); + } + + @Override + public SqlReturnTypeInference getReturnTypeInference() { + return sqlOperatorBinding -> { + RelDataTypeFactory typeFactory = sqlOperatorBinding.getTypeFactory(); + + if (sqlOperatorBinding.getOperandCount() == 0) { + return typeFactory.createSqlType(SqlTypeName.NULL); + } + + // Return type is ANY as it could return scalar value (in case of single item) or array + return typeFactory.createSqlType(SqlTypeName.ANY); + }; + } + + @Override + public UDFOperandMetadata getOperandMetadata() { + return null; + } + + public static class AppendImplementor implements NotNullImplementor { + @Override + public Expression implement( + RexToLixTranslator translator, RexCall call, List translatedOperands) { + return Expressions.call( + Types.lookupMethod(AppendFunctionImpl.class, "append", Object[].class), + Expressions.newArrayInit(Object.class, translatedOperands)); + } + } + + public static Object append(Object... args) { + return AppendCore.collectElements(args); + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendFunctionImpl.java b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendFunctionImpl.java index 107df5eea4..bd50fea487 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendFunctionImpl.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendFunctionImpl.java @@ -97,6 +97,15 @@ public Expression implement( } public static Object mvappend(Object... args) { - return MVAppendCore.collectElements(args); + return collectElements(args); + } + + /** + * Collect non-null elements from `args`. If an item is a list, it will collect non-null elements + * of the list. 
See {@ref MVAppendFunctionImplTest} for detailed behavior. + */ + public static List collectElements(Object... args) { + List elements = AppendCore.collectElementsToList(args); + return elements.isEmpty() ? null : elements; } } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MapAppendFunctionImpl.java b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MapAppendFunctionImpl.java index 4cb0acae61..c7276a5085 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MapAppendFunctionImpl.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MapAppendFunctionImpl.java @@ -24,8 +24,8 @@ import org.opensearch.sql.expression.function.UDFOperandMetadata; /** - * MapAppend function that merges two maps. All the values will be converted to list for type - * consistency. + * MapAppend function that merges two maps. Value for the same key will be merged into an array by + * using {@ref: AppendCore}. */ public class MapAppendFunctionImpl extends ImplementorUDF { @@ -89,7 +89,7 @@ private static Map verifyMap(Object map) { static Map mapAppendImpl(Map map) { Map result = new HashMap<>(); for (String key : map.keySet()) { - result.put(key, MVAppendCore.collectElements(map.get(key))); + result.put(key, AppendCore.collectElements(map.get(key))); } return result; } @@ -99,7 +99,7 @@ static Map mapAppendImpl( Map result = new HashMap<>(); for (String key : mergeKeys(firstMap, secondMap)) { - result.put(key, MVAppendCore.collectElements(firstMap.get(key), secondMap.get(key))); + result.put(key, AppendCore.collectElements(firstMap.get(key), secondMap.get(key))); } return result; diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java index 4e1aadfc84..6bab9bbde0 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java @@ -42,6 +42,7 @@ import org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.expression.datetime.DateTimeFunctions; +import org.opensearch.sql.expression.function.CollectionUDF.AppendFunctionImpl; import org.opensearch.sql.expression.function.CollectionUDF.ArrayFunctionImpl; import org.opensearch.sql.expression.function.CollectionUDF.ExistsFunctionImpl; import org.opensearch.sql.expression.function.CollectionUDF.FilterFunctionImpl; @@ -389,9 +390,10 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable { public static final SqlOperator FORALL = new ForallFunctionImpl().toUDF("forall"); public static final SqlOperator EXISTS = new ExistsFunctionImpl().toUDF("exists"); public static final SqlOperator ARRAY = new ArrayFunctionImpl().toUDF("array"); - public static final SqlOperator MAP_APPEND = new MapAppendFunctionImpl().toUDF("map_append"); + public static final SqlOperator MAP_APPEND = new MapAppendFunctionImpl().toUDF("MAP_APPEND"); public static final SqlOperator MAP_REMOVE = new MapRemoveFunctionImpl().toUDF("MAP_REMOVE"); - public static final SqlOperator MVAPPEND = new MVAppendFunctionImpl().toUDF("mvappend"); + public static final SqlOperator MVAPPEND = new MVAppendFunctionImpl().toUDF("MVAPPEND"); + public static final SqlOperator INTERNAL_APPEND = new AppendFunctionImpl().toUDF("APPEND"); public static final SqlOperator FILTER = 
new FilterFunctionImpl().toUDF("filter"); public static final SqlOperator TRANSFORM = new TransformFunctionImpl().toUDF("transform"); public static final SqlOperator REDUCE = new ReduceFunctionImpl().toUDF("reduce"); diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index a678e1efe7..5b2a344d01 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -78,6 +78,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.HOUR_OF_DAY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.IF; import static org.opensearch.sql.expression.function.BuiltinFunctionName.IFNULL; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_APPEND; import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_GROK; import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_ITEM; import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_PARSE; @@ -957,6 +958,7 @@ void populate() { registerOperator(ARRAY, PPLBuiltinOperators.ARRAY); registerOperator(MVAPPEND, PPLBuiltinOperators.MVAPPEND); + registerOperator(INTERNAL_APPEND, PPLBuiltinOperators.INTERNAL_APPEND); registerOperator(MAP_APPEND, PPLBuiltinOperators.MAP_APPEND); registerOperator(MAP_CONCAT, SqlLibraryOperators.MAP_CONCAT); registerOperator(MAP_REMOVE, PPLBuiltinOperators.MAP_REMOVE); diff --git a/core/src/test/java/org/opensearch/sql/expression/function/CollectionUDF/AppendFunctionImplTest.java b/core/src/test/java/org/opensearch/sql/expression/function/CollectionUDF/AppendFunctionImplTest.java new file mode 100644 index 0000000000..cbbfa67e8a --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/expression/function/CollectionUDF/AppendFunctionImplTest.java @@ -0,0 +1,81 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.CollectionUDF; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; + +import java.util.Arrays; +import java.util.List; +import org.junit.jupiter.api.Test; + +public class AppendFunctionImplTest { + + @Test + public void testAppendWithNoArguments() { + Object result = AppendFunctionImpl.append(); + assertNull(result); + } + + @Test + public void testAppendWithSingleElement() { + Object result = AppendFunctionImpl.append(42); + assertEquals(42, result); + } + + @Test + public void testAppendWithMultipleElements() { + Object result = AppendFunctionImpl.append(1, 2, 3); + assertEquals(Arrays.asList(1, 2, 3), result); + } + + @Test + public void testAppendWithNullValues() { + Object result = AppendFunctionImpl.append(null, 1, null); + assertEquals(1, result); + } + + @Test + public void testAppendWithAllNulls() { + Object result = AppendFunctionImpl.append(null, null); + assertNull(result); + } + + @Test + public void testAppendWithArrayFlattening() { + List array1 = Arrays.asList(1, 2); + List array2 = Arrays.asList(3, 4); + Object result = AppendFunctionImpl.append(array1, array2); + assertEquals(Arrays.asList(1, 2, 3, 4), result); + } + + @Test + public void testAppendWithMixedTypes() { + List array = Arrays.asList(1, 2); + Object result = AppendFunctionImpl.append(array, 3, "hello"); + 
assertEquals(Arrays.asList(1, 2, 3, "hello"), result); + } + + @Test + public void testAppendWithArrayAndNulls() { + List array = Arrays.asList(1, 2); + Object result = AppendFunctionImpl.append(null, array, null, 3); + assertEquals(Arrays.asList(1, 2, 3), result); + } + + @Test + public void testAppendWithSingleNull() { + Object result = AppendFunctionImpl.append((Object) null); + assertNull(result); + } + + @Test + public void testAppendWithEmptyArray() { + List emptyArray = Arrays.asList(); + Object result = AppendFunctionImpl.append(emptyArray, 1); + assertEquals(1, result); + } +} diff --git a/core/src/test/java/org/opensearch/sql/expression/function/CollectionUDF/MapAppendFunctionImplTest.java b/core/src/test/java/org/opensearch/sql/expression/function/CollectionUDF/MapAppendFunctionImplTest.java index 28df3c3cbd..6b131aed86 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/CollectionUDF/MapAppendFunctionImplTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/CollectionUDF/MapAppendFunctionImplTest.java @@ -5,8 +5,8 @@ package org.opensearch.sql.expression.function.CollectionUDF; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.*; import java.util.HashMap; import java.util.List; @@ -21,11 +21,14 @@ void testMapAppendWithNonOverlappingKeys() { Map result = MapAppendFunctionImpl.mapAppendImpl(map1, map2); - assertEquals(4, result.size()); - assertMapListValues(result, "a", "value1"); - assertMapListValues(result, "b", "value2"); - assertMapListValues(result, "c", "value3"); - assertMapListValues(result, "d", "value4"); + assertThat( + result, + allOf( + hasEntry("a", "value1"), + hasEntry("b", "value2"), + hasEntry("c", "value3"), + hasEntry("d", "value4"), + aMapWithSize(4))); } @Test @@ -35,10 +38,13 @@ void testMapAppendWithOverlappingKeys() { Map result = MapAppendFunctionImpl.mapAppendImpl(map1, map2); - assertEquals(3, result.size()); - assertMapListValues(result, "a", "value1"); - assertMapListValues(result, "b", "value2", "value3"); - assertMapListValues(result, "c", "value4"); + assertThat( + result, + allOf( + aMapWithSize(3), + hasEntry("a", (Object) "value1"), + hasEntry("b", (Object) List.of("value2", "value3")), + hasEntry("c", (Object) "value4"))); } @Test @@ -48,10 +54,13 @@ void testMapAppendWithArrayValues() { Map result = MapAppendFunctionImpl.mapAppendImpl(map1, map2); - assertEquals(3, result.size()); - assertMapListValues(result, "a", "item1", "item2", "item3"); - assertMapListValues(result, "b", "single"); - assertMapListValues(result, "c", "item4", "item5"); + assertThat( + result, + allOf( + aMapWithSize(3), + hasEntry("a", (Object) List.of("item1", "item2", "item3")), + hasEntry("b", (Object) "single"), + hasEntry("c", (Object) List.of("item4", "item5")))); } @Test @@ -64,11 +73,14 @@ void testMapAppendWithNullValues() { Map result = MapAppendFunctionImpl.mapAppendImpl(map1, map2); - assertEquals(4, result.size()); - assertMapListValues(result, "a", "value1"); - assertMapListValues(result, "b", "value2"); - assertMapListValues(result, "c", "value3"); - assertMapListValues(result, "d", "value4"); + assertThat( + result, + allOf( + hasEntry("a", "value1"), + hasEntry("b", "value2"), + hasEntry("c", "value3"), + hasEntry("d", "value4"), + aMapWithSize(4))); } @Test @@ -77,9 +89,7 @@ void testMapAppendWithSingleParam() { Map result = 
MapAppendFunctionImpl.mapAppendImpl(map1); - assertEquals(2, result.size()); - assertMapListValues(result, "a", "value1"); - assertMapListValues(result, "b", "value2"); + assertThat(result, allOf(hasEntry("a", "value1"), hasEntry("b", "value2"), aMapWithSize(2))); } private Map getMap1() { @@ -95,14 +105,4 @@ private Map getMap2() { map2.put("d", "value4"); return map2; } - - private void assertMapListValues(Map map, String key, Object... expectedValues) { - Object val = map.get(key); - assertTrue(val instanceof List); - List result = (List) val; - assertEquals(expectedValues.length, result.size()); - for (int i = 0; i < expectedValues.length; i++) { - assertEquals(expectedValues[i], result.get(i)); - } - } } diff --git a/docs/user/ppl/cmd/spath.rst b/docs/user/ppl/cmd/spath.rst index 7defb4437f..cf3ee331bd 100644 --- a/docs/user/ppl/cmd/spath.rst +++ b/docs/user/ppl/cmd/spath.rst @@ -13,18 +13,17 @@ Description ============ | The `spath` command allows extracting fields from structured text data. It currently allows selecting from JSON data with JSON paths. -Version ======= -3.3.0 - Syntax ============ -spath input= [output=] [path=] +spath input= [output=] [path=] * input: mandatory. The field to scan for JSON data. -* output: optional. The destination field that the data will be loaded to. Defaults to the value of `path`. -* path: mandatory. The path of the data to load for the object. For more information on path syntax, see `json_extract <../functions/json.rst#json_extract>`_. +* output: optional. The destination field that the data will be loaded to. Defaults to the value of `path` when `path` is specified. +* path: optional. The path of the data to load for the object. For more information on path syntax, see `json_extract <../functions/json.rst#json_extract>`_. + * When the path parameter is omitted, all fields from the input JSON are extracted as separate fields. If a field with the same name already exists, the extracted value is appended to the existing value to form an array. + +* Limitation: When the `path` parameter is omitted, the command introduces dynamic fields internally and disables field name and type checking during query planning. To get typed, checked fields back, explicitly cast them with `eval` and select them with the `fields` command. Note ===== The simplest spath is to extract a single field.
This extracts `n` from the `doc PPL query:: - PPL> source=test_spath | spath input=doc n; + os> source=nested_json | spath input=doc id; fetched rows / total rows = 3/3 - +----------+---+ - | doc | n | - |----------+---| - | {"n": 1} | 1 | - | {"n": 2} | 2 | - | {"n": 3} | 3 | - +----------+---+ + +------------------------------------------------------------------------------------------------------------+----+ + | doc | id | + |------------------------------------------------------------------------------------------------------------+----| + | {"id": 1, "name": "John", "age": 30, "list": [1, 2, 3, 4], "extras": [{"nest_in": "a"}, {"nest_in": "b"}]} | 1 | + | {"id": 2, "name": "Jane", "list": [], "extras": [{"nest_in": "a"}]} | 2 | + | {"id": 3, "list": [5, 6], "extras": []} | 3 | + +------------------------------------------------------------------------------------------------------------+----+ Example 2: Lists & Nesting ============================ @@ -54,15 +53,15 @@ These queries demonstrate more JSON path uses, like traversing nested fields and PPL query:: - PPL> source=test_spath | spath input=doc output=first_element list{0} | spath input=doc output=all_elements list{} | spath input=doc output=nested nest_out.nest_in; + os> source=nested_json | spath input=doc output=first_element list{0} | spath input=doc output=all_elements list{} | spath input=doc output=nested extras{0}.nest_in; fetched rows / total rows = 3/3 - +------------------------------------------------------+---------------+--------------+--------+ - | doc | first_element | all_elements | nested | - |------------------------------------------------------+---------------+--------------+--------| - | {"list": [1, 2, 3, 4], "nest_out": {"nest_in": "a"}} | 1 | [1,2,3,4] | a | - | {"list": [], "nest_out": {"nest_in": "a"}} | null | [] | a | - | {"list": [5, 6], "nest_out": {"nest_in": "a"}} | 5 | [5,6] | a | - +------------------------------------------------------+---------------+--------------+--------+ + +------------------------------------------------------------------------------------------------------------+---------------+--------------+--------+ + | doc | first_element | all_elements | nested | + |------------------------------------------------------------------------------------------------------------+---------------+--------------+--------| + | {"id": 1, "name": "John", "age": 30, "list": [1, 2, 3, 4], "extras": [{"nest_in": "a"}, {"nest_in": "b"}]} | 1 | [1,2,3,4] | a | + | {"id": 2, "name": "Jane", "list": [], "extras": [{"nest_in": "a"}]} | null | [] | a | + | {"id": 3, "list": [5, 6], "extras": []} | 5 | [5,6] | null | + +------------------------------------------------------------------------------------------------------------+---------------+--------------+--------+ Example 3: Sum of inner elements ============================ @@ -71,10 +70,29 @@ The example shows extracting an inner field and doing statistics on it, using th PPL query:: - PPL> source=test_spath | spath input=doc n | eval n=cast(n as int) | stats sum(n); + os> source=nested_json | spath input=doc id | eval id=cast(id as int) | stats sum(id); fetched rows / total rows = 1/1 - +--------+ - | sum(n) | - |--------| - | 6 | - +--------+ + +---------+ + | sum(id) | + |---------| + | 6 | + +---------+ + +Example 4: Dynamic Field Extraction +==================================== + +When the path parameter is omitted, `spath` extracts all fields from the JSON as dynamic columns. 
This is useful when you want to access multiple fields from JSON data without specifying individual paths. + +PPL query:: + + os> source=nested_json | spath input=doc; + fetched rows / total rows = 3/3 + +------------------------------------------------------------------------------------------------------------+------+------------------+----+-------------------+------+ + | doc | age | extras{}.nest_in | id | list{} | name | + |------------------------------------------------------------------------------------------------------------+------+------------------+----+-------------------+------| + | {"id": 1, "name": "John", "age": 30, "list": [1, 2, 3, 4], "extras": [{"nest_in": "a"}, {"nest_in": "b"}]} | 30 | ["a","b"] | 1 | ["1","2","3","4"] | John | + | {"id": 2, "name": "Jane", "list": [], "extras": [{"nest_in": "a"}]} | null | a | 2 | null | Jane | + | {"id": 3, "list": [5, 6], "extras": []} | null | null | 3 | ["5","6"] | null | + +------------------------------------------------------------------------------------------------------------+------+------------------+----+-------------------+------+ + +This approach allows you to reference any field that exists in the JSON data structure, making it particularly useful for semi-structured data where different documents may contain different fields. diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 81936ffbfb..2adad80a7f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -1416,4 +1416,11 @@ public void testTopKThenSortExplain() throws IOException { + "| sort age " + "| fields age")); } + + @Test + public void testSpathWithoutOutputExplain() throws IOException { + String expected = loadExpectedPlan("explain_spath_without_output.yaml"); + assertYamlEqualsIgnoreId( + expected, explainQueryYaml(source(TEST_INDEX_LOGS, "spath input=message | fields test"))); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java index 51b5bd4030..92db6dcb26 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java @@ -12,37 +12,313 @@ import java.io.IOException; import org.json.JSONObject; +import org.junit.Ignore; import org.junit.jupiter.api.Test; -import org.opensearch.client.Request; import org.opensearch.sql.ppl.PPLIntegTestCase; public class CalcitePPLSpathCommandIT extends PPLIntegTestCase { + + private static final String TEST_INDEX_DYNAMIC_FIELDS = "test_dynamic_fields"; + private static final String TEST_INDEX_COMPLEX_JSON = "test_complex_json"; + @Override public void init() throws Exception { super.init(); enableCalcite(); - loadIndex(Index.BANK); + loadIndex(Index.JSON_TEST); - // Create test data for string concatenation - Request request1 = new Request("PUT", "/test_spath/_doc/1?refresh=true"); - request1.setJsonEntity("{\"doc\": \"{\\\"n\\\": 1}\"}"); - client().performRequest(request1); + createDynamicFieldsTestData(); + createComplexJsonTestData(); + } - Request request2 = new Request("PUT", "/test_spath/_doc/2?refresh=true"); - request2.setJsonEntity("{\"doc\": \"{\\\"n\\\": 2}\"}"); - 
client().performRequest(request2); + private void createDynamicFieldsTestData() throws IOException { + createDocumentsWithJsonField( + TEST_INDEX_DYNAMIC_FIELDS, + "json_data", + "{\"name\": \"John\", \"age\": 30, \"city\": \"New York\"}", + "{\"name\": \"Jane\", \"age\": 25, \"country\": \"USA\"}", + "{\"product\": \"laptop\", \"price\": 999.99, \"brand\": \"Dell\"}"); + } - Request request3 = new Request("PUT", "/test_spath/_doc/3?refresh=true"); - request3.setJsonEntity("{\"doc\": \"{\\\"n\\\": 3}\"}"); - client().performRequest(request3); + private void createComplexJsonTestData() throws IOException { + createDocumentsWithJsonField( + TEST_INDEX_COMPLEX_JSON, + "data", + "{\"user\": {\"name\": \"Alice\", \"profile\": {\"age\": 28, \"location\": \"Seattle\"}}," + + " \"preferences\": [\"music\", \"travel\"]}", + "{\"user\": {\"name\": \"Bob\", \"profile\": {\"age\": 35, \"location\": \"Portland\"}}," + + " \"settings\": {\"theme\": \"dark\", \"notifications\": true}}", + "{\"user\": {\"name\": \"John\", \"profile\": {\"age\": 40, \"location\": \"Kirkland\"}}," + + " \"nested\": [{\"a\": \"v1\", \"arr\": [1, 2, 3]}, {\"a\": \"v2\", \"arr\": [4," + + " 5]}]}"); } @Test public void testSimpleSpath() throws IOException { JSONObject result = - executeQuery("source=test_spath | spath input=doc output=result path=n | fields result"); - verifySchema(result, schema("result", "string")); - verifyDataRows(result, rows("1"), rows("2"), rows("3")); + executeQuery( + source( + TEST_INDEX_DYNAMIC_FIELDS, + "spath input=json_data output=result path=name | fields id, result")); + verifySchema(result, schema("id", "bigint"), schema("result", "string")); + verifyDataRows(result, rows(1L, "John"), rows(2L, "Jane"), rows(3L, "null")); + } + + @Test + public void testSpathDynamicFieldsBasic() throws IOException { + JSONObject result = + executeQuery( + source( + TEST_INDEX_DYNAMIC_FIELDS, "spath input=json_data | fields id, name, age, city")); + + verifySchema( + result, + schema("id", "bigint"), + schema("name", "string"), + schema("age", "int"), + schema("city", "string")); + + verifyDataRows( + result, + rows(1, "John", 30, "New York"), + rows(2, "Jane", 25, null), + rows(3, null, null, null)); + } + + @Test + public void testSpathDynamicFieldsWithDifferentFields() throws IOException { + JSONObject result = + executeQuery( + source( + TEST_INDEX_DYNAMIC_FIELDS, + "spath input=json_data | fields id, name, country, product, price")); + + verifySchema( + result, + schema("id", "bigint"), + schema("name", "string"), + schema("country", "string"), + schema("product", "string"), + schema("price", "double")); + + verifyDataRows( + result, + rows(1, "John", null, null, null), + rows(2, "Jane", "USA", null, null), + rows(3, null, null, "laptop", 999.99)); + } + + @Test + public void testSpathDynamicFieldsWithFiltering() throws IOException { + JSONObject result = + executeQuery( + source( + TEST_INDEX_DYNAMIC_FIELDS, + "spath input=json_data | where name = 'John' | fields id, name, age")); + + verifySchema(result, schema("id", "bigint"), schema("name", "string"), schema("age", "int")); + verifyDataRows(result, rows(1, "John", 30)); + } + + @Test + public void testSpathDynamicFieldsWithAggregation() throws IOException { + JSONObject result = + executeQuery( + source( + TEST_INDEX_DYNAMIC_FIELDS, "spath input=json_data | stats count() as total_count")); + + verifySchema(result, schema("total_count", "bigint")); + verifyDataRows(result, rows(3)); + } + + @Test + public void testSpathDynamicFieldsWithSorting() throws 
IOException { + JSONObject result = + executeQuery( + source(TEST_INDEX_DYNAMIC_FIELDS, "spath input=json_data | sort id | fields id, name")); + + verifySchema(result, schema("id", "bigint"), schema("name", "string")); + verifyDataRows(result, rows(1, "John"), rows(2, "Jane"), rows(3, null)); + } + + @Test + public void testSpathAndSpath() throws IOException { + JSONObject result = + executeQuery( + source( + TEST_INDEX_DYNAMIC_FIELDS, + "spath input=json_data | spath input=json_data | fields id, name | head 1")); + + System.out.println(result.toString(2)); // TODO: To be deleted + + verifySchema(result, schema("id", "bigint"), schema("name", "array")); + + verifyDataRows(result, rows(1, arr("John", "John"))); + } + + @Test + public void testFieldsSpathSpath() throws IOException { + JSONObject result = + executeQuery( + source( + TEST_INDEX_DYNAMIC_FIELDS, + "fields id, json_data | spath input=json_data | spath input=json_data" + + "| fields id, name | head 1")); + + verifySchema(result, schema("id", "bigint"), schema("name", "array")); + + verifyDataRows(result, rows(1, arr("John", "John"))); + } + + @Test + public void testSpathAndStats() throws IOException { + JSONObject result = + executeQuery( + source( + TEST_INDEX_DYNAMIC_FIELDS, + "spath input=json_data | stats count() as name_count by name" + + " | fields name_count, name")); + + verifySchema(result, schema("name_count", "bigint"), schema("name", "string")); + verifyDataRows(result, rows(1L, null), rows(1L, "Jane"), rows(1L, "John")); + } + + @Test + public void testSpathAndEventStats() throws IOException { + JSONObject result = + executeQuery( + source( + TEST_INDEX_DYNAMIC_FIELDS, + "spath input=json_data | eventstats avg(age) as avg | fields id, name, avg")); + + verifySchema(result, schema("id", "bigint"), schema("name", "string"), schema("avg", "double")); + verifyDataRows(result, rows(1, "John", 27.5), rows(2, "Jane", 27.5), rows(3, null, 27.5)); + } + + @Test + public void testSpathDynamicFieldsExplain() throws IOException { + executeQuery( + source(TEST_INDEX_DYNAMIC_FIELDS, "spath input=json_data | fields id, name, age, city")); + } + + @Test + public void testSpathWithComplexJsonNestedFieldAccess() throws IOException { + JSONObject result = + executeQuery( + source( + TEST_INDEX_COMPLEX_JSON, + "spath input=data | fields id, user.name, user.profile.age," + + " user.profile.location")); + + verifySchema( + result, + schema("id", "bigint"), + schema("user.name", "string"), + schema("user.profile.age", "int"), + schema("user.profile.location", "string")); + verifyDataRows( + result, + rows(1L, "Alice", 28, "Seattle"), + rows(2L, "Bob", 35, "Portland"), + rows(3L, "John", 40, "Kirkland")); + } + + @Test + public void testSpathWithComplexJsonFiltering() throws IOException { + JSONObject result = + executeQuery( + source( + TEST_INDEX_COMPLEX_JSON, + "spath input=data | where isnotnull(user.name) | fields id, user.name")); + + verifySchema(result, schema("id", "bigint"), schema("user.name", "string")); + verifyDataRows(result, rows(1L, "Alice"), rows(2L, "Bob"), rows(3L, "John")); + } + + @Test + public void testSpathWithComplexJsonAggregation() throws IOException { + JSONObject result = + executeQuery( + source(TEST_INDEX_COMPLEX_JSON, "spath input=data | stats count() as total_count")); + + verifySchema(result, schema("total_count", "bigint")); + verifyDataRows(result, rows(3L)); + } + + @Test + public void testSpathWithComplexJsonArrayAccess() throws IOException { + JSONObject result = + executeQuery( + source( + 
TEST_INDEX_COMPLEX_JSON, + "spath input=data | where isnotnull(`preferences{}`) | fields id," + + " `preferences{}`")); + + verifySchema(result, schema("id", "bigint"), schema("preferences{}", "array")); + verifyDataRows(result, rows(1L, arr("music", "travel"))); + } + + @Test + public void testSpathWithNestedArray() throws IOException { + JSONObject result = + executeQuery( + source( + TEST_INDEX_COMPLEX_JSON, + "spath input=data | where isnotnull(`nested{}.a`) | fields id, `nested{}.a`," + + " `nested{}.arr{}`")); + + verifySchema( + result, + schema("id", "bigint"), + schema("nested{}.a", "array"), + schema("nested{}.arr{}", "array")); + verifyDataRows(result, rows(3L, arr("v1", "v2"), arr(1, 2, 3, 4, 5))); + } + + @Test + @Ignore("Join does not work now") + public void testSpathWithJoin() throws IOException { + JSONObject result = + executeQuery( + source( + TEST_INDEX_COMPLEX_JSON, + "spath input=data | join id test_dynamic_fields | fields id, `nested{}.a`," + + " json_data")); + + verifySchema( + result, + schema("id", "bigint"), + schema("nested{}.a", "string"), + schema("json_data", "string")); + verifyDataRows( + result, + rows(1L, null, "{\"name\": \"John\", \"age\": 30, \"city\": \"New York\"}"), + rows(2L, null, "{\"name\": \"Jane\", \"age\": 25, \"country\": \"USA\"}"), + rows( + 3L, + "[\"v1\",\"v2\"]", + "{\"product\": \"laptop\", \"price\": 999.99, \"brand\": \"Dell\"}")); + } + + @Test + @Ignore("Join does not work now") + public void testSpathJoinWithSpath() throws IOException { + JSONObject result = + executeQuery( + source( + TEST_INDEX_COMPLEX_JSON, + "spath input=data | join id [index=test_dynamic_fields | spath input=json_data ] |" + + " fields id, `nested{}.a`, name, age")); + + verifySchema( + result, + schema("id", "bigint"), + schema("nested{}.a", "string"), + schema("name", "string"), + schema("age", "int")); + // Current limitation: dynamic columns from left side won't be preserved after join + verifyDataRows( + result, rows(1L, null, "John", 30), rows(2L, null, "Jane", 25), rows(3L, null, null, null)); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/AppendFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/AppendFunctionIT.java new file mode 100644 index 0000000000..a50877525a --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/AppendFunctionIT.java @@ -0,0 +1,198 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.standalone; + +import static org.hamcrest.Matchers.equalTo; + +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.List; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rex.RexNode; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.function.PPLFuncImpTable; + +public class AppendFunctionIT extends CalcitePPLRelNodeIntegTestCase { + + private static final String RESULT_FIELD = "result"; + private static final String ID_FIELD = "id"; + + @Test + public void testAppendWithNoArguments() throws Exception { + RexNode appendCall = + PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, BuiltinFunctionName.INTERNAL_APPEND); + RelNode relNode = + context + .relBuilder + .values(new String[] {ID_FIELD}, 1) + .project(context.relBuilder.alias(appendCall, RESULT_FIELD)) + .build(); + + executeRelNodeAndVerify( + context.planContext, + relNode, + resultSet -> { + 
assertTrue(resultSet.next()); + verifyColumns(resultSet, RESULT_FIELD); + assertNull(resultSet.getObject(1)); + }); + } + + @Test + public void testAppendWithSingleElement() throws Exception { + RexNode value = context.rexBuilder.makeExactLiteral(java.math.BigDecimal.valueOf(42)); + RexNode appendCall = + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.INTERNAL_APPEND, value); + RelNode relNode = + context + .relBuilder + .values(new String[] {ID_FIELD}, 1) + .project(context.relBuilder.alias(appendCall, RESULT_FIELD)) + .build(); + + executeRelNodeAndVerify( + context.planContext, + relNode, + resultSet -> { + assertTrue(resultSet.next()); + verifyColumns(resultSet, RESULT_FIELD); + assertEquals(42, resultSet.getObject(1)); + }); + } + + @Test + public void testAppendWithMultipleElements() throws Exception { + RexNode value1 = context.rexBuilder.makeExactLiteral(java.math.BigDecimal.valueOf(1)); + RexNode value2 = context.rexBuilder.makeExactLiteral(java.math.BigDecimal.valueOf(2)); + RexNode value3 = context.rexBuilder.makeExactLiteral(java.math.BigDecimal.valueOf(3)); + RexNode appendCall = + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.INTERNAL_APPEND, value1, value2, value3); + RelNode relNode = + context + .relBuilder + .values(new String[] {ID_FIELD}, 1) + .project(context.relBuilder.alias(appendCall, RESULT_FIELD)) + .build(); + + executeRelNodeAndVerify( + context.planContext, + relNode, + resultSet -> { + assertTrue(resultSet.next()); + verifyColumns(resultSet, RESULT_FIELD); + List result = getResultList(resultSet); + assertThat(result, equalTo(List.of(1, 2, 3))); + }); + } + + @Test + public void testAppendWithArrayFlattening() throws Exception { + RexNode array1 = createStringArray(context.rexBuilder, "a", "b"); + RexNode array2 = createStringArray(context.rexBuilder, "c", "d"); + RexNode appendCall = + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.INTERNAL_APPEND, array1, array2); + RelNode relNode = + context + .relBuilder + .values(new String[] {ID_FIELD}, 1) + .project(context.relBuilder.alias(appendCall, RESULT_FIELD)) + .build(); + + executeRelNodeAndVerify( + context.planContext, + relNode, + resultSet -> { + assertTrue(resultSet.next()); + verifyColumns(resultSet, RESULT_FIELD); + List result = getResultList(resultSet); + assertThat(result, equalTo(List.of("a", "b", "c", "d"))); + }); + } + + @Test + public void testAppendWithMixedTypes() throws Exception { + RexNode array = createStringArray(context.rexBuilder, "a", "b"); + RexNode number = context.rexBuilder.makeExactLiteral(java.math.BigDecimal.valueOf(42)); + RexNode string = context.rexBuilder.makeLiteral("hello"); + RexNode appendCall = + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.INTERNAL_APPEND, array, number, string); + RelNode relNode = + context + .relBuilder + .values(new String[] {ID_FIELD}, 1) + .project(context.relBuilder.alias(appendCall, RESULT_FIELD)) + .build(); + + executeRelNodeAndVerify( + context.planContext, + relNode, + resultSet -> { + assertTrue(resultSet.next()); + verifyColumns(resultSet, RESULT_FIELD); + List result = getResultList(resultSet); + assertThat(result, equalTo(List.of("a", "b", 42, "hello"))); + }); + } + + @Test + public void testAppendWithSingleString() throws Exception { + RexNode value = context.rexBuilder.makeLiteral("test"); + RexNode appendCall = + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.INTERNAL_APPEND, value); + 
RelNode relNode = + context + .relBuilder + .values(new String[] {ID_FIELD}, 1) + .project(context.relBuilder.alias(appendCall, RESULT_FIELD)) + .build(); + + executeRelNodeAndVerify( + context.planContext, + relNode, + resultSet -> { + assertTrue(resultSet.next()); + verifyColumns(resultSet, RESULT_FIELD); + assertEquals("test", resultSet.getObject(1)); + }); + } + + @Test + public void testAppendWithMultipleStrings() throws Exception { + RexNode value1 = context.rexBuilder.makeLiteral("hello"); + RexNode value2 = context.rexBuilder.makeLiteral("world"); + RexNode appendCall = + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.INTERNAL_APPEND, value1, value2); + RelNode relNode = + context + .relBuilder + .values(new String[] {ID_FIELD}, 1) + .project(context.relBuilder.alias(appendCall, RESULT_FIELD)) + .build(); + + executeRelNodeAndVerify( + context.planContext, + relNode, + resultSet -> { + assertTrue(resultSet.next()); + verifyColumns(resultSet, RESULT_FIELD); + List result = getResultList(resultSet); + assertThat(result, equalTo(List.of("hello", "world"))); + }); + } + + private List getResultList(ResultSet resultSet) throws SQLException { + Object result = resultSet.getObject(1); + assertTrue(result instanceof List); + return (List) result; + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/MapAppendFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/MapAppendFunctionIT.java index a77c3270e3..e231b8ae68 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/MapAppendFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/MapAppendFunctionIT.java @@ -5,6 +5,8 @@ package org.opensearch.sql.calcite.standalone; +import static org.hamcrest.Matchers.*; + import java.sql.ResultSet; import java.sql.SQLException; import java.util.List; @@ -41,11 +43,14 @@ public void testMapAppendWithNonOverlappingKeys() throws Exception { relNode, resultSet -> { Map result = getResultMapField(resultSet); - assertEquals(4, result.size()); - assertMapListValue(result, "key1", "value1"); - assertMapListValue(result, "key2", "value2"); - assertMapListValue(result, "key3", "value3"); - assertMapListValue(result, "key4", "value4"); + assertThat( + result, + allOf( + hasEntry("key1", "value1"), + hasEntry("key2", "value2"), + hasEntry("key3", "value3"), + hasEntry("key4", "value4"), + aMapWithSize(4))); }); } @@ -68,10 +73,13 @@ public void testMapAppendWithOverlappingKeys() throws Exception { relNode, resultSet -> { Map result = getResultMapField(resultSet); - assertEquals(3, result.size()); - assertMapListValue(result, "key1", "value1"); - assertMapListValue(result, "key2", "value2", "value3"); - assertMapListValue(result, "key3", "value4"); + assertThat( + result, + allOf( + aMapWithSize(3), + hasEntry("key1", (Object) "value1"), + hasEntry("key2", (Object) List.of("value2", "value3")), + hasEntry("key3", (Object) "value4"))); }); } @@ -100,8 +108,7 @@ private void testWithSingleNull(RexNode map1, RexNode map2) throws Exception { relNode, resultSet -> { Map result = getResultMapField(resultSet); - assertEquals(1, result.size()); - assertMapListValue(result, "key1", "value1"); + assertThat(result, allOf(hasEntry("key1", "value1"), aMapWithSize(1))); }); } @@ -143,16 +150,4 @@ private Map getResultMapField(ResultSet resultSet) throws SQLExc Map result = (Map) resultSet.getObject(1); return result; } - - @SuppressWarnings("unchecked") - private void assertMapListValue(Map map, 
String key, Object... expectedValues) { - map.containsKey(key); - Object value = map.get(key); - assertTrue(value instanceof List); - List list = (List) value; - assertEquals(expectedValues.length, list.size()); - for (int i = 0; i < expectedValues.length; i++) { - assertEquals(expectedValues[i], list.get(i)); - } - } } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java index 799666efb3..60e93bf100 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java @@ -14,7 +14,9 @@ import java.net.URI; import java.nio.file.Files; import java.nio.file.Paths; +import java.util.List; import java.util.Locale; +import java.util.Map; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.json.JSONException; @@ -50,7 +52,8 @@ protected void init() throws Exception { } protected JSONObject executeQuery(String query) throws IOException { - return jsonify(executeQueryToString(query)); + JSONObject result = jsonify(executeQueryToString(query)); + return result; } protected String executeQueryToString(String query) throws IOException { @@ -392,4 +395,74 @@ protected static String loadFromFile(String filename) { throw new RuntimeException(e); } } + + protected void createDocumentsWithJsonField( + String index, String fieldName, String... jsonContents) throws IOException { + createDocumentsWithIdAndJsonField(index, fieldName, Map.of(), jsonContents); + } + + protected void createDocumentsWithIdAndJsonField( + String index, String fieldName, Map additionalFields, String... jsonContent) + throws IOException { + StringBuilder bulkRequest = new StringBuilder(); + for (int i = 0; i < jsonContent.length; i++) { + String request = + fmt( + "{\"id\": %d, \"%s\": \"%s\" %s}", + i + 1, + fieldName, + escapeForJson(jsonContent[i]), + formatAdditionalFields(additionalFields)); + addBulkIndexRequest(bulkRequest, i + 1, request); + } + Request request = new Request("POST", "/" + index + "/_bulk?refresh=true"); + request.setJsonEntity(bulkRequest.toString()); + client().performRequest(request); + } + + private static void addBulkIndexRequest(StringBuilder sb, int id, String record) { + sb.append(indexRequest(id)); + sb.append("\n"); + sb.append(record); + sb.append("\n"); + } + + private static String indexRequest(int id) { + return fmt("{\"index\":{\"_id\":\"%d\"}}", id); + } + + private static String fmt(String str, Object... params) { + return String.format(Locale.US, str, params); + } + + private String formatAdditionalFields(Map additionalFields) { + StringBuilder sb = new StringBuilder(); + for (Map.Entry entry : additionalFields.entrySet()) { + sb.append(fmt(", \"%s\": \"%s\"", entry.getKey(), escapeForJson(entry.getValue()))); + } + return sb.toString(); + } + + protected String escapeForJson(String jsonContent) { + return jsonContent.replace("\"", "\\\""); + } + + protected List arr(Object... 
items) { + return List.of(items); + } + + protected void assertExplainYaml(String query, String expectedYaml) throws IOException { + String actualYaml = explainQueryYaml(query); + assertTrue(getDiffMessage(expectedYaml, actualYaml), expectedYaml.equals(actualYaml)); + } + + private String getDiffMessage(String expectedYaml, String actualYaml) { + return "Explain did not match:\n" + + String.format( + "# Expected: %s# Actual: %s", blockQuote(expectedYaml), blockQuote(actualYaml)); + } + + private String blockQuote(String str) { + return "```\n" + str + "```\n"; + } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_spath_without_output.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_spath_without_output.yaml new file mode 100644 index 0000000000..5256c17fdd --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_spath_without_output.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(test=[ITEM(MAP_REMOVE(JSON_EXTRACT_ALL($3), ARRAY('created_at', 'server', '@timestamp', 'message', 'level', '_id', '_index', '_score', '_maxscore', '_sort', '_routing')), 'test')]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) + physical: | + EnumerableCalc(expr#0=[{inputs}], expr#1=[JSON_EXTRACT_ALL($t0)], expr#2=['created_at'], expr#3=['server'], expr#4=['@timestamp'], expr#5=['message'], expr#6=['level'], expr#7=['_id'], expr#8=['_index'], expr#9=['_score'], expr#10=['_maxscore'], expr#11=['_sort'], expr#12=['_routing'], expr#13=[ARRAY($t2, $t3, $t4, $t5, $t6, $t7, $t8, $t9, $t10, $t11, $t12)], expr#14=[MAP_REMOVE($t1, $t13)], expr#15=['test'], expr#16=[ITEM($t14, $t15)], $f0=[$t16]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[PROJECT->[message], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["message"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_spath_without_output.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_spath_without_output.yaml new file mode 100644 index 0000000000..b0ec16da26 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_spath_without_output.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(test=[ITEM(MAP_REMOVE(JSON_EXTRACT_ALL($3), ARRAY('created_at', 'server', '@timestamp', 'message', 'level', '_id', '_index', '_score', '_maxscore', '_sort', '_routing')), 'test')]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..10=[{inputs}], expr#11=[JSON_EXTRACT_ALL($t3)], expr#12=['created_at'], expr#13=['server'], expr#14=['@timestamp'], expr#15=['message'], expr#16=['level'], expr#17=['_id'], expr#18=['_index'], expr#19=['_score'], expr#20=['_maxscore'], expr#21=['_sort'], expr#22=['_routing'], expr#23=[ARRAY($t12, $t13, $t14, $t15, $t16, $t17, $t18, $t19, $t20, $t21, $t22)], expr#24=[MAP_REMOVE($t11, $t23)], expr#25=['test'], expr#26=[ITEM($t24, $t25)], test=[$t26]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) diff --git 
a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java index f303cb725e..3a4031b2c5 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java @@ -17,6 +17,7 @@ import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; import static org.opensearch.sql.data.type.ExprCoreType.TIME; import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; +import static org.opensearch.sql.data.type.ExprCoreType.UNDEFINED; import static org.opensearch.sql.utils.DateTimeFormatters.STRICT_HOUR_MINUTE_SECOND_FORMATTER; import static org.opensearch.sql.utils.DateTimeFormatters.STRICT_YEAR_MONTH_DAY_FORMATTER; @@ -133,6 +134,7 @@ public void extendTypeMapping(Map typeMapping) { .put( OpenSearchDataType.of(OpenSearchDataType.MappingType.Binary), (c, dt) -> new OpenSearchExprBinaryValue(c.stringValue())) + .put(OpenSearchDataType.of(UNDEFINED), (c, dt) -> parseContent(c)) .build(); /** Constructor of OpenSearchExprValueFactory. */ @@ -213,7 +215,7 @@ private ExprValue parse( } } - private ExprValue parseContent(Content content) { + private static ExprValue parseContent(Content content) { if (content.isNumber()) { if (content.isInt()) { return new ExprIntegerValue(content.intValue()); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java index 32ba07d4d5..d6ec27357d 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java @@ -58,6 +58,7 @@ import org.opensearch.sql.data.model.ExprTupleValue; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.data.type.OpenSearchDateType; import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; @@ -122,6 +123,7 @@ class OpenSearchExprValueFactoryTest { .put(fieldIp, OpenSearchDataType.of(OpenSearchDataType.MappingType.Ip)) .put("geoV", OpenSearchDataType.of(OpenSearchDataType.MappingType.GeoPoint)) .put("binaryV", OpenSearchDataType.of(OpenSearchDataType.MappingType.Binary)) + .put("undefinedV", OpenSearchDataType.of(ExprCoreType.UNDEFINED)) .build(); private static final double TOLERANCE = 1E-5; private final OpenSearchExprValueFactory exprValueFactory = @@ -948,6 +950,17 @@ public void constructFromInvalidJsonThrowException() { assertEquals("invalid json: {\"invalid_json:1}.", exception.getMessage()); } + @Test + public void constructUndefined() { + assertAll( + () -> + assertEquals( + stringValue("text"), tupleValue("{\"undefinedV\":\"text\"}").get("undefinedV")), + () -> assertEquals(stringValue("text"), constructFromObject("undefinedV", "text")), + () -> assertEquals(integerValue(1), tupleValue("{\"undefinedV\":1}").get("undefinedV")), + () -> assertEquals(integerValue(1), constructFromObject("undefinedV", 1))); + } + @Test public void noTypeFoundForMapping() { assertEquals(nullValue(), 
tupleValue("{\"not_exist\":[]}").get("not_exist")); diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index f6ce4b1093..bb95eab677 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -764,8 +764,9 @@ public UnresolvedPlan visitSpathCommand(OpenSearchPPLParser.SpathCommandContext if (inField == null) { throw new IllegalArgumentException("`input` parameter is required for `spath`"); } - if (path == null) { - throw new IllegalArgumentException("`path` parameter is required for `spath`"); + if (outField != null && path == null) { + throw new IllegalArgumentException( + "`path` parameter is required when `output` is specified for `spath`"); } return new SPath(inField, outField, path); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java index a6de6e2803..309c3b911a 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java @@ -45,4 +45,16 @@ public void testEvalWithOutput() { "SELECT `JSON_EXTRACT`(`ENAME`, 'src.path') `custom`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } + + @Test + public void testSpathWithoutPath() { + String ppl = "source=EMP | fields ENAME | spath input=ENAME"; + RelNode root = getRelNode(ppl); + + String expectedSparkSql = + "SELECT `APPEND`(`ENAME`, `JSON_EXTRACT_ALL`(`ENAME`)['ENAME']) `ENAME`," + + " `MAP_REMOVE`(`JSON_EXTRACT_ALL`(`ENAME`), ARRAY ('ENAME')) `_MAP`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } }