Skip to content

Commit

Permalink
Allow incomplete json in json_extract().
Browse files Browse the repository at this point in the history
Summary: Allow incomplete json in json_extract().

Differential Revision: D58002204
  • Loading branch information
Sergey Pershin authored and facebook-github-bot committed May 31, 2024
1 parent 76424c0 commit 6f085c7
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 3 deletions.
4 changes: 4 additions & 0 deletions velox/docs/functions/presto/json.rst
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ JSON Functions
SELECT json_extract(json, '$.store.book');

The current implementation supports only a limited subset of JSONPath syntax.
The JSON string may be incomplete (truncated); for example, ``$.a`` can still be extracted from ``{"a": 123, "b``.

.. _JSONPath: http://goessner.net/articles/JsonPath/

Expand All @@ -146,6 +147,9 @@ JSON Functions
SELECT json_extract_scalar('[1, 2, 3]', '$[2]');
SELECT json_extract_scalar(json, '$.store.book[0].author');

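The current implementation supports only a limited subset of JSONPath syntax.
The JSON string may be incomplete (truncated); for example, ``$.a`` can still be extracted as a scalar from ``{"a": 123, "b``.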

.. _JSONPath: http://goessner.net/articles/JsonPath/

.. function:: json_format(json) -> varchar
Expand Down
2 changes: 1 addition & 1 deletion velox/functions/prestosql/json/SIMDJsonExtractor.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ simdjson::error_code simdJsonExtract(
SIMDJsonExtractor& extractor,
TConsumer&& consumer) {
simdjson::padded_string paddedJson(json.data(), json.size());
SIMDJSON_ASSIGN_OR_RAISE(auto jsonDoc, simdjsonParse(paddedJson));
SIMDJSON_ASSIGN_OR_RAISE(auto jsonDoc, simdjsonParse(paddedJson, true));

if (extractor.isRootOnlyPath()) {
// If the path is just to return the original object, call consumer on the
Expand Down
6 changes: 5 additions & 1 deletion velox/functions/prestosql/json/SIMDJsonUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,12 @@ void simdjsonErrorsToExceptions(
}

// Parses `json` with a simdjson on-demand parser and returns the resulting
// document (or the simdjson error) to the caller.
//
// When `allowIncompleteJson` is true the parser's
// iterate_allow_incomplete_json() entry point is used; otherwise the regular
// iterate() entry point is used.
simdjson::simdjson_result<simdjson::ondemand::document> simdjsonParse(
const simdjson::padded_string_view& json) {
const simdjson::padded_string_view& json,
bool allowIncompleteJson) {
// A single parser instance is kept per thread and reused across calls.
thread_local simdjson::ondemand::parser parser;
if (allowIncompleteJson) {
return parser.iterate_allow_incomplete_json(json);
}
return parser.iterate(json);
}

Expand Down
3 changes: 2 additions & 1 deletion velox/functions/prestosql/json/SIMDJsonUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ void simdjsonErrorsToExceptions(

/// Parse the input json string using a thread local on demand parser.
/// @param json Padded view over the JSON text to parse.
/// @param allowIncompleteJson If true, the input is parsed through simdjson's
/// iterate_allow_incomplete_json() instead of iterate(). Defaults to false.
/// @return The on-demand document, or the simdjson error reported by the
/// parser.
simdjson::simdjson_result<simdjson::ondemand::document> simdjsonParse(
const simdjson::padded_string_view& json);
const simdjson::padded_string_view& json,
bool allowIncompleteJson = false);

} // namespace facebook::velox
5 changes: 5 additions & 0 deletions velox/functions/prestosql/json/SIMDJsonWrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,13 @@

#pragma once

// Enable optional handling of incomplete json in simdjson library.
// The macro is defined ahead of the include below so that it is visible while
// the simdjson header is being processed.
#define SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON

// Use the single-header copy under simdjson/singleheader/ when it is on the
// include path; otherwise fall back to a plain "simdjson.h".
#if __has_include("simdjson/singleheader/simdjson.h")
#include "simdjson/singleheader/simdjson.h"
#else
#include "simdjson.h"
#endif

// Limit the macro's scope to the simdjson include above.
#undef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON
13 changes: 13 additions & 0 deletions velox/functions/prestosql/tests/JsonFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,19 @@ TEST_F(JsonFunctionsTest, invalidPath) {
VELOX_ASSERT_THROW(jsonSize(R"({"k1":"v1)", "$.k1]"), "Invalid JSON path");
}

TEST_F(JsonFunctionsTest, jsonExtractIncomplete) {
  // Evaluates `<funcName>(c0, c1)` over a single-row input where c0 carries
  // the JSON value and c1 carries the JSON path.
  const auto evalJsonFunc = [&](std::optional<std::string> jsonInput,
                                const std::string& funcName,
                                const std::string& jsonPath) {
    const auto expression = fmt::format("{}(c0, c1)", funcName);
    auto input = makeRowVector(
        {makeJsonVector(jsonInput), makeFlatVector<std::string>({jsonPath})});
    return evaluateOnce<std::string>(expression, input);
  };

  // The document is cut off in the middle of its second key; the value of the
  // first key is still expected to be returned by both functions.
  EXPECT_EQ("123", evalJsonFunc("{\"a\": 123, \"b", "json_extract", "$.a"));
  EXPECT_EQ(
      "123", evalJsonFunc("{\"a\": 123, \"b", "json_extract_scalar", "$.a"));
}

TEST_F(JsonFunctionsTest, jsonExtract) {
auto jsonExtract = [&](std::optional<std::string> json,
const std::string& path) {
Expand Down

0 comments on commit 6f085c7

Please sign in to comment.