diff --git a/ydb/core/external_sources/s3/ut/s3_aws_credentials_ut.cpp b/ydb/core/external_sources/s3/ut/s3_aws_credentials_ut.cpp index 75a4e4c44bba..b5c7eb899284 100644 --- a/ydb/core/external_sources/s3/ut/s3_aws_credentials_ut.cpp +++ b/ydb/core/external_sources/s3/ut/s3_aws_credentials_ut.cpp @@ -252,6 +252,88 @@ Y_UNIT_TEST_SUITE(S3AwsCredentials) { } } + + Y_UNIT_TEST(TestInsertEscaping) { + const TString externalDataSourceName = "/Root/external_data_source"; + auto s3ActorsFactory = NYql::NDq::CreateS3ActorsFactory(); + auto kikimr = MakeKikimrRunner(true, nullptr, nullptr, std::nullopt, s3ActorsFactory); + + auto tc = kikimr->GetTableClient(); + auto session = tc.CreateSession().GetValueSync().GetSession(); + const TString query = fmt::format(R"( + CREATE OBJECT id (TYPE SECRET) WITH (value=`minio`); + CREATE OBJECT key (TYPE SECRET) WITH (value=`minio123`); + CREATE EXTERNAL DATA SOURCE `{external_source}` WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="{location}", + AUTH_METHOD="AWS", + AWS_ACCESS_KEY_ID_SECRET_NAME="id", + AWS_SECRET_ACCESS_KEY_SECRET_NAME="key", + AWS_REGION="ru-central-1" + ); + )", + "external_source"_a = externalDataSourceName, + "location"_a = "localhost:" + GetExternalPort("minio", "9000") + "/datalake/" + ); + + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); + + WaitBucket(kikimr, externalDataSourceName); + + auto db = kikimr->GetQueryClient(); + + TString path = TStringBuilder() << "exp_folder/some_" << EscapeC(GetSymbolsString(' ', '~', "*?{}`")) << "\\`"; + + { + // NB: AtomicUploadCommit = "false" because in minio ListMultipartUploads by prefix is not supported + auto scriptExecutionOperation = db.ExecuteScript(fmt::format(R"( + PRAGMA s3.AtomicUploadCommit = "false"; + INSERT INTO `{external_source}`.`{path}/` WITH (FORMAT = "csv_with_names") + SELECT * FROM `{external_source}`.`/a/` WITH ( + format="json_each_row", + schema( + key Utf8 NOT NULL, + value Utf8 NOT NULL + ) + ) + )", "external_source"_a = externalDataSourceName, "path"_a = path)).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(scriptExecutionOperation.Status().GetStatus(), EStatus::SUCCESS, scriptExecutionOperation.Status().GetIssues().ToString()); + UNIT_ASSERT(!scriptExecutionOperation.Metadata().ExecutionId.empty()); + + NYdb::NQuery::TScriptExecutionOperation readyOp = WaitScriptExecutionOperation(scriptExecutionOperation.Id(), kikimr->GetDriver()); + UNIT_ASSERT_EQUAL_C(readyOp.Metadata().ExecStatus, EExecStatus::Completed, readyOp.Status().GetIssues().ToString()); + } + + { + auto scriptExecutionOperation = db.ExecuteScript(fmt::format(R"( + SELECT * FROM `{external_source}`.`{path}/` WITH ( + format="csv_with_names", + schema( + key Utf8 NOT NULL, + value Utf8 NOT NULL + ) + ) + )", "external_source"_a = externalDataSourceName, "path"_a = path)).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(scriptExecutionOperation.Status().GetStatus(), EStatus::SUCCESS, scriptExecutionOperation.Status().GetIssues().ToString()); + UNIT_ASSERT(!scriptExecutionOperation.Metadata().ExecutionId.empty()); + + NYdb::NQuery::TScriptExecutionOperation readyOp = WaitScriptExecutionOperation(scriptExecutionOperation.Id(), kikimr->GetDriver()); + UNIT_ASSERT_EQUAL_C(readyOp.Metadata().ExecStatus, EExecStatus::Completed, readyOp.Status().GetIssues().ToString()); + TFetchScriptResultsResult results = db.FetchScriptResults(scriptExecutionOperation.Id(), 0).ExtractValueSync(); + UNIT_ASSERT_C(results.IsSuccess(), results.GetIssues().ToString()); + + TResultSetParser resultSet(results.ExtractResultSet()); + UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnsCount(), 2); + UNIT_ASSERT_VALUES_EQUAL(resultSet.RowsCount(), 2); + UNIT_ASSERT(resultSet.TryNextRow()); + UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(0).GetUtf8(), "1"); + UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(1).GetUtf8(), "trololo"); + UNIT_ASSERT(resultSet.TryNextRow()); + UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(0).GetUtf8(), "2"); + UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(1).GetUtf8(), "hello world"); + } + } } } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/ut/federated_query/common/common.cpp b/ydb/core/kqp/ut/federated_query/common/common.cpp index bd95dd3a6246..508862dd7313 100644 --- a/ydb/core/kqp/ut/federated_query/common/common.cpp +++ b/ydb/core/kqp/ut/federated_query/common/common.cpp @@ -3,6 +3,16 @@ #include namespace NKikimr::NKqp::NFederatedQueryTest { + TString GetSymbolsString(char start, char end, const TString& skip) { + TStringBuilder result; + for (char symbol = start; symbol <= end; ++symbol) { + if (skip.Contains(symbol)) { + continue; + } + result << symbol; + } + return result; + } NYdb::NQuery::TScriptExecutionOperation WaitScriptExecutionOperation(const NYdb::TOperation::TOperationId& operationId, const NYdb::TDriver& ydbDriver) { NYdb::NOperation::TOperationClient client(ydbDriver); diff --git a/ydb/core/kqp/ut/federated_query/common/common.h b/ydb/core/kqp/ut/federated_query/common/common.h index 06be0e9a5a41..853c8fe1a640 100644 --- a/ydb/core/kqp/ut/federated_query/common/common.h +++ b/ydb/core/kqp/ut/federated_query/common/common.h @@ -8,6 +8,8 @@ namespace NKikimr::NKqp::NFederatedQueryTest { using namespace NKikimr::NKqp; + TString GetSymbolsString(char start, char end, const TString& skip = ""); + NYdb::NQuery::TScriptExecutionOperation WaitScriptExecutionOperation( const NYdb::TOperation::TOperationId& operationId, const NYdb::TDriver& ydbDriver); diff --git a/ydb/core/kqp/ut/federated_query/s3/kqp_federated_query_ut.cpp b/ydb/core/kqp/ut/federated_query/s3/kqp_federated_query_ut.cpp index 3fcf790121e9..bda7bc0bebde 100644 --- a/ydb/core/kqp/ut/federated_query/s3/kqp_federated_query_ut.cpp +++ b/ydb/core/kqp/ut/federated_query/s3/kqp_federated_query_ut.cpp @@ -20,17 +20,6 @@ using namespace NTestUtils; using namespace fmt::literals; Y_UNIT_TEST_SUITE(KqpFederatedQuery) { - TString GetSymbolsString(char start, char end, const TString& skip = "") { - TStringBuilder result; - for (char symbol = start; symbol <= end; ++symbol) { - if (skip.Contains(symbol)) { - continue; - } - result << symbol; - } - return result; - } - Y_UNIT_TEST(ExecuteScriptWithExternalTableResolve) { const TString externalDataSourceName = "/Root/external_data_source"; const TString externalTableName = "/Root/test_binding_resolve"; diff --git a/ydb/library/yql/providers/common/http_gateway/yql_aws_signature.cpp b/ydb/library/yql/providers/common/http_gateway/yql_aws_signature.cpp index 699d43f4f119..eea7e35507d4 100644 --- a/ydb/library/yql/providers/common/http_gateway/yql_aws_signature.cpp +++ b/ydb/library/yql/providers/common/http_gateway/yql_aws_signature.cpp @@ -138,11 +138,11 @@ TString TAwsSignature::HashSHA256(TStringBuf data) { return to_lower(HexEncode(hash, SHA256_DIGEST_LENGTH)); } -TString TAwsSignature::UriEncode(const TStringBuf input, bool encodeSlash) { +TString TAwsSignature::UriEncode(const TStringBuf input, bool encodeSlash, bool encodePercent) { TStringStream result; for (const char ch : input) { if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9') || ch == '_' || - ch == '-' || ch == '~' || ch == '.') { + ch == '-' || ch == '~' || ch == '.' || (ch == '%' && !encodePercent)) { result << ch; } else if (ch == '/') { if (encodeSlash) { @@ -175,11 +175,10 @@ void TAwsSignature::PrepareCgiParameters() { auto printSingleParam = [&canonicalCgi](const TString& key, const TVector& values) { auto it = values.begin(); - canonicalCgi << UriEncode(key, true) << "=" << UriEncode(*it, true); + canonicalCgi << UriEncode(key, true, true) << "=" << UriEncode(*it, true, true); while (++it != values.end()) { - canonicalCgi << "&" << UriEncode(key, true) << "=" << UriEncode(*it, true); + canonicalCgi << "&" << UriEncode(key, true, true) << "=" << UriEncode(*it, true, true); } - }; auto it = sortedCgi.begin(); diff --git a/ydb/library/yql/providers/common/http_gateway/yql_aws_signature.h b/ydb/library/yql/providers/common/http_gateway/yql_aws_signature.h index 7a5032814b5b..469c03e10431 100644 --- a/ydb/library/yql/providers/common/http_gateway/yql_aws_signature.h +++ b/ydb/library/yql/providers/common/http_gateway/yql_aws_signature.h @@ -32,7 +32,7 @@ struct TAwsSignature { static TString HashSHA256(TStringBuf data); - static TString UriEncode(const TStringBuf input, bool encodeSlash = false); + static TString UriEncode(const TStringBuf input, bool encodeSlash = false, bool encodePercent = false); void PrepareCgiParameters(); diff --git a/ydb/library/yql/providers/common/http_gateway/yql_aws_signature_ut.cpp b/ydb/library/yql/providers/common/http_gateway/yql_aws_signature_ut.cpp index b3f053bf216f..bf6e3e01a8bb 100644 --- a/ydb/library/yql/providers/common/http_gateway/yql_aws_signature_ut.cpp +++ b/ydb/library/yql/providers/common/http_gateway/yql_aws_signature_ut.cpp @@ -2,6 +2,7 @@ #include #include +#include namespace NYql { @@ -68,5 +69,15 @@ Y_UNIT_TEST_SUITE(TAwsSignature) { UNIT_ASSERT_VALUES_EQUAL(signature1.GetAmzDate(), signature2.GetAmzDate()); UNIT_ASSERT_VALUES_EQUAL(signature1.GetAuthorization(), signature2.GetAuthorization()); } + + Y_UNIT_TEST(SignWithEscaping) { + auto time = TInstant::FromValue(30); + NYql::TAwsSignature signature("GET", UrlEscapeRet("http://os.com/my-bucket/ !\"#$%&'()+,-./0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz|~/", true), "application/json", {}, "key", "pwd", time); + UNIT_ASSERT_VALUES_EQUAL(signature.GetContentType(), "application/json"); + UNIT_ASSERT_VALUES_EQUAL(signature.GetXAmzContentSha256(), "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"); + UNIT_ASSERT_VALUES_EQUAL(signature.GetAuthorization(), "AWS4-HMAC-SHA256 Credential=/19700101///aws4_request, SignedHeaders=content-type;host;x-amz-content-sha256;x-amz-date, Signature=21470c8f999941fdc785c508f0c55afa1a12735eddd868aa7276e532d687c436"); + UNIT_ASSERT_VALUES_UNEQUAL(signature.GetAmzDate(), ""); + } } // Y_UNIT_TEST_SUITE(TAwsSignature) + } // namespace NYql diff --git a/ydb/library/yql/providers/s3/actors/yql_s3_applicator_actor.cpp b/ydb/library/yql/providers/s3/actors/yql_s3_applicator_actor.cpp index 4cf07f3f8e16..d569df2bd037 100644 --- a/ydb/library/yql/providers/s3/actors/yql_s3_applicator_actor.cpp +++ b/ydb/library/yql/providers/s3/actors/yql_s3_applicator_actor.cpp @@ -49,7 +49,7 @@ struct TCompleteMultipartUpload { } TString BuildUrl() const { - TUrlBuilder urlBuilder(Url); + NS3Util::TUrlBuilder urlBuilder(NS3Util::UrlEscapeRet(Url)); urlBuilder.AddUrlParam("uploadId", UploadId); return urlBuilder.Build(); } @@ -87,7 +87,7 @@ struct TListMultipartUploads { // This requirement will be fixed in the curl library // https://github.com/curl/curl/commit/fc76a24c53b08cdf6eec8ba787d8eac64651d56e // https://github.com/curl/curl/commit/c87920353883ef9d5aa952e724a8e2589d76add5 - TUrlBuilder urlBuilder(Url); + NS3Util::TUrlBuilder urlBuilder(NS3Util::UrlEscapeRet(Url)); if (KeyMarker) { urlBuilder.AddUrlParam("key-marker", KeyMarker); } @@ -114,7 +114,7 @@ struct TAbortMultipartUpload { } TString BuildUrl() const { - TUrlBuilder urlBuilder(Url); + NS3Util::TUrlBuilder urlBuilder(NS3Util::UrlEscapeRet(Url)); urlBuilder.AddUrlParam("uploadId", UploadId); return urlBuilder.Build(); } @@ -141,7 +141,7 @@ struct TListParts { // This requirement will be fixed in the curl library // https://github.com/curl/curl/commit/fc76a24c53b08cdf6eec8ba787d8eac64651d56e // https://github.com/curl/curl/commit/c87920353883ef9d5aa952e724a8e2589d76add5 - TUrlBuilder urlBuilder(Url); + NS3Util::TUrlBuilder urlBuilder(NS3Util::UrlEscapeRet(Url)); if (PartNumberMarker) { urlBuilder.AddUrlParam("part-number-marker", PartNumberMarker); } @@ -682,4 +682,4 @@ THolder MakeS3ApplicatorActor( ); } -} // namespace NYql::NDq \ No newline at end of file +} // namespace NYql::NDq diff --git a/ydb/library/yql/providers/s3/common/util.cpp b/ydb/library/yql/providers/s3/common/util.cpp index 074b116cc8c2..54c99fbad3fe 100644 --- a/ydb/library/yql/providers/s3/common/util.cpp +++ b/ydb/library/yql/providers/s3/common/util.cpp @@ -64,4 +64,34 @@ bool ValidateS3ReadWriteSchema(const TStructExprType* schemaStructRowType, TExpr return true; } +TUrlBuilder::TUrlBuilder(const TString& uri) + : MainUri(uri) +{} + +TUrlBuilder& TUrlBuilder::AddUrlParam(const TString& name, const TString& value) { + Params.emplace_back(name, value); + return *this; +} + +TString TUrlBuilder::Build() const { + if (Params.empty()) { + return MainUri; + } + + TStringBuilder result; + result << MainUri << "?"; + + TStringBuf separator = ""sv; + for (const auto& p : Params) { + result << separator << p.Name; + if (auto value = p.Value) { + Quote(value, ""); + result << "=" << value; + } + separator = "&"sv; + } + + return std::move(result); +} + } diff --git a/ydb/library/yql/providers/s3/common/util.h b/ydb/library/yql/providers/s3/common/util.h index d364e971078f..00df60e48f89 100644 --- a/ydb/library/yql/providers/s3/common/util.h +++ b/ydb/library/yql/providers/s3/common/util.h @@ -15,4 +15,22 @@ TString UrlEscapeRet(const TStringBuf from); bool ValidateS3ReadWriteSchema(const TStructExprType* schemaStructRowType, TExprContext& ctx); +class TUrlBuilder { + struct TParam { + TString Name; + TString Value; + }; + +public: + explicit TUrlBuilder(const TString& uri); + + TUrlBuilder& AddUrlParam(const TString& name, const TString& value = ""); + + TString Build() const; + +private: + std::vector Params; + TString MainUri; +}; + } diff --git a/ydb/library/yql/providers/s3/common/util_ut.cpp b/ydb/library/yql/providers/s3/common/util_ut.cpp index 2dcbf47ceef3..6e6a02e936e7 100644 --- a/ydb/library/yql/providers/s3/common/util_ut.cpp +++ b/ydb/library/yql/providers/s3/common/util_ut.cpp @@ -30,4 +30,37 @@ Y_UNIT_TEST_SUITE(TestS3UrlEscape) { } } +Y_UNIT_TEST_SUITE(TestUrlBuilder) { + Y_UNIT_TEST(UriOnly) { + TUrlBuilder builder("https://localhost/abc"); + UNIT_ASSERT_VALUES_EQUAL(builder.Build(), "https://localhost/abc"); + } + + Y_UNIT_TEST(Basic) { + TUrlBuilder builder("https://localhost/abc"); + builder.AddUrlParam("param1", "val1"); + builder.AddUrlParam("param2", "val2"); + + UNIT_ASSERT_VALUES_EQUAL(builder.Build(), "https://localhost/abc?param1=val1¶m2=val2"); + } + + Y_UNIT_TEST(BasicWithEncoding) { + auto url = TUrlBuilder("https://localhost/abc") + .AddUrlParam("param1", "=!@#$%^&*(){}[]\" ") + .AddUrlParam("param2", "val2") + .Build(); + + UNIT_ASSERT_VALUES_EQUAL(url, "https://localhost/abc?param1=%3D%21%40%23%24%25%5E%26%2A%28%29%7B%7D%5B%5D%22+¶m2=val2"); + } + + Y_UNIT_TEST(BasicWithAdditionalEncoding) { + auto url = TUrlBuilder("https://localhost/abc") + .AddUrlParam("param1", ":/?#[]@!$&\'()*+,;=") + .AddUrlParam("param2", "val2") + .Build(); + + UNIT_ASSERT_VALUES_EQUAL(url, "https://localhost/abc?param1=%3A%2F%3F%23%5B%5D%40%21%24%26%27%28%29%2A%2B%2C%3B%3D¶m2=val2"); + } +} + } // namespace NYql::NS3Util diff --git a/ydb/library/yql/providers/s3/object_listers/yql_s3_list.cpp b/ydb/library/yql/providers/s3/object_listers/yql_s3_list.cpp index c12bd71f0613..ea05c077a71e 100644 --- a/ydb/library/yql/providers/s3/object_listers/yql_s3_list.cpp +++ b/ydb/library/yql/providers/s3/object_listers/yql_s3_list.cpp @@ -304,7 +304,7 @@ class TS3Lister : public IS3Lister { // This requirement will be fixed in the curl library // https://github.com/curl/curl/commit/fc76a24c53b08cdf6eec8ba787d8eac64651d56e // https://github.com/curl/curl/commit/c87920353883ef9d5aa952e724a8e2589d76add5 - TUrlBuilder urlBuilder(ctx.ListingRequest.Url); + NS3Util::TUrlBuilder urlBuilder(ctx.ListingRequest.Url); if (ctx.ContinuationToken.Defined()) { urlBuilder.AddUrlParam("continuation-token", *ctx.ContinuationToken); }