From 05118890ecbe8008786a73efa46235f309d39f1d Mon Sep 17 00:00:00 2001 From: David Li Date: Mon, 1 Jun 2026 10:34:42 +0900 Subject: [PATCH 1/2] test(go): at least exercise all auth methods --- go/docs/spark.md | 40 +++++++----- go/internal/connectimpl/client.go | 3 +- go/validation/tests/test_auth.py | 105 ++++++++++++++++++++++++++++++ 3 files changed, 130 insertions(+), 18 deletions(-) create mode 100644 go/validation/tests/test_auth.py diff --git a/go/docs/spark.md b/go/docs/spark.md index de62cef..6d58610 100644 --- a/go/docs/spark.md +++ b/go/docs/spark.md @@ -68,38 +68,45 @@ Reserved characters in URI elements must be URI-encoded. For example, `@` become These parameters can be specified in the URI as query parameters, or as connection parameters: `spark.api` (query parameter: `api`) -: **Values**: `connect`, `livy`, or `thrift+binary`. +: **Values**: `connect`, `livy`, `thrift+binary`, or `thrift+http`. - How to connect to Spark. + The protocol used to connect to Spark. - | Value | Backend | - |-----------------|------------------------| - | `connect` | Spark Connect | - | `livy` | Apache Livy | - | `thrift+binary` | HiveServer2 (over TCP) | + | Value | Backend | + |-----------------|--------------------------------| + | `connect` | Spark Connect | + | `livy` | Apache Livy | + | `thrift+binary` | HiveServer2 Thrift (over TCP) | + | `thrift+http` | HiveServer2 Thrift (over HTTP) | `spark.auth_type` (query parameter: `auth_type`) : **Values**: `aws_sigv4`, `basic`, `ldap`, `kerberos`, `none`, `nosasl`, `plain`, or `token`. How to authenticate to Spark. 
- | Auth Type | Applicable Backends | - |-------------|---------------------| - | `aws_sigv4` | `livy` | - | `basic` | `connect`, `livy` | - | `none` | `connect`, `livy` | - | `nosasl` | `thrift+binary` | - | `plain` | `thrift+binary` | - | `token` | `connect` | + | Auth Type | Applicable Backends | Description | + |-------------|--------------------------------|---------------------------| + | `aws_sigv4` | `livy` | Use AWS SDK | + | `basic` | `livy` | Username/password | + | `ldap` | `thrift+binary`, `thrift+http` | Not yet implemented | + | `kerberos` | `thrift+binary`, `thrift+http` | Not yet implemented | + | `none` | `connect`, `livy` | No authentication | + | `nosasl` | `thrift+binary`, `thrift+http` | No authentication | + | `plain` | `thrift+binary`, `thrift+http` | Username/password | + | `token` | `connect` | Username/password (token) | `spark.livy.session_kind` (query parameter: `livy.session_kind`) : **Values**: `sql`, `spark`, or `pyspark`. For the Livy backend, what kind of session to create. + :::{warning} + Currently only `sql` is tested/supported. + ::: + ## Limitations -Different backends have limitations; some limitations related to data type support are also noted below. +Different backends have limitations; some limitations related to data type support are also noted further below. ### HiveServer2/Thrift Protocol @@ -109,6 +116,7 @@ Different backends have limitations; some limitations related to data type suppo ### Livy - Only the first 1000 rows of a result set can be fetched. This can be tuned by configuring Spark with `spark.sql.repl.eagerEval.maxNumRows`. +- In general, we have found that performance is worse than with Spark Connect or HiveServer2. 
## Feature & Type Support diff --git a/go/internal/connectimpl/client.go b/go/internal/connectimpl/client.go index 0351384..362b101 100644 --- a/go/internal/connectimpl/client.go +++ b/go/internal/connectimpl/client.go @@ -39,7 +39,7 @@ type ConnectionOpts struct { // Host is "hostname" or "hostname:port". Host string - AuthType AuthType + AuthType AuthType // not used Username string // Token is the OAuth2 bearer token used when AuthType is AuthTypeToken. // The spark-connect-go client enables TLS when a token is present. @@ -74,7 +74,6 @@ func NewClient(ctx context.Context, opts ConnectionOpts, sessionConfig map[strin return &connectClient{session: session}, nil } -// TODO(lidavidm): AuthType isn't used above, and this puts username but no password? func buildConnectionString(opts ConnectionOpts) string { var b strings.Builder b.WriteString("sc://") diff --git a/go/validation/tests/test_auth.py b/go/validation/tests/test_auth.py new file mode 100644 index 0000000..5ae2bd3 --- /dev/null +++ b/go/validation/tests/test_auth.py @@ -0,0 +1,105 @@ +# Copyright (c) 2026 ADBC Drivers Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import adbc_driver_manager.dbapi +import pytest + +from . 
import spark


def pytest_generate_tests(metafunc) -> None:
    """Parametrize the module-scoped ``driver`` fixture for the selected vendor.

    The quirks object looked up from the ``vendor_version`` command-line option
    provides the ``name:short_version`` string, which is used both as the
    parameter value (routed indirectly to the ``driver`` fixture) and as the
    test ID.
    """
    quirks = spark.get_quirks(metafunc.config.getoption("vendor_version"))
    driver_param = f"{quirks.name}:{quirks.short_version}"
    metafunc.parametrize(
        "driver",
        [pytest.param(driver_param, id=driver_param)],
        scope="module",
        indirect=["driver"],
    )


def test_auth(subtests, driver, driver_path):
    """Check that every non-working ``auth_type`` fails with the expected error.

    The backend under test is chosen from the suffix of ``driver.short_version``.
    For that backend, the URI from the environment is expected to contain the
    one working ``auth_type=...`` option; it is swapped for each other auth type
    in turn, and connecting plus running ``SELECT 1`` must raise
    ``adbc_driver_manager.Error`` with a backend-specific message. Auth types
    with no backend-specific expectation must be rejected as invalid option
    values.

    Raises:
        KeyError: if the environment variable holding the backend URI is unset.
        NotImplementedError: if ``driver.short_version`` names an unknown backend.
    """
    # Local import: the module's import header lies outside this block.
    import re

    auth_types = (
        "aws_sigv4",
        "basic",
        "ldap",
        "kerberos",
        "none",
        "nosasl",
        "plain",
        "token",
    )
    short_version = driver.short_version

    # Per backend: where the URI comes from, which auth type the URI is
    # configured with, and the expected error for auth types that the backend
    # recognizes but that cannot succeed in this environment.
    if short_version.endswith(("-thrifthttp", "-thrift")):
        # Ensure nosasl leads to a session failure, and that the
        # not-yet-implemented types are reported as such.
        if short_version.endswith("-thrift"):
            env_var = "SPARK_URI"
        else:
            env_var = "SPARK_THRIFTHTTP_URI"
        working = "plain"
        expected = {
            "nosasl": "Could not open HiveServer2 session",
            "ldap": "auth type 'ldap' has not been implemented",
            "kerberos": "auth type 'kerberos' has not been implemented",
        }
    elif short_version.endswith("-connect"):
        env_var = "SPARK_CONNECT_URI"
        working = "none"
        expected = {
            # Spark Connect client forces TLS
            "token": "Could not execute query",
        }
    elif short_version.endswith("-livy"):
        env_var = "SPARK_LIVY_URI"
        working = "basic"
        expected = {
            "aws_sigv4": "failed to sign request",
        }
    else:
        raise NotImplementedError(short_version)

    uri = os.environ[env_var]

    # Everything else must be rejected outright as an invalid option value.
    for auth_type in auth_types:
        if auth_type != working:
            expected.setdefault(
                auth_type,
                f"invalid option value '{auth_type}' for option spark.auth_type",
            )

    working_option = f"auth_type={working}"
    if working_option not in uri:
        # Without this check, str.replace would be a no-op and every subtest
        # would fail with an unhelpful "DID NOT RAISE". The URI itself is not
        # echoed because it may embed credentials.
        pytest.fail(f"{env_var} must contain {working_option!r} for this test")

    for auth_type in sorted(expected):
        new_uri = uri.replace(working_option, f"auth_type={auth_type}")
        # nosasl is exercised without credentials; all other types get the
        # test username/password.
        if auth_type == "nosasl":
            kwargs = {}
        else:
            kwargs = {
                "username": "spark",
                "password": "spark",
            }

        with subtests.test(auth_type=auth_type):
            # `match` is a regular expression; escape the literal message so
            # that characters such as '.' are not treated as wildcards.
            with pytest.raises(
                adbc_driver_manager.Error, match=re.escape(expected[auth_type])
            ):
                with adbc_driver_manager.dbapi.connect(
                    driver=driver_path,
                    uri=new_uri,
                    autocommit=True,
                    db_kwargs=kwargs,
                ) as conn:
                    with conn.cursor() as cursor:
                        cursor.execute("SELECT 1")