From 05118890ecbe8008786a73efa46235f309d39f1d Mon Sep 17 00:00:00 2001
From: David Li
Date: Mon, 1 Jun 2026 10:34:42 +0900
Subject: [PATCH 1/2] test(go): at least exercise all auth methods
---
go/docs/spark.md | 40 +++++++-----
go/internal/connectimpl/client.go | 3 +-
go/validation/tests/test_auth.py | 105 ++++++++++++++++++++++++++++++
3 files changed, 130 insertions(+), 18 deletions(-)
create mode 100644 go/validation/tests/test_auth.py
diff --git a/go/docs/spark.md b/go/docs/spark.md
index de62cef..6d58610 100644
--- a/go/docs/spark.md
+++ b/go/docs/spark.md
@@ -68,38 +68,45 @@ Reserved characters in URI elements must be URI-encoded. For example, `@` become
These parameters can be specified in the URI as query parameters, or as connection parameters:
`spark.api` (query parameter: `api`)
-: **Values**: `connect`, `livy`, or `thrift+binary`.
+: **Values**: `connect`, `livy`, `thrift+binary`, or `thrift+http`.
- How to connect to Spark.
+ The protocol used to connect to Spark.
- | Value | Backend |
- |-----------------|------------------------|
- | `connect` | Spark Connect |
- | `livy` | Apache Livy |
- | `thrift+binary` | HiveServer2 (over TCP) |
+ | Value | Backend |
+ |-----------------|--------------------------------|
+ | `connect` | Spark Connect |
+ | `livy` | Apache Livy |
+ | `thrift+binary` | HiveServer2 Thrift (over TCP) |
+ | `thrift+http` | HiveServer2 Thrift (over HTTP) |
`spark.auth_type` (query parameter: `auth_type`)
: **Values**: `sql`, `spark`, or `pyspark`.
How to authenticate to Spark.
- | Auth Type | Applicable Backends |
- |-------------|---------------------|
- | `aws_sigv4` | `livy` |
- | `basic` | `connect`, `livy` |
- | `none` | `connect`, `livy` |
- | `nosasl` | `thrift+binary` |
- | `plain` | `thrift+binary` |
- | `token` | `connect` |
+ | Auth Type | Applicable Backends | Description |
+ |-------------|--------------------------------|---------------------------|
+ | `aws_sigv4` | `livy` | Use AWS SDK |
+ | `basic` | `livy` | Username/password |
+ | `ldap` | `thrift+binary`, `thrift+http` | Not yet implemented |
+ | `kerberos` | `thrift+binary`, `thrift+http` | Not yet implemented |
+ | `none` | `connect`, `livy` | No authentication |
+ | `nosasl` | `thrift+binary`, `thrift+http` | No authentication |
+ | `plain` | `thrift+binary`, `thrift+http` | Username/password |
+ | `token` | `connect` | Username/password (token) |
`spark.livy.session_kind` (query parameter: `livy.session_kind`)
: **Values**: `sql`, `spark`, or `pyspark`.
For the Livy backend, what kind of session to create.
+ :::{warning}
+ Currently only `sql` is tested/supported.
+ :::
+
## Limitations
-Different backends have limitations; some limitations related to data type support are also noted below.
+Different backends have limitations; some limitations related to data type support are also noted further below.
### HiveServer2/Thrift Protocol
@@ -109,6 +116,7 @@ Different backends have limitations; some limitations related to data type suppo
### Livy
- Only the first 1000 rows of a result set can be fetched. This can be tuned by configuring Spark with `spark.sql.repl.eagerEval.maxNumRows`.
+- In general, we have found that performance is worse than with Spark Connect or HiveServer2.
## Feature & Type Support
diff --git a/go/internal/connectimpl/client.go b/go/internal/connectimpl/client.go
index 0351384..362b101 100644
--- a/go/internal/connectimpl/client.go
+++ b/go/internal/connectimpl/client.go
@@ -39,7 +39,7 @@ type ConnectionOpts struct {
// Host is "hostname" or "hostname:port".
Host string
- AuthType AuthType
+ AuthType AuthType // not used
Username string
// Token is the OAuth2 bearer token used when AuthType is AuthTypeToken.
// The spark-connect-go client enables TLS when a token is present.
@@ -74,7 +74,6 @@ func NewClient(ctx context.Context, opts ConnectionOpts, sessionConfig map[strin
return &connectClient{session: session}, nil
}
-// TODO(lidavidm): AuthType isn't used above, and this puts username but no password?
func buildConnectionString(opts ConnectionOpts) string {
var b strings.Builder
b.WriteString("sc://")
diff --git a/go/validation/tests/test_auth.py b/go/validation/tests/test_auth.py
new file mode 100644
index 0000000..5ae2bd3
--- /dev/null
+++ b/go/validation/tests/test_auth.py
@@ -0,0 +1,105 @@
+# Copyright (c) 2026 ADBC Drivers Contributors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import adbc_driver_manager.dbapi
+import pytest
+
+from . import spark
+
+
+def pytest_generate_tests(metafunc) -> None:
+ quirks = spark.get_quirks(metafunc.config.getoption("vendor_version"))
+ driver_param = f"{quirks.name}:{quirks.short_version}"
+ combinations = [pytest.param(driver_param, id=driver_param)]
+ metafunc.parametrize(
+ "driver",
+ combinations,
+ scope="module",
+ indirect=["driver"],
+ )
+
+
+def test_auth(subtests, driver, driver_path):
+ all_options = {
+ f"auth_type={t}"
+ for t in [
+ "aws_sigv4",
+ "basic",
+ "ldap",
+ "kerberos",
+ "none",
+ "nosasl",
+ "plain",
+ "token",
+ ]
+ }
+
+ if driver.short_version.endswith("-thrifthttp") or driver.short_version.endswith(
+ "-thrift"
+ ):
+ # ensure nosasl fails to open a session, and that all other types are not accepted
+ if driver.short_version.endswith("-thrift"):
+ uri = os.environ["SPARK_URI"]
+ else:
+ uri = os.environ["SPARK_THRIFTHTTP_URI"]
+ orig = "auth_type=plain"
+ cases = [
+ ("auth_type=nosasl", "Could not open HiveServer2 session"),
+ ("auth_type=ldap", "auth type 'ldap' has not been implemented"),
+ ("auth_type=kerberos", "auth type 'kerberos' has not been implemented"),
+ ]
+ elif driver.short_version.endswith("-connect"):
+ uri = os.environ["SPARK_CONNECT_URI"]
+ orig = "auth_type=none"
+ cases = [
+ # Spark Connect client forces TLS
+ ("auth_type=token", "Could not execute query"),
+ ]
+ elif driver.short_version.endswith("-livy"):
+ uri = os.environ["SPARK_LIVY_URI"]
+ orig = "auth_type=basic"
+ cases = [
+ ("auth_type=aws_sigv4", "failed to sign request"),
+ ]
+ else:
+ raise NotImplementedError(driver.short_version)
+
+ for option in all_options:
+ seen = set([orig] + [c[0] for c in cases])
+ if option not in seen:
+ cases.append(
+ (
+ option,
+ f"invalid option value '{option[10:]}' for option spark.auth_type",
+ )
+ )
+ cases.sort(key=lambda c: c[0])
+
+ for replacement, error_message in cases:
+ with subtests.test(auth_type=replacement[10:]):
+ new_uri = uri.replace(orig, replacement)
+ with pytest.raises(adbc_driver_manager.Error, match=error_message):
+ with adbc_driver_manager.dbapi.connect(
+ driver=driver_path,
+ uri=new_uri,
+ autocommit=True,
+ db_kwargs={
+ "username": "spark",
+ "password": "spark",
+ },
+ ) as conn:
+ with conn.cursor() as cursor:
+ cursor.execute("SELECT 1")
From add6303a9ffc2c3cd0fd2f69600df2e7d5ba1f6f Mon Sep 17 00:00:00 2001
From: David Li
Date: Mon, 1 Jun 2026 11:18:27 +0900
Subject: [PATCH 2/2] test(go): do not pass username/password for nosasl in test_auth
---
go/validation/tests/test_auth.py | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/go/validation/tests/test_auth.py b/go/validation/tests/test_auth.py
index 5ae2bd3..d581e39 100644
--- a/go/validation/tests/test_auth.py
+++ b/go/validation/tests/test_auth.py
@@ -89,17 +89,22 @@ def test_auth(subtests, driver, driver_path):
cases.sort(key=lambda c: c[0])
for replacement, error_message in cases:
+ new_uri = uri.replace(orig, replacement)
+ if replacement == "auth_type=nosasl":
+ kwargs = {}
+ else:
+ kwargs = {
+ "username": "spark",
+ "password": "spark",
+ }
+
with subtests.test(auth_type=replacement[10:]):
- new_uri = uri.replace(orig, replacement)
with pytest.raises(adbc_driver_manager.Error, match=error_message):
with adbc_driver_manager.dbapi.connect(
driver=driver_path,
uri=new_uri,
autocommit=True,
- db_kwargs={
- "username": "spark",
- "password": "spark",
- },
+ db_kwargs=kwargs,
) as conn:
with conn.cursor() as cursor:
cursor.execute("SELECT 1")