2 changes: 2 additions & 0 deletions .github/workflows/build_python_connect.yml
@@ -79,6 +79,8 @@ jobs:
        env:
          SPARK_TESTING: 1
          SPARK_CONNECT_TESTING_REMOTE: sc://localhost
+         # Increase socket timeout for CI environment reliability
+         SPARK_AUTH_SOCKET_TIMEOUT: 60
        run: |
          # Make less noisy
          cp conf/log4j2.properties.template conf/log4j2.properties
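For context on what this variable does: PySpark workers consult `SPARK_AUTH_SOCKET_TIMEOUT` when setting the timeout on the local authentication socket, so raising it to 60 gives slow CI runners more headroom. A minimal sketch of the lookup pattern, assuming the conventional 15-second fallback (the exact default and call site inside PySpark may differ):

```python
import os
import socket

# Read the auth-socket timeout from the environment; the 15-second
# fallback here is an assumption about PySpark's built-in default.
timeout = int(os.environ.get("SPARK_AUTH_SOCKET_TIMEOUT", 15))

sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.settimeout(timeout)  # with the change above, CI would use 60s instead
```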
4 changes: 4 additions & 0 deletions .github/workflows/build_python_connect35.yml
@@ -22,6 +22,8 @@ name: Build / Python-only, Connect-only (master-server, branch-3.5-client, Pytho
on:
  schedule:
    - cron: '0 21 * * *'
+ pull_request:
+   branches: [ master, main ]
  workflow_dispatch:

jobs:
@@ -82,6 +84,8 @@ jobs:
          SPARK_TESTING: 1
          SPARK_SKIP_CONNECT_COMPAT_TESTS: 1
          SPARK_CONNECT_TESTING_REMOTE: sc://localhost
+         # Increase socket timeout for CI environment reliability
+         SPARK_AUTH_SOCKET_TIMEOUT: 60
        run: |
          # Make less noisy
          cp conf/log4j2.properties.template conf/log4j2.properties
@@ -55,6 +55,28 @@ def process(df_id, batch_id): # type: ignore[no-untyped-def]
            f"{log_name} Completed batch {batch_id} with DF id {df_id} and session id {session_id}"
        )

+    def create_spark_session_with_retry(connect_url, session_id, max_retries=3):
+        """Create Spark Connect session with retry logic for better reliability in CI environments."""
+        import time
+
+        for attempt in range(max_retries):
+            try:
+                print(f"{log_name} Attempting to connect (attempt {attempt + 1}/{max_retries})")
+                spark_connect_session = SparkSession.builder.remote(connect_url).getOrCreate()
+                assert spark_connect_session.session_id == session_id
+                print(f"{log_name} Successfully connected to Spark Connect server")
+                return spark_connect_session
+            except Exception as e:
+                print(f"{log_name} Connection attempt {attempt + 1} failed: {str(e)}")
+                if attempt < max_retries - 1:
+                    # Exponential backoff (2 ** attempt): 1s after the first
+                    # failure, 2s after the second; no sleep after the last.
+                    wait_time = 2 ** attempt
+                    print(f"{log_name} Retrying in {wait_time} seconds...")
+                    time.sleep(wait_time)
+                else:
+                    print(f"{log_name} All connection attempts failed")
+                    raise
+
    try:
        check_python_version(infile)

@@ -68,8 +90,9 @@ def process(df_id, batch_id): # type: ignore[no-untyped-def]

        # To attach to the existing SparkSession, we're setting the session_id in the URL.
        connect_url = connect_url + ";session_id=" + session_id
-        spark_connect_session = SparkSession.builder.remote(connect_url).getOrCreate()
-        assert spark_connect_session.session_id == session_id
+
+        # Use retry logic for better reliability in CI environments
+        spark_connect_session = create_spark_session_with_retry(connect_url, session_id)
        spark = spark_connect_session

        func = worker.read_command(pickle_ser, infile)
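A note on the backoff schedule in `create_spark_session_with_retry`: with `max_retries=3` the helper sleeps only after the first two failures, so the worst case adds 2**0 + 2**1 = 3 seconds of waiting before the final attempt. A hypothetical standalone sketch of the same schedule with jitter added (jitter is not part of this change, just a common hardening step for CI retries):

```python
import random
import time

def backoff_delays(max_retries: int = 3, base: float = 1.0, jitter: float = 0.1):
    """Yield the delay before each retry: base * 2**attempt, plus small jitter."""
    for attempt in range(max_retries - 1):  # no sleep after the final attempt
        yield base * (2 ** attempt) + random.uniform(0, jitter)

for delay in backoff_delays():
    print(f"sleeping {delay:.2f}s before the next attempt")
    time.sleep(delay)
```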