Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 93 additions & 10 deletions src/kaggle/api/kaggle_api_extended.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from __future__ import print_function

import csv
from datetime import datetime
from datetime import datetime, timezone
from enum import Enum
import io

Expand Down Expand Up @@ -616,6 +616,8 @@ def __init__(self, enable_oauth: bool = False):
self.enable_oauth = enable_oauth

def _is_retriable(self, e: HTTPError) -> bool:
if self._is_rate_limited(e):
return True
return (
issubclass(type(e), ConnectionError)
or issubclass(type(e), urllib3_exceptions.ConnectionError)
Expand All @@ -625,6 +627,48 @@ def _is_retriable(self, e: HTTPError) -> bool:
or issubclass(type(e), requests.exceptions.ConnectTimeout)
)

@staticmethod
def _is_rate_limited(e: Exception) -> bool:
"""Check if an HTTPError represents a 429 Too Many Requests response."""
return (
isinstance(e, HTTPError)
and hasattr(e, "response")
and e.response is not None
and e.response.status_code == 429
)

@staticmethod
def _get_retry_after_delay(response: Response) -> Optional[float]:
"""Parse the Retry-After header from an HTTP response.

Supports both integer seconds and HTTP-date formats per RFC 9110 §10.2.3.

Args:
response: The HTTP response object.

Returns:
The delay in seconds, or None if the header is absent or unparseable.
"""
retry_after = response.headers.get("Retry-After")
if retry_after is None:
return None

# Try integer seconds first
try:
return max(0.0, float(retry_after))
except ValueError:
pass

# Try HTTP-date format (e.g. "Wed, 26 Mar 2026 00:00:00 GMT")
try:
retry_date = datetime.strptime(retry_after, "%a, %d %b %Y %H:%M:%S %Z")
delay = (retry_date - datetime.now(timezone.utc).replace(tzinfo=None)).total_seconds()
return max(0.0, delay)
except (ValueError, TypeError):
pass

return None

def _calculate_backoff_delay(self, attempt, initial_delay_millis, retry_multiplier, randomness_factor):
delay_ms = initial_delay_millis * (retry_multiplier**attempt)
random_wait_ms = int(random() - 0.5) * 2 * delay_ms * randomness_factor
Expand All @@ -647,9 +691,28 @@ def retriable_func(*args):
except Exception as e:
if type(e) is HTTPError:
if self._is_retriable(e) and i < max_retries:
total_delay = self._calculate_backoff_delay(
i, initial_delay_millis, retry_multiplier, randomness_factor
)
# Use Retry-After header for 429 responses when available
if self._is_rate_limited(e):
retry_delay = self._get_retry_after_delay(e.response)
if retry_delay is not None:
total_delay = retry_delay
self.logger.info(
"Rate limited (429). Retry-After: %.1f seconds (attempt %d/%d)",
total_delay, i, max_retries,
)
else:
total_delay = self._calculate_backoff_delay(
i, initial_delay_millis, retry_multiplier, randomness_factor
)
self.logger.info(
"Rate limited (429). No valid Retry-After header; "
"backing off %.1f seconds (attempt %d/%d)",
total_delay, i, max_retries,
)
else:
total_delay = self._calculate_backoff_delay(
i, initial_delay_millis, retry_multiplier, randomness_factor
)
print("Request failed: %s. Will retry in %2.1f seconds" % (e, total_delay))
time.sleep(total_delay)
continue
Expand Down Expand Up @@ -2767,6 +2830,7 @@ def download_file(
requests.exceptions.ConnectionError,
requests.exceptions.Timeout,
requests.exceptions.ChunkedEncodingError,
requests.exceptions.HTTPError,
urllib3_exceptions.ProtocolError,
urllib3_exceptions.ReadTimeoutError,
OSError,
Expand All @@ -2782,12 +2846,31 @@ def download_file(
print(f"You can resume by running the same command again.")
raise

# Calculate backoff time (exponential with jitter)
backoff_time = min(2**retry_count + random(), 60) # Cap at 60 seconds

if not quiet:
print(f"\nConnection error: {type(e).__name__}: {str(e)}")
print(f"Retrying in {backoff_time:.1f} seconds... (attempt {retry_count}/{max_retries})")
# Use Retry-After header for 429 responses when available
if self._is_rate_limited(e):
retry_delay = self._get_retry_after_delay(e.response)
if retry_delay is not None:
backoff_time = retry_delay
self.logger.info(
"Rate limited (429). Retry-After: %.1f seconds (attempt %d/%d)",
backoff_time, retry_count, max_retries,
)
else:
backoff_time = min(2**retry_count + random(), 60)
self.logger.info(
"Rate limited (429). No valid Retry-After header; "
"backing off %.1f seconds (attempt %d/%d)",
backoff_time, retry_count, max_retries,
)
if not quiet:
print(f"\nRate limited (HTTP 429). Retrying in {backoff_time:.1f} seconds... "
f"(attempt {retry_count}/{max_retries})")
else:
# Calculate backoff time (exponential with jitter)
backoff_time = min(2**retry_count + random(), 60) # Cap at 60 seconds
if not quiet:
print(f"\nConnection error: {type(e).__name__}: {str(e)}")
print(f"Retrying in {backoff_time:.1f} seconds... (attempt {retry_count}/{max_retries})")

time.sleep(backoff_time)

Expand Down
172 changes: 172 additions & 0 deletions tests/test_retry_after.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
# coding=utf-8
"""Unit tests for Retry-After header handling (issue #938).

These tests use unittest.mock to simulate HTTP 429 responses and verify
that the retry logic respects the Retry-After header.
"""

import unittest
from datetime import datetime, timedelta, timezone
from unittest.mock import MagicMock, patch

from requests.exceptions import HTTPError
from requests.models import Response

from kaggle.api.kaggle_api_extended import KaggleApi


class TestIsRateLimited(unittest.TestCase):
"""Tests for KaggleApi._is_rate_limited()."""

def test_returns_true_for_429_http_error(self):
response = Response()
response.status_code = 429
error = HTTPError(response=response)
self.assertTrue(KaggleApi._is_rate_limited(error))

def test_returns_false_for_500_http_error(self):
response = Response()
response.status_code = 500
error = HTTPError(response=response)
self.assertFalse(KaggleApi._is_rate_limited(error))

def test_returns_false_for_non_http_error(self):
self.assertFalse(KaggleApi._is_rate_limited(ValueError("oops")))

def test_returns_false_for_none_response(self):
error = HTTPError(response=None)
self.assertFalse(KaggleApi._is_rate_limited(error))


class TestGetRetryAfterDelay(unittest.TestCase):
"""Tests for KaggleApi._get_retry_after_delay()."""

def _make_response(self, retry_after_value=None):
response = Response()
response.status_code = 429
if retry_after_value is not None:
response.headers["Retry-After"] = retry_after_value
return response

def test_returns_none_when_header_absent(self):
response = self._make_response()
self.assertIsNone(KaggleApi._get_retry_after_delay(response))

def test_parses_integer_seconds(self):
response = self._make_response("120")
self.assertAlmostEqual(KaggleApi._get_retry_after_delay(response), 120.0)

def test_parses_float_seconds(self):
response = self._make_response("30.5")
self.assertAlmostEqual(KaggleApi._get_retry_after_delay(response), 30.5)

def test_negative_value_clamped_to_zero(self):
response = self._make_response("-5")
self.assertAlmostEqual(KaggleApi._get_retry_after_delay(response), 0.0)

def test_parses_http_date(self):
future = datetime.now(timezone.utc).replace(tzinfo=None) + timedelta(seconds=60)
date_str = future.strftime("%a, %d %b %Y %H:%M:%S GMT")
response = self._make_response(date_str)
delay = KaggleApi._get_retry_after_delay(response)
self.assertIsNotNone(delay)
# Allow some tolerance for time elapsed during test
self.assertAlmostEqual(delay, 60.0, delta=2.0)

def test_past_http_date_clamped_to_zero(self):
past = datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(seconds=60)
date_str = past.strftime("%a, %d %b %Y %H:%M:%S GMT")
response = self._make_response(date_str)
self.assertAlmostEqual(KaggleApi._get_retry_after_delay(response), 0.0)

def test_returns_none_for_garbage(self):
response = self._make_response("not-a-number-or-date")
self.assertIsNone(KaggleApi._get_retry_after_delay(response))


class TestIsRetriable(unittest.TestCase):
"""Tests for KaggleApi._is_retriable() with rate-limiting."""

def setUp(self):
self.api = KaggleApi.__new__(KaggleApi)

def test_429_is_retriable(self):
response = Response()
response.status_code = 429
error = HTTPError(response=response)
self.assertTrue(self.api._is_retriable(error))

def test_403_is_not_retriable(self):
response = Response()
response.status_code = 403
error = HTTPError(response=response)
self.assertFalse(self.api._is_retriable(error))


class TestWithRetryRateLimiting(unittest.TestCase):
"""Tests for KaggleApi.with_retry() handling 429 responses."""

def setUp(self):
self.api = KaggleApi.__new__(KaggleApi)
self.api.logger = MagicMock()

def _make_429_error(self, retry_after=None):
response = Response()
response.status_code = 429
if retry_after is not None:
response.headers["Retry-After"] = str(retry_after)
error = HTTPError(response=response)
return error

@patch("kaggle.api.kaggle_api_extended.time.sleep")
@patch("builtins.print")
def test_respects_retry_after_header(self, mock_print, mock_sleep):
error = self._make_429_error(retry_after=42)

call_count = 0

def failing_then_succeeding(*args):
nonlocal call_count
call_count += 1
if call_count == 1:
raise error
return "success"

wrapped = self.api.with_retry(failing_then_succeeding, max_retries=3)
result = wrapped()

self.assertEqual(result, "success")
# Should have slept for 42 seconds (from Retry-After)
mock_sleep.assert_called_once_with(42.0)
# Logger should have been called with rate-limit info
self.api.logger.info.assert_called()
log_msg = self.api.logger.info.call_args[0][0]
self.assertIn("Retry-After", log_msg)

@patch("kaggle.api.kaggle_api_extended.time.sleep")
@patch("builtins.print")
def test_falls_back_to_backoff_without_retry_after(self, mock_print, mock_sleep):
error = self._make_429_error(retry_after=None)

call_count = 0

def failing_then_succeeding(*args):
nonlocal call_count
call_count += 1
if call_count == 1:
raise error
return "success"

wrapped = self.api.with_retry(failing_then_succeeding, max_retries=3)
result = wrapped()

self.assertEqual(result, "success")
# Should have slept for some backoff delay (not the retry-after value)
mock_sleep.assert_called_once()
# Logger should mention missing Retry-After
log_msg = self.api.logger.info.call_args[0][0]
self.assertIn("No valid Retry-After", log_msg)


if __name__ == "__main__":
unittest.main()
Loading