-
Notifications
You must be signed in to change notification settings - Fork 62
Fix stream error handling #705
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
requests==2.28.2 | ||
ratelimiter==1.2.0.post0 | ||
aiolimiter==1.1.0 | ||
prometheus-client==0.17.0 | ||
quantile-estimator==0.1.2 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
import asyncio | ||
import ydb.aio | ||
import time | ||
import logging | ||
from aiolimiter import AsyncLimiter | ||
|
||
from .base import BaseJobManager | ||
from core.metrics import OP_TYPE_READ, OP_TYPE_WRITE | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class AsyncTopicJobManager(BaseJobManager):
    """Drives an SLO topic workload against YDB with asyncio.

    Spawns rate-limited topic writer tasks, topic reader tasks and an
    optional metrics-push task, then waits for all of them to complete.
    All jobs run for ``args.time`` seconds.
    """

    def __init__(self, driver, args, metrics):
        super().__init__(driver, args, metrics)
        # Narrow the driver type for this manager: it requires the asyncio driver.
        self.driver: ydb.aio.Driver = driver

    async def run_tests(self):
        """Run all write, read and metric jobs concurrently to completion."""
        tasks = [
            *await self._run_topic_write_jobs(),
            *await self._run_topic_read_jobs(),
            *self._run_metric_job(),
        ]

        await asyncio.gather(*tasks)

    async def _run_topic_write_jobs(self):
        """Create one write task per ``args.write_threads``.

        Returns:
            list[asyncio.Task]: the spawned (not yet awaited) writer tasks.
        """
        logger.info("Start async topic write jobs")

        # A single shared limiter keeps the *aggregate* write rate at write_rps,
        # regardless of how many writer tasks are running.
        write_limiter = AsyncLimiter(max_rate=self.args.write_rps, time_period=1)

        tasks = []
        for i in range(self.args.write_threads):
            task = asyncio.create_task(self._run_topic_writes(write_limiter, i), name=f"slo_topic_write_{i}")
            tasks.append(task)

        return tasks

    async def _run_topic_read_jobs(self):
        """Create one read task per ``args.read_threads``.

        Returns:
            list[asyncio.Task]: the spawned (not yet awaited) reader tasks.
        """
        logger.info("Start async topic read jobs")

        # Shared limiter: aggregate read rate stays at read_rps across all readers.
        read_limiter = AsyncLimiter(max_rate=self.args.read_rps, time_period=1)

        tasks = []
        for i in range(self.args.read_threads):
            task = asyncio.create_task(self._run_topic_reads(read_limiter), name=f"slo_topic_read_{i}")
            tasks.append(task)

        return tasks

    async def _run_topic_writes(self, limiter, partition_id=None):
        """Write padded messages to the topic for ``args.time`` seconds.

        Args:
            limiter: shared AsyncLimiter bounding the write rate.
            partition_id: optional fixed partition to write into.
        """
        # monotonic clock: immune to wall-clock jumps (NTP, DST) while timing the run
        start_time = time.monotonic()
        logger.info("Start async topic write workload")

        async with self.driver.topic_client.writer(
            self.args.path,
            codec=ydb.TopicCodec.GZIP,
            partition_id=partition_id,
        ) as writer:
            logger.info("Async topic writer created")

            message_count = 0
            while time.monotonic() - start_time < self.args.time:
                async with limiter:
                    message_count += 1

                    content = f"message_{message_count}_{asyncio.current_task().get_name()}".encode("utf-8")

                    # Pad the payload up to the configured message size.
                    if len(content) < self.args.message_size:
                        content += b"x" * (self.args.message_size - len(content))

                    message = ydb.TopicWriterMessage(data=content)

                    ts = self.metrics.start((OP_TYPE_WRITE,))
                    try:
                        await writer.write_with_ack(message)
                        logger.info("Write message: %s", content)
                        self.metrics.stop((OP_TYPE_WRITE,), ts)
                    except Exception as e:
                        # Best-effort workload: record the error in metrics and keep going.
                        self.metrics.stop((OP_TYPE_WRITE,), ts, error=e)
                        logger.error("Write error: %s", e)

        logger.info("Stop async topic write workload")

    async def _run_topic_reads(self, limiter):
        """Read and commit messages from the topic for ``args.time`` seconds.

        Args:
            limiter: shared AsyncLimiter bounding the read rate.
        """
        start_time = time.monotonic()
        logger.info("Start async topic read workload")

        async with self.driver.topic_client.reader(
            self.args.path,
            self.args.consumer,
        ) as reader:
            logger.info("Async topic reader created")

            while time.monotonic() - start_time < self.args.time:
                async with limiter:
                    ts = self.metrics.start((OP_TYPE_READ,))
                    try:
                        msg = await reader.receive_message()
                        if msg is not None:
                            logger.info("Read message: %s", msg.data.decode())
                            await reader.commit_with_ack(msg)

                        # Stop the timer even when no message arrived, so a
                        # None result cannot leak a started metric sample.
                        self.metrics.stop((OP_TYPE_READ,), ts)
                    except Exception as e:
                        self.metrics.stop((OP_TYPE_READ,), ts, error=e)
                        logger.error("Read error: %s", e)

        logger.info("Stop async topic read workload")

    def _run_metric_job(self):
        """Return a one-element list with the metrics-push task, or [] when
        no Prometheus pushgateway is configured (``args.prom_pgw`` falsy)."""
        if not self.args.prom_pgw:
            return []

        # Create async task for metrics
        task = asyncio.create_task(self._async_metric_sender(self.args.time), name="slo_metrics_sender")
        return [task]

    async def _async_metric_sender(self, runtime):
        """Periodically push metrics to the gateway for ``runtime`` seconds.

        Args:
            runtime: how long (seconds) to keep pushing.
        """
        start_time = time.monotonic()
        logger.info("Start push metrics (async)")

        # 10**6 // report_period pushes per second — assumes report_period is
        # expressed in microseconds; TODO confirm against the args parser.
        limiter = AsyncLimiter(max_rate=10**6 // self.args.report_period, time_period=1)

        while time.monotonic() - start_time < runtime:
            async with limiter:
                # metrics.push() is synchronous (blocking HTTP); run it in the
                # default executor so the event loop stays responsive.
                # get_running_loop(): get_event_loop() inside a coroutine is
                # deprecated since Python 3.10.
                await asyncio.get_running_loop().run_in_executor(None, self.metrics.push)

        logger.info("Stop push metrics (async)")
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -434,7 +434,7 @@ def _check_stop(self): | |||||||||||
raise self._stop_reason.exception() | ||||||||||||
|
||||||||||||
async def _connection_loop(self): | ||||||||||||
retry_settings = RetrySettings() # todo | ||||||||||||
retry_settings = RetrySettings(retry_cancelled=True) # todo | ||||||||||||
|
||||||||||||
while True: | ||||||||||||
attempt = 0 # todo calc and reset | ||||||||||||
|
@@ -485,15 +485,16 @@ async def _connection_loop(self): | |||||||||||
except issues.Error as err: | ||||||||||||
err_info = check_retriable_error(err, retry_settings, attempt) | ||||||||||||
if not err_info.is_retriable or self._tx is not None: # no retries in tx writer | ||||||||||||
logger.debug("writer reconnector %s stop connection loop due to %s", self._id, err) | ||||||||||||
self._stop(err) | ||||||||||||
return | ||||||||||||
|
||||||||||||
await asyncio.sleep(err_info.sleep_timeout_seconds) | ||||||||||||
logger.debug( | ||||||||||||
"writer reconnector %s retry in %s seconds", | ||||||||||||
self._id, | ||||||||||||
err_info.sleep_timeout_seconds, | ||||||||||||
) | ||||||||||||
await asyncio.sleep(err_info.sleep_timeout_seconds) | ||||||||||||
|
||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Moving the logging statement before the sleep operation could help with debugging timing issues, but placing it after the sleep would make the sequencing clearer, since the log message would then describe the action taken next. However, placing the log after the sleep creates a mismatch between the message "retry in X seconds" and the wait that has already elapsed, which could be confusing. Consider updating the log message so its wording matches its position relative to the sleep.
Suggested change
Copilot uses AI. Check for mistakes. Positive FeedbackNegative Feedback |
||||||||||||
except (asyncio.CancelledError, Exception) as err: | ||||||||||||
self._stop(err) | ||||||||||||
|
Uh oh!
There was an error while loading. Please reload this page.