
Does an event's timeout take priority over the event in the libevent adapter? #1286

Open
plainbanana opened this issue Feb 19, 2025 · 2 comments

@plainbanana

Hello, hiredis team,

We are using the async context from hiredis-cluster to connect to a Redis Cluster, and I have a question regarding the behavior of the libevent adapter.

Let's assume a scenario where the libevent adapter is used and the execution of the event loop is delayed due to high system load or heavy handlers. Even if the Redis response is already available to be read by the time the event loop runs, the timeout event always fires. In such cases it seems better to read the already received response and execute the appropriate callback instead of failing with a timeout.

In libevent.h, the timeout is registered on the same event as the I/O interest via event_add(e->ev, tv);. When both EV_READ/EV_WRITE and the timeout become active at the same time, there is no defined priority between them, and in my environment the timeout always seems to win.

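For reference, the current redisLibeventHandler dispatches the timeout and the I/O conditions from a single callback, with the timeout branch checked first (excerpt, lightly condensed; the CHECK_DELETED bookkeeping is elided):

static void redisLibeventHandler(int fd, short event, void *arg) {
    redisLibeventEvents *e = (redisLibeventEvents*)arg;
    /* ... */
    if ((event & EV_TIMEOUT) && (e->state & REDIS_LIBEVENT_DELETED) == 0) {
        redisAsyncHandleTimeout(e->context);
    }
    if ((event & EV_READ) && e->context && (e->state & REDIS_LIBEVENT_DELETED) == 0) {
        redisAsyncHandleRead(e->context);
    }
    if ((event & EV_WRITE) && e->context && (e->state & REDIS_LIBEVENT_DELETED) == 0) {
        redisAsyncHandleWrite(e->context);
    }
    /* ... */
}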

Here is a simple reproduction test. To keep things simple, the event loop is executed one iteration at a time using event_base_loop(base, EVLOOP_ONCE), and a sleep() intentionally delays the execution of the event loop.

Is this timeout behavior intended, or is it a bug?

static void commandCallbackFail(struct redisAsyncContext *ac, void *_reply, void *_privdata)
{
    (void)_reply; (void)_privdata;
    /* On a command timeout the callback gets a NULL reply and errstr is set. */
    assert(strcmp(ac->errstr, "Timeout") == 0);
    redisAsyncFree(ac);
}

static void test_libevent_ping_fail(struct config config) {
    struct timeval tv;
    int status;
    test("Async libevent PING/PONG: ");
    /* Setup event dispatcher with a testcase timeout */
    struct event_base *base = event_base_new();

    /* Connect */
    redisOptions options = get_redis_tcp_options(config);
    tv = (struct timeval) {.tv_sec = 1, .tv_usec = 0};
    options.command_timeout = &tv;
    redisAsyncContext *ac = redisAsyncConnectWithOptions(&options);
    assert(ac != NULL && ac->err == 0);
    redisLibeventAttach(ac, base);

    status = redisAsyncCommand(ac, commandCallbackFail, NULL, "PING");
    assert(status == REDIS_OK);

    /* Send command */
    test_cond(event_base_loop(base, EVLOOP_ONCE) == 0);
    /* Intentionally causing a timeout by sleeping longer than the command_timeout, despite the response being readable */
    sleep(2);
    /* Run the loop again: the PONG is already readable, but the timeout fires */
    test_cond(event_base_loop(base, EVLOOP_ONCE) == 0);

    event_base_free(base);
}

For example, applying a patch like the one below to libevent.h resolves this issue. In the patch, the I/O and timer events are registered separately, and priorities are set so that I/O events are triggered before timer events. We probably also need to call event_base_priority_init(base, 2); to initialize the priorities of the event base.

diff --git a/adapters/libevent.h b/adapters/libevent.h
--- a/adapters/libevent.h	(revision 77bcc73ebbc562d8e8173832b710bfbfa4327b13)
+++ b/adapters/libevent.h	(date 1739984228918)
@@ -39,9 +39,9 @@
 
 typedef struct redisLibeventEvents {
     redisAsyncContext *context;
-    struct event *ev;
+    struct event *ev_io;
+    struct event *ev_timer;
     struct event_base *base;
-    struct timeval tv;
     short flags;
     short state;
 } redisLibeventEvents;
@@ -60,11 +60,6 @@
         return; \
     }
 
-    if ((event & EV_TIMEOUT) && (e->state & REDIS_LIBEVENT_DELETED) == 0) {
-        redisAsyncHandleTimeout(e->context);
-        CHECK_DELETED();
-    }
-
     if ((event & EV_READ) && e->context && (e->state & REDIS_LIBEVENT_DELETED) == 0) {
         redisAsyncHandleRead(e->context);
         CHECK_DELETED();
@@ -74,6 +69,26 @@
         redisAsyncHandleWrite(e->context);
         CHECK_DELETED();
     }
+
+    e->state &= ~REDIS_LIBEVENT_ENTERED;
+    #undef CHECK_DELETED
+}
+
+static void redisLibeventTimerHandler(evutil_socket_t fd, short event, void *arg) {
+    ((void)fd);
+    ((void)event);
+    redisLibeventEvents *e = (redisLibeventEvents*)arg;
+    e->state |= REDIS_LIBEVENT_ENTERED;
+
+    #define CHECK_DELETED() if (e->state & REDIS_LIBEVENT_DELETED) {\
+        redisLibeventDestroy(e);\
+        return; \
+    }
+
+    if (e->context && (e->state & REDIS_LIBEVENT_DELETED) == 0) {
+        redisAsyncHandleTimeout(e->context);
+        CHECK_DELETED();
+    }
 
     e->state &= ~REDIS_LIBEVENT_ENTERED;
     #undef CHECK_DELETED
@@ -81,7 +96,6 @@
 
 static void redisLibeventUpdate(void *privdata, short flag, int isRemove) {
     redisLibeventEvents *e = (redisLibeventEvents *)privdata;
-    const struct timeval *tv = e->tv.tv_sec || e->tv.tv_usec ? &e->tv : NULL;
 
     if (isRemove) {
         if ((e->flags & flag) == 0) {
@@ -97,10 +111,10 @@
         }
     }
 
-    event_del(e->ev);
-    event_assign(e->ev, e->base, e->context->c.fd, e->flags | EV_PERSIST,
+    event_del(e->ev_io);
+    event_assign(e->ev_io, e->base, e->context->c.fd, e->flags | EV_PERSIST,
                  redisLibeventHandler, privdata);
-    event_add(e->ev, tv);
+    event_add(e->ev_io, NULL);
 }
 
 static void redisLibeventAddRead(void *privdata) {
@@ -124,9 +138,16 @@
     if (!e) {
         return;
     }
-    event_del(e->ev);
-    event_free(e->ev);
-    e->ev = NULL;
+    if (e->ev_io) {
+        event_del(e->ev_io);
+        event_free(e->ev_io);
+        e->ev_io = NULL;
+    }
+    if (e->ev_timer) {
+        evtimer_del(e->ev_timer);
+        event_free(e->ev_timer);
+        e->ev_timer = NULL;
+    }
 
     if (e->state & REDIS_LIBEVENT_ENTERED) {
         e->state |= REDIS_LIBEVENT_DELETED;
@@ -137,10 +158,8 @@
 
 static void redisLibeventSetTimeout(void *privdata, struct timeval tv) {
     redisLibeventEvents *e = (redisLibeventEvents *)privdata;
-    short flags = e->flags;
-    e->flags = 0;
-    e->tv = tv;
-    redisLibeventUpdate(e, flags, 0);
+    evtimer_del(e->ev_timer);
+    evtimer_add(e->ev_timer, &tv);
 }
 
 static int redisLibeventAttach(redisAsyncContext *ac, struct event_base *base) {
@@ -168,7 +187,13 @@
     ac->ev.data = e;
 
     /* Initialize and install read/write events */
-    e->ev = event_new(base, c->fd, EV_READ | EV_WRITE, redisLibeventHandler, e);
+    e->ev_io = event_new(base, c->fd, EV_READ | EV_WRITE, redisLibeventHandler, e);
+    event_priority_set(e->ev_io, 0);
+
+    /* Initialize and install timer events */
+    e->ev_timer = evtimer_new(base, redisLibeventTimerHandler, e);
+    event_priority_set(e->ev_timer, 1);
+
     e->base = base;
     return REDIS_OK;
 }
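
For completeness, with a patch like this the application would also have to set up two priority levels on the event base before attaching, since event_priority_set() only works once event_base_priority_init() has been called with enough levels. A minimal sketch (options is the same redisOptions as in the test above):

struct event_base *base = event_base_new();
/* Two priority queues: 0 (highest, used for e->ev_io) and
 * 1 (used for e->ev_timer), matching the event_priority_set()
 * calls in the patch. */
event_base_priority_init(base, 2);

redisAsyncContext *ac = redisAsyncConnectWithOptions(&options);
redisLibeventAttach(ac, base);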

Thank you so much.

@bjosv
Contributor

bjosv commented Feb 20, 2025

I also believe I have seen signs that libevent prioritizes timeouts. Possibly this is because it calls timeout_process and runs the callback (with EV_TIMEOUT) through event_active_nolock before handling the active events in event_process_active.

If we use priority queues and have multiple clients connected using the same event base, wouldn't there be a risk that one or more heavily loaded clients could starve out a failing client? The failing client might never get its timeout due to the new prioritization, and we would never reconnect.

@plainbanana
Author

plainbanana commented Feb 21, 2025

If we use priority queues and have multiple clients connected using the same event base, wouldn't there be a risk that one or more heavily loaded clients could starve out a failing client? The failing client might never get its timeout due to the new prioritization, and we would never reconnect.

I agree with your comment. For example, this could happen when managing connections to many nodes (such as in hiredis-cluster) with a single event_base.

Depending on how the client is used this might not be an issue, but in general we should avoid setting priorities.

Alternatively, would it be possible to mitigate the issue by separating ev into ev_io and ev_timer without setting any priorities? It seems that event_base_loop executes the callback queue in the order in which the callbacks were activated, although this depends on the internal implementation of libevent.

I might be mistaken, but if the I/O event (ev_io) and the timeout event (ev_timer) are registered separately, will ev_io be added to the active queue first, followed by ev_timer?

In the current implementation, the EV_READ activation is deleted in timeout_process, resulting in only EV_TIMEOUT being triggered.
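
To illustrate, here is a minimal standalone sketch against libevent only (not hiredis code; the socketpair and the 1-second timeout are just for demonstration). A single event carries both the read interest and the timeout, like e->ev in the current adapter; the fd is made readable, but the loop runs only after the deadline has passed. If the analysis above is right, the callback receives only EV_TIMEOUT:

#include <event2/event.h>
#include <event2/util.h>
#include <sys/socket.h>
#include <stdio.h>
#include <unistd.h>

static void cb(evutil_socket_t fd, short what, void *arg) {
    (void)fd; (void)arg;
    /* Expected: "flags: EV_TIMEOUT" -- the EV_READ activation from the
     * dispatch step is dropped when timeout_process expires the event. */
    printf("flags:%s%s\n",
           (what & EV_READ)    ? " EV_READ"    : "",
           (what & EV_TIMEOUT) ? " EV_TIMEOUT" : "");
}

int main(void) {
    evutil_socket_t pair[2];
    struct event_base *base = event_base_new();
    struct timeval tv = {1, 0};

    evutil_socketpair(AF_UNIX, SOCK_STREAM, 0, pair);

    /* One event with both read interest and a 1s timeout. */
    struct event *ev = event_new(base, pair[0], EV_READ, cb, NULL);
    event_add(ev, &tv);

    write(pair[1], "x", 1); /* readable immediately */
    sleep(2);               /* delay the loop past the deadline */

    event_base_loop(base, EVLOOP_ONCE);

    event_free(ev);
    event_base_free(base);
    return 0;
}

If ev_io and ev_timer were registered as two separate events instead, the EV_READ activation would not be removed when the timer expires, so both callbacks should run in the order in which they were activated.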
