From 979e957346f63156eb8d97d5539aab3b1f6544a8 Mon Sep 17 00:00:00 2001 From: Li Wei Date: Thu, 9 Apr 2026 10:55:01 +0900 Subject: [PATCH] DAOS-18613 container: Reduce false -DER_NO_HDLs If a request (e.g., a CONT_OID_ALLOC) arrives when the local pool is still recovering pool handles, the request's ds_pool_hdl_lookup call may return a false -DER_NO_HDL. This patch lets the client retry the operation in this case, by looking up the ds_pool and checking if the handle recovery is done. The cont_oid_alloc_complete function only retries upon certain crt errors. This patch has to refactor the function a bit to also retry upon certain daos errors. Features: container Signed-off-by: Li Wei --- src/container/cli.c | 97 ++++++++++++++++++++++------------- src/container/rpc.h | 12 +++++ src/container/srv_container.c | 36 ++++++++----- src/container/srv_target.c | 23 +++++---- src/include/daos_srv/pool.h | 7 ++- src/pool/srv_target.c | 69 +++++++++++++++++++++---- 6 files changed, 172 insertions(+), 72 deletions(-) diff --git a/src/container/cli.c b/src/container/cli.c index 978ec45d79f..979bf418f8e 100644 --- a/src/container/cli.c +++ b/src/container/cli.c @@ -1995,48 +1995,39 @@ struct cont_oid_alloc_args { static int cont_oid_alloc_complete(tse_task_t *task, void *data) { - struct cont_oid_alloc_args *arg = (struct cont_oid_alloc_args *)data; - struct cont_oid_alloc_out *out = crt_reply_get(arg->rpc); - struct dc_pool *pool = arg->coaa_pool; - struct dc_cont *cont = arg->coaa_cont; - int rc = task->dt_result; - - if (daos_rpc_retryable_rc(rc) || rc == -DER_STALE) { - tse_sched_t *sched = tse_task2sched(task); - tse_task_t *ptask; - unsigned int map_version = out->coao_op.co_map_version; - - /** pool map refresh task */ - rc = dc_pool_create_map_refresh_task(arg->coaa_cont->dc_pool_hdl, map_version, - sched, &ptask); - if (rc != 0) - D_GOTO(out, rc); - - rc = dc_task_depend(task, 1, &ptask); - if (rc != 0) { - dc_pool_abandon_map_refresh_task(ptask); - D_GOTO(out, rc); - } 
- - rc = dc_task_resched(task); - if (rc != 0) { - dc_pool_abandon_map_refresh_task(ptask); - D_GOTO(out, rc); - } - - /* ignore returned value, error is reported by comp_cb */ - tse_task_schedule(ptask, true); - D_GOTO(out, rc = 0); + struct cont_oid_alloc_args *arg = data; + struct cont_oid_alloc_out *out = crt_reply_get(arg->rpc); + struct dc_pool *pool = arg->coaa_pool; + struct dc_cont *cont = arg->coaa_cont; + bool resched = false; + bool refresh_map = false; + unsigned int map_version = 0; + int rc = task->dt_result; + + if (daos_rpc_retryable_rc(rc)) { + resched = true; + refresh_map = true; + goto out_resched; } else if (rc != 0) { /** error but non retryable RPC */ - D_ERROR("failed to allocate oids: "DF_RC"\n", DP_RC(rc)); - D_GOTO(out, rc); + DL_ERROR(rc, DF_CONT ": failed to allocate oids", + DP_CONT(pool->dp_pool, cont->dc_uuid)); + goto out_resched; } rc = out->coao_op.co_rc; - if (rc != 0) { - D_ERROR("failed to allocate oids: "DF_RC"\n", DP_RC(rc)); - D_GOTO(out, rc); + if (rc == -DER_STALE) { + resched = true; + refresh_map = true; + map_version = out->coao_op.co_map_version; + goto out_resched; + } else if (daos_rpc_retryable_rc(rc)) { + resched = true; + goto out_resched; + } else if (rc != 0) { + DL_ERROR(rc, DF_CONT ": failed to allocate oids", + DP_CONT(pool->dp_pool, cont->dc_uuid)); + goto out_resched; } D_DEBUG(DB_MD, DF_CONT": OID ALLOC: using hdl="DF_UUID" oid "DF_U64"/"DF_U64"\n", @@ -2046,6 +2037,38 @@ cont_oid_alloc_complete(tse_task_t *task, void *data) if (arg->oid) *arg->oid = out->oid; +out_resched: + if (resched) { + tse_sched_t *sched = tse_task2sched(task); + tse_task_t *ptask; + + D_DEBUG(DB_MD, DF_CONT ": resched: refresh_map=%d map_version=%u\n", + DP_CONT(pool->dp_pool, cont->dc_uuid), refresh_map, map_version); + if (refresh_map) { + rc = dc_pool_create_map_refresh_task(arg->coaa_cont->dc_pool_hdl, + map_version, sched, &ptask); + if (rc != 0) + goto out; + + rc = dc_task_depend(task, 1, &ptask); + if (rc != 0) { + 
dc_pool_abandon_map_refresh_task(ptask); + goto out; + } + } + rc = dc_task_resched(task); + if (rc != 0) { + if (refresh_map) + dc_pool_abandon_map_refresh_task(ptask); + goto out; + } + if (refresh_map) { + /* ignore returned value, error is reported by comp_cb */ + tse_task_schedule(ptask, true); + rc = 0; + goto out; + } + } out: crt_req_decref(arg->rpc); dc_cont_put(cont); diff --git a/src/container/rpc.h b/src/container/rpc.h index b3d33bf969c..d08d04d6461 100644 --- a/src/container/rpc.h +++ b/src/container/rpc.h @@ -143,6 +143,18 @@ CRT_RPC_DECLARE(cont_op, DAOS_ISEQ_CONT_OP, DAOS_OSEQ_CONT_OP) CRT_RPC_DECLARE(cont_op_v8, DAOS_ISEQ_CONT_OP_V8, DAOS_OSEQ_CONT_OP) CRT_RPC_DECLARE(cont_op_v9, DAOS_ISEQ_CONT_OP_V9, DAOS_OSEQ_CONT_OP) +static inline void +cont_op_in_get_pool_uuid(crt_rpc_t *rpc, uuid_t pool_uuid_out) +{ + if (opc_get_rpc_ver(rpc->cr_opc) >= CONT_PROTO_VER_WITH_POOL_UUID) { + struct cont_op_v9_in *in9 = crt_req_get(rpc); + + uuid_copy(pool_uuid_out, in9->ci_pool); + return; + } + uuid_clear(pool_uuid_out); +} + #define DAOS_ISEQ_CONT_CREATE_V8 /* input fields */ \ /* .ci_hdl unused */ \ ((struct cont_op_v8_in) (cci_op) CRT_VAR) \ diff --git a/src/container/srv_container.c b/src/container/srv_container.c index 69b8b88ac22..5261c998d47 100644 --- a/src/container/srv_container.c +++ b/src/container/srv_container.c @@ -1462,7 +1462,7 @@ belongs_to_user(d_iov_t *key, struct find_hdls_by_cont_arg *arg) hdl = value.iov_buf; /* Usually we already have the pool handle in memory. */ - pool_hdl = ds_pool_hdl_lookup(hdl->ch_pool_hdl); + pool_hdl = ds_pool_hdl_lookup_cached(hdl->ch_pool_hdl); if (pool_hdl == NULL) { /* Otherwise, look it up in the pool metadata via a hack. 
*/ rc = ds_pool_lookup_hdl_cred(arg->fha_tx, cont->c_svc->cs_pool_uuid, @@ -6018,14 +6018,16 @@ cont_cli_opc_name(crt_opcode_t opc) void ds_cont_op_handler(crt_rpc_t *rpc) { - struct cont_op_in *in = crt_req_get(rpc); - struct cont_op_out *out = crt_reply_get(rpc); - struct ds_pool_hdl *pool_hdl; - crt_opcode_t opc = opc_get(rpc->cr_opc); - daos_prop_t *prop = NULL; - const char *lbl; - struct cont_svc *svc; - int rc; + struct cont_op_in *in = crt_req_get(rpc); + struct cont_op_out *out = crt_reply_get(rpc); + uuid_t pool_uuid; + struct ds_pool_hdl *pool_hdl; + crt_opcode_t opc = opc_get(rpc->cr_opc); + unsigned int opv = opc_get_rpc_ver(rpc->cr_opc); + daos_prop_t *prop = NULL; + const char *lbl; + struct cont_svc *svc; + int rc; /* * Some mgmt RPCs may come from either client or server (admin/dRPC) calls. RPCs from @@ -6041,13 +6043,19 @@ ds_cont_op_handler(crt_rpc_t *rpc) } } - pool_hdl = ds_pool_hdl_lookup(in->ci_pool_hdl); - if (pool_hdl == NULL) - D_GOTO(out, rc = -DER_NO_HDL); + cont_op_in_get_pool_uuid(rpc, pool_uuid); + rc = ds_pool_hdl_lookup(pool_uuid, in->ci_pool_hdl, &pool_hdl); + if (rc != 0) { + D_DEBUG(DB_MD, + DF_CONT ": ds_pool_hdl_lookup: rpc=%p opc=%u(%s) pool_hdl=" DF_UUID "\n", + DP_CONT(pool_uuid, in->ci_uuid), rpc, opc, cont_cli_opc_name(opc), + DP_UUID(in->ci_pool_hdl)); + goto out; + } D_DEBUG(DB_MD, DF_CONT ": processing rpc: %p proto=%d hdl=" DF_UUID ", opc=%u(%s)\n", - DP_CONT(pool_hdl->sph_pool->sp_uuid, in->ci_uuid), rpc, - opc_get_rpc_ver(rpc->cr_opc), DP_UUID(in->ci_hdl), opc, cont_cli_opc_name(opc)); + DP_CONT(pool_hdl->sph_pool->sp_uuid, in->ci_uuid), rpc, opv, DP_UUID(in->ci_hdl), + opc, cont_cli_opc_name(opc)); /* * TODO: How to map to the correct container service among those diff --git a/src/container/srv_target.c b/src/container/srv_target.c index 90bcee92a59..a9c08e67ccd 100644 --- a/src/container/srv_target.c +++ b/src/container/srv_target.c @@ -2542,15 +2542,20 @@ cont_oid_alloc(struct ds_pool_hdl *pool_hdl, crt_rpc_t 
*rpc) void ds_cont_oid_alloc_handler(crt_rpc_t *rpc) { - struct cont_op_in *in = crt_req_get(rpc); - struct cont_op_out *out = crt_reply_get(rpc); - struct ds_pool_hdl *pool_hdl; - crt_opcode_t opc = opc_get(rpc->cr_opc); - int rc; - - pool_hdl = ds_pool_hdl_lookup(in->ci_pool_hdl); - if (pool_hdl == NULL) - D_GOTO(out, rc = -DER_NO_HDL); + struct cont_op_in *in = crt_req_get(rpc); + struct cont_op_out *out = crt_reply_get(rpc); + uuid_t pool_uuid; + struct ds_pool_hdl *pool_hdl; + crt_opcode_t opc = opc_get(rpc->cr_opc); + int rc; + + cont_op_in_get_pool_uuid(rpc, pool_uuid); + rc = ds_pool_hdl_lookup(pool_uuid, in->ci_pool_hdl, &pool_hdl); + if (rc != 0) { + D_DEBUG(DB_MD, DF_CONT ": failed to lookup pool handle: " DF_RC "\n", + DP_CONT(pool_uuid, in->ci_uuid), DP_RC(rc)); + goto out; + } D_DEBUG(DB_MD, DF_CONT ": processing rpc: %p hdl=" DF_UUID " opc=%u\n", DP_CONT(pool_hdl->sph_pool->sp_uuid, in->ci_uuid), rpc, DP_UUID(in->ci_hdl), opc); diff --git a/src/include/daos_srv/pool.h b/src/include/daos_srv/pool.h index cb1fe8d1eb5..dc4a7c747d7 100644 --- a/src/include/daos_srv/pool.h +++ b/src/include/daos_srv/pool.h @@ -89,7 +89,7 @@ struct ds_pool { uuid_t sp_srv_cont_hdl; uuid_t sp_srv_pool_hdl; uint32_t sp_stopping : 1, sp_cr_checked : 1, sp_immutable : 1, sp_disable_rebuild : 1, - sp_disable_dtx_resync : 1, sp_incr_reint : 1; + sp_disable_dtx_resync : 1, sp_incr_reint : 1, sp_hdl_fetched : 1; /* pool_uuid + map version + leader term + rebuild generation define a * rebuild job. 
*/ @@ -146,8 +146,11 @@ struct ds_pool_hdl { d_iov_t sph_cred; }; -struct ds_pool_hdl *ds_pool_hdl_lookup(const uuid_t uuid); +/* clang-format off */ +int ds_pool_hdl_lookup(const uuid_t pool_uuid, const uuid_t uuid, struct ds_pool_hdl **hdl_out); +struct ds_pool_hdl *ds_pool_hdl_lookup_cached(const uuid_t uuid); void ds_pool_hdl_put(struct ds_pool_hdl *hdl); +/* clang-format on */ enum pool_child_state { POOL_CHILD_NEW = 0, diff --git a/src/pool/srv_target.c b/src/pool/srv_target.c index 98bd3591042..740931088ba 100644 --- a/src/pool/srv_target.c +++ b/src/pool/srv_target.c @@ -1148,7 +1148,7 @@ eph_report_ult(void *data) { struct ds_pool *pool = data; int rc, sleep_intvl; - bool conn_hdl_fetched = false, srv_hdl_fetched = false; + bool srv_hdl_fetched = false; D_DEBUG(DB_MD, DF_UUID " Enter eph report.\n", DP_UUID(pool->sp_uuid)); D_ASSERT(pool->sp_ec_ephs_req != NULL); @@ -1172,7 +1172,7 @@ eph_report_ult(void *data) sleep_intvl = EPH_REPORT_INTVL; /* Fetch pool connection handles */ - if (!conn_hdl_fetched) { + if (!pool->sp_hdl_fetched) { D_INFO(DF_UUID ": Fetching connection handles.\n", DP_UUID(pool->sp_uuid)); rc = ds_pool_iv_conn_hdl_fetch(pool); if (rc) { @@ -1180,7 +1180,7 @@ eph_report_ult(void *data) DP_UUID(pool->sp_uuid), DP_RC(rc)); sleep_intvl = EPH_REPORT_RETRY_INTVL; } else { - conn_hdl_fetched = true; + pool->sp_hdl_fetched = true; } if (eph_report_exiting(pool)) @@ -1550,7 +1550,7 @@ pool_hdl_delete(struct ds_pool_hdl *hdl) } struct ds_pool_hdl * -ds_pool_hdl_lookup(const uuid_t uuid) +ds_pool_hdl_lookup_cached(const uuid_t uuid) { d_list_t *rlink; @@ -1561,6 +1561,56 @@ ds_pool_hdl_lookup(const uuid_t uuid) return pool_hdl_obj(rlink); } +/** + * Look up pool handle \a hdl_uuid. 
+ * + * \param[in] pool_uuid pool UUID (may be NULL or null UUID if the + * caller is an older-version RPC handler, which does + * not have the pool UUID info) + * \param[in] hdl_uuid pool handle UUID + * \param[out] hdl_out pool handle + * + * \return 0 success + * -DER_NO_HDL handle not found + * -DER_TIMEDOUT try again + */ +int +ds_pool_hdl_lookup(const uuid_t pool_uuid, const uuid_t hdl_uuid, struct ds_pool_hdl **hdl_out) +{ + *hdl_out = ds_pool_hdl_lookup_cached(hdl_uuid); + if (*hdl_out == NULL) { + struct ds_pool *pool; + const int retry_rc = -DER_TIMEDOUT; + int rc; + + /* + * Has the handle recovery completed? If not, let the caller + * (usually, the client) retry. + */ + if (pool_uuid == NULL || uuid_is_null(pool_uuid)) + return -DER_NO_HDL; + rc = ds_pool_lookup(pool_uuid, &pool); + if (rc == -DER_SHUTDOWN) { + D_DEBUG(DB_MD, + DF_UUID ": pool stopping for handle " DF_UUID ": " DF_RC "\n", + DP_UUID(pool_uuid), DP_UUID(hdl_uuid), DP_RC(rc)); + return retry_rc; + } else if (rc != 0) { + D_DEBUG(DB_MD, + DF_UUID ": pool not found for handle " DF_UUID ": " DF_RC "\n", + DP_UUID(pool_uuid), DP_UUID(hdl_uuid), DP_RC(rc)); + return -DER_NO_HDL; + } + if (pool->sp_hdl_fetched) + rc = -DER_NO_HDL; + else + rc = retry_rc; + ds_pool_put(pool); + return rc; + } + return 0; +} + static void ds_pool_hdl_get(struct ds_pool_hdl *hdl) { @@ -1762,7 +1812,7 @@ ds_pool_tgt_connect(struct ds_pool *pool, struct pool_iv_conn *pic) D_ASSERT(dss_get_module_info()->dmi_xs_id == 0); - hdl = ds_pool_hdl_lookup(pic->pic_hdl); + hdl = ds_pool_hdl_lookup_cached(pic->pic_hdl); if (hdl != NULL) { if (hdl->sph_sec_capas == pic->pic_capas) { D_DEBUG(DB_MD, DF_UUID": found compatible pool " @@ -1844,7 +1894,7 @@ ds_pool_tgt_disconnect(uuid_t uuid) { struct ds_pool_hdl *hdl; - hdl = ds_pool_hdl_lookup(uuid); + hdl = ds_pool_hdl_lookup_cached(uuid); if (hdl == NULL) { D_DEBUG(DB_MD, "handle "DF_UUID" does not exist\n", DP_UUID(uuid)); @@ -2377,11 +2427,10 @@ 
ds_pool_tgt_query_map_handler(crt_rpc_t *rpc) if (daos_rpc_from_client(rpc)) { struct ds_pool_hdl *hdl; - hdl = ds_pool_hdl_lookup(in->tmi_op.pi_hdl); - if (hdl == NULL) { - D_ERROR(DF_UUID": cannot find pool handle "DF_UUID"\n", + rc = ds_pool_hdl_lookup(in->tmi_op.pi_uuid, in->tmi_op.pi_hdl, &hdl); + if (rc != 0) { + D_ERROR(DF_UUID ": cannot find pool handle " DF_UUID "\n", DP_UUID(in->tmi_op.pi_uuid), DP_UUID(in->tmi_op.pi_hdl)); - rc = -DER_NO_HDL; goto out; } ds_pool_get(hdl->sph_pool);