From bc6e96cb6f2de9932dc3877d3ec98ce277f8c2ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?okhowang=28=E7=8E=8B=E6=B2=9B=E6=96=87=29?= Date: Wed, 10 Sep 2025 16:06:44 +0800 Subject: [PATCH] lxcfs: add recursive flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: okhowang(王沛文) --- src/bindings.h | 6 ++- src/lxcfs.c | 5 ++ src/proc_cpuview.c | 126 +++++++++++++++++++++++++-------------------- src/proc_cpuview.h | 4 +- src/proc_fuse.c | 4 +- src/sysfs_fuse.c | 2 +- 6 files changed, 86 insertions(+), 61 deletions(-) diff --git a/src/bindings.h b/src/bindings.h index 3abd7570..a1b9fd03 100644 --- a/src/bindings.h +++ b/src/bindings.h @@ -120,6 +120,7 @@ struct lxcfs_opts { bool swap_off; bool use_pidfd; bool use_cfs; + bool recursive; /* * Ideally we'd version by size but because of backwards compatability * and the use of bool instead of explicited __u32 and __u64 we can't. @@ -135,7 +136,8 @@ typedef enum lxcfs_opt_t { LXCFS_PIDFD_ON = 1, LXCFS_CFS_ON = 2, LXCFS_ZSWAP_ON = 3, - LXCFS_OPTS_MAX = LXCFS_ZSWAP_ON, + LXCFS_RECURSIVE = 4, + LXCFS_OPTS_MAX = LXCFS_RECURSIVE, } lxcfs_opt_t; @@ -172,6 +174,8 @@ static inline bool lxcfs_has_opt(struct lxcfs_opts *opts, lxcfs_opt_t opt) if (opts->version >= 3 && !opts->zswap_off) return liblxcfs_can_use_zswap(); return false; + case LXCFS_RECURSIVE: + return opts->recursive; } return false; diff --git a/src/lxcfs.c b/src/lxcfs.c index 246ff4b6..b8f19845 100644 --- a/src/lxcfs.c +++ b/src/lxcfs.c @@ -885,6 +885,7 @@ static void usage(void) lxcfs_info(" --enable-cfs Enable CPU virtualization via CPU shares"); lxcfs_info(" --enable-pidfd Use pidfd for process tracking"); lxcfs_info(" --enable-cgroup Enable cgroup emulation code"); + lxcfs_info(" --enable-recursive Enable recursive cgroup cpu/memory quota emulation"); lxcfs_info(" --runtime-dir=DIR Path to use as the runtime directory."); lxcfs_info(" Default is %s", DEFAULT_RUNTIME_PATH); exit(EXIT_FAILURE); @@ -937,6 +938,7 
@@ static const struct option long_options[] = { {"enable-cfs", no_argument, 0, 0 }, {"enable-pidfd", no_argument, 0, 0 }, {"enable-cgroup", no_argument, 0, 0 }, + {"enable-recursive", no_argument, 0, 0 }, {"pidfile", required_argument, 0, 'p' }, {"runtime-dir", required_argument, 0, 0 }, @@ -1011,6 +1013,7 @@ int main(int argc, char *argv[]) opts->zswap_off = false; opts->use_pidfd = false; opts->use_cfs = false; + opts->recursive = false; opts->version = 3; while ((c = getopt_long(argc, argv, "dulfhvso:p:", long_options, &idx)) != -1) { @@ -1022,6 +1025,8 @@ int main(int argc, char *argv[]) opts->use_cfs = true; else if (strcmp(long_options[idx].name, "enable-cgroup") == 0) cgroup_is_enabled = true; + else if (strcmp(long_options[idx].name, "enable-recursive") == 0) + opts->recursive = true; else if (strcmp(long_options[idx].name, "runtime-dir") == 0) runtime_path_arg = optarg; else diff --git a/src/proc_cpuview.c b/src/proc_cpuview.c index 0063fd58..07740fe1 100644 --- a/src/proc_cpuview.c +++ b/src/proc_cpuview.c @@ -436,30 +436,68 @@ static uint64_t diff_cpu_usage(struct cpuacct_usage *older, } /* - * Read cgroup CPU quota parameters from `cpu.cfs_quota_us` or - * `cpu.cfs_period_us`, depending on `param`. Parameter value is returned - * through `value`. + * Read the CPU count from the cgroup CPU quota parameters. + * In cgroup v1, the parameters are `cpu.cfs_quota_us` and `cpu.cfs_period_us`. + * In cgroup v2, the parameter is `cpu.max`. + * The CPU count is returned through `value`. If there is no quota set, false is returned. 
*/ -static bool read_cpu_cfs_param(const char *cg, const char *param, int64_t *value) +static bool read_cpu_count_cfs(const char *cg, int *value) { - __do_free char *str = NULL; - char file[STRLITERALLEN("cpu.cfs_period_us") + 1]; - bool first = true; - int ret; + __do_free char *str = NULL, *period_str = NULL; + int64_t quota = 0, period = 0; if (pure_unified_layout(cgroup_ops)) { - first = !strcmp(param, "quota"); - ret = snprintf(file, sizeof(file), "cpu.max"); + if (!cgroup_ops->get(cgroup_ops, "cpu", cg, "cpu.max", &str)) + return false; + /* + * Without a quota cpu.max reads `max 100000`, so the numeric + * parse below fails and false is returned. + */ + if (sscanf(str, "%" PRId64 " %" PRId64, &quota, &period) != 2) + return false; } else { - ret = snprintf(file, sizeof(file), "cpu.cfs_%s_us", param); + if (!cgroup_ops->get(cgroup_ops, "cpu", cg, "cpu.cfs_quota_us", &str)) + return false; + if (sscanf(str, "%" PRId64, &quota) != 1) + return false; + if (!cgroup_ops->get(cgroup_ops, "cpu", cg, "cpu.cfs_period_us", &period_str)) + return false; + if (sscanf(period_str, "%" PRId64, &period) != 1) + return false; } - if (ret < 0 || (size_t)ret >= sizeof(file)) + if (quota < 0 || period <= 0) return false; + *value = quota / period; + /* + * In case quota/period does not yield a whole number, add one CPU for + * the remainder. + */ + if (quota % period > 0) + *value += 1; + return true; +} - if (!cgroup_ops->get(cgroup_ops, "cpu", cg, file, &str)) - return false; +/* + * Same as read_cpu_count_cfs, but also runs for each parent cgroup. + */ +static bool read_cpu_count_cfs_hierarchy(const char *cg, int *rv) +{ + __do_free char *cur_sg = strdup(cg); + int value; + char *last_sep = NULL; + *rv = 0; + do { + if (read_cpu_count_cfs(cur_sg, &value)) + if (*rv == 0 || value < *rv) + *rv = value; - return sscanf(str, first ? 
"%" PRId64 : "%*d %" PRId64, value) == 1; + last_sep = strrchr(cur_sg, '/'); + if (last_sep != NULL) + last_sep[0] = '\0'; + else + cur_sg[0] = '\0'; + } while (cur_sg[0] != '\0'); + return *rv != 0; } /* @@ -468,21 +506,12 @@ static bool read_cpu_cfs_param(const char *cg, const char *param, int64_t *value */ static double exact_cpu_count(const char *cg) { - double rv; + int rv; int nprocs; - int64_t cfs_quota, cfs_period; - - if (!read_cpu_cfs_param(cg, "quota", &cfs_quota)) - return 0; - if (!read_cpu_cfs_param(cg, "period", &cfs_period)) + if (!read_cpu_count_cfs(cg, &rv)) return 0; - if (cfs_quota <= 0 || cfs_period <= 0) - return 0; - - rv = (double)cfs_quota / (double)cfs_period; - nprocs = get_nprocs(); if (rv > nprocs) @@ -496,51 +525,38 @@ static double exact_cpu_count(const char *cg) */ static bool cfs_quota_disabled(const char *cg) { - int64_t cfs_quota; - - if (!read_cpu_cfs_param(cg, "quota", &cfs_quota)) - return true; + int cpu_count; - return cfs_quota < 0; + return !read_cpu_count_cfs(cg, &cpu_count); } /* * Return the maximum number of visible CPUs based on CPU quotas. * If there is no quota set, cpu number in cpuset value is returned. 
*/ -int max_cpu_count(const char *cpuset_cg, const char *cpu_cg) +int max_cpu_count(const char *cpuset_cg, const char *cpu_cg, bool recursive) { __do_free char *cpuset = NULL; - int rv, nprocs; - int64_t cfs_quota, cfs_period; + int nprocs; + int rv; int nr_cpus_in_cpuset = 0; - if (!read_cpu_cfs_param(cpu_cg, "quota", &cfs_quota)) - cfs_quota = 0; - - if (!read_cpu_cfs_param(cpu_cg, "period", &cfs_period)) - cfs_period = 0; + if (recursive) { + if (!read_cpu_count_cfs_hierarchy(cpu_cg, &rv)) + rv = 0; + } else { + if (!read_cpu_count_cfs(cpu_cg, &rv)) + rv = 0; + } cpuset = get_cpuset(cpuset_cg); if (cpuset) nr_cpus_in_cpuset = cpu_number_in_cpuset(cpuset); - if (cfs_quota <= 0 || cfs_period <= 0) { - if (nr_cpus_in_cpuset > 0) - return nr_cpus_in_cpuset; - - return 0; + if (rv == 0 && nr_cpus_in_cpuset > 0) { + return nr_cpus_in_cpuset; } - rv = cfs_quota / cfs_period; - - /* - * In case quota/period does not yield a whole number, add one CPU for - * the remainder. - */ - if ((cfs_quota % cfs_period) > 0) - rv += 1; - nprocs = get_nprocs(); if (rv > nprocs) rv = nprocs; @@ -554,7 +570,7 @@ int max_cpu_count(const char *cpuset_cg, const char *cpu_cg) int cpuview_proc_stat(const char *cg, const char *cpu_cg, const char *cpuset, struct cpuacct_usage *cg_cpu_usage, int cg_cpu_usage_size, - FILE *f, char *buf, size_t buf_size) + FILE *f, char *buf, size_t buf_size, bool recursive) { __do_free char *line = NULL; __do_free struct cpuacct_usage *diff = NULL; @@ -640,7 +656,7 @@ int cpuview_proc_stat(const char *cg, const char *cpu_cg, const char *cpuset, } /* Cannot use more CPUs than is available in cpuset. 
*/ - max_cpus = max_cpu_count(cg, cpu_cg); + max_cpus = max_cpu_count(cg, cpu_cg, recursive); if (max_cpus > cpu_cnt || !max_cpus) max_cpus = cpu_cnt; @@ -998,7 +1014,7 @@ int proc_cpuinfo_read(char *buf, size_t size, off_t offset, else use_view = false; if (use_view) - max_cpus = max_cpu_count(cg, cpu_cg); + max_cpus = max_cpu_count(cg, cpu_cg, lxcfs_has_opt(opts, LXCFS_RECURSIVE)); f = fopen_cached("/proc/cpuinfo", "re", &fopen_cache); if (!f) diff --git a/src/proc_cpuview.h b/src/proc_cpuview.h index b80f6dc0..108c148f 100644 --- a/src/proc_cpuview.h +++ b/src/proc_cpuview.h @@ -24,14 +24,14 @@ struct cpuacct_usage { extern int cpuview_proc_stat(const char *cg, const char *cpu_cg, const char *cpuset, struct cpuacct_usage *cg_cpu_usage, int cg_cpu_usage_size, FILE *f, char *buf, - size_t buf_size); + size_t buf_size, bool recursive); extern int proc_cpuinfo_read(char *buf, size_t size, off_t offset, struct fuse_file_info *fi); extern int read_cpuacct_usage_all(char *cg, char *cpuset, struct cpuacct_usage **return_usage, int *size); extern bool init_cpuview(void); extern void free_cpuview(void); -extern int max_cpu_count(const char *cpuset_cg, const char *cpu_cg); +extern int max_cpu_count(const char *cpuset_cg, const char *cpu_cg, bool recursive); #endif /* __LXCFS_PROC_CPUVIEW_FUSE_H */ diff --git a/src/proc_fuse.c b/src/proc_fuse.c index 335ff403..ac05b192 100644 --- a/src/proc_fuse.c +++ b/src/proc_fuse.c @@ -1136,7 +1136,7 @@ static int proc_stat_read(char *buf, size_t size, off_t offset, if (cgroup_ops->can_use_cpuview(cgroup_ops) && opts && opts->use_cfs) { total_len = cpuview_proc_stat(cg, cpu_cg, cpuset, cg_cpu_usage, cg_cpu_usage_size, f, - d->buf, d->buflen); + d->buf, d->buflen, lxcfs_has_opt(opts, LXCFS_RECURSIVE)); goto out; } } else { @@ -1148,7 +1148,7 @@ static int proc_stat_read(char *buf, size_t size, off_t offset, else use_view = false; if (use_view) - max_cpus = max_cpu_count(cg, cpu_cg); + max_cpus = max_cpu_count(cg, cpu_cg, 
lxcfs_has_opt(opts, LXCFS_RECURSIVE)); while (getline(&line, &linelen, f) != -1) { ssize_t l; diff --git a/src/sysfs_fuse.c b/src/sysfs_fuse.c index bf75ba99..ce222faa 100644 --- a/src/sysfs_fuse.c +++ b/src/sysfs_fuse.c @@ -57,7 +57,7 @@ static int do_cpuset_read(char *cg, char *cpu_cg, char *buf, size_t buflen) use_view = false; if (use_view) - max_cpus = max_cpu_count(cg, cpu_cg); + max_cpus = max_cpu_count(cg, cpu_cg, lxcfs_has_opt(opts, LXCFS_RECURSIVE)); if (use_view) { if (max_cpus > 1)