File 1026-erts-Fix-cgroup2-path-wrangling.patch of Package erlang
From f31e489b9862f63fc807161dd9cc8b2e92e6c13f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?John=20H=C3=B6gberg?= <john@erlang.org>
Date: Fri, 23 Feb 2024 19:52:57 +0100
Subject: [PATCH 2/2] erts: Fix cgroup2 path wrangling
Fixes #7928
---
erts/lib_src/common/erl_misc_utils.c | 268 ++++++++++++++++-----------
1 file changed, 155 insertions(+), 113 deletions(-)
diff --git a/erts/lib_src/common/erl_misc_utils.c b/erts/lib_src/common/erl_misc_utils.c
index 4c441294ba..35291a1fbb 100644
--- a/erts/lib_src/common/erl_misc_utils.c
+++ b/erts/lib_src/common/erl_misc_utils.c
@@ -1060,62 +1060,77 @@ str_combine(const char *a, const char *b) {
return result;
}
-static const char*
-get_cgroup_v1_base_dir(const char *controller) {
- char line_buf[5 << 10];
- FILE *var_file;
-
- var_file = fopen("/proc/self/cgroup", "r");
+enum cgroup_version_t {
+ ERTS_CGROUP_NONE,
+ ERTS_CGROUP_V1,
+ ERTS_CGROUP_V2
+};
- if (var_file == NULL) {
- return NULL;
- }
+static enum cgroup_version_t
+get_cgroup_child_path(const char *controller, const char **out) {
+ FILE *cgroup_file = fopen("/proc/self/cgroup", "r");
+ enum cgroup_version_t version = ERTS_CGROUP_NONE;
+
+ if (cgroup_file != NULL) {
+ char line_buf[5 << 10];
+
+ while (fgets(line_buf, sizeof(line_buf), cgroup_file)) {
+ /* sscanf_s requires C11, so we use hardcoded sizes (rather than
+ * rely on macros like MAXPATHLEN) so we can specify them directly
+ * in the format string. */
+ char child_dir[4 << 10];
+ char controllers[256];
- while (fgets(line_buf, sizeof(line_buf), var_file)) {
- /* sscanf_s requires C11, so we use hardcoded sizes (rather than rely
- * on macros like MAXPATHLEN) so we can specify them directly in the
- * format string. */
- char base_dir[4 << 10];
- char controllers[256];
+ if (sscanf(line_buf, "%*d:%255[^:]:%4095s\n",
+ controllers, child_dir) == 2) {
+ if (csv_contains(controllers, controller, ',')) {
+ if (version == ERTS_CGROUP_V2) {
+ free((void*)*out);
+ }
- if (sscanf(line_buf, "%*d:%255[^:]:%4095s\n",
- controllers, base_dir) != 2) {
- continue;
+ /* A controller can only exist in one hierarchy, so we can
+ * safely exit once found. */
+ *out = strdup(child_dir);
+ version = ERTS_CGROUP_V1;
+ break;
+ }
+ } else if (sscanf(line_buf, "%*d::%4095s\n", child_dir) == 1) {
+ /* An empty controller list means that this is the unified v2
+ * hierarchy, under which all associated controllers can be
+ * found. We don't know if the given controller is one of them,
+ * though, so we need to keep looking in case it's under a v1
+ * hierarchy. */
+ *out = strdup(child_dir);
+ version = ERTS_CGROUP_V2;
+ }
}
- if (csv_contains(controllers, controller, ',')) {
- fclose(var_file);
- return strdup(base_dir);
- }
+ fclose(cgroup_file);
}
- fclose(var_file);
- return NULL;
+ return version;
}
-enum cgroup_version_t {
- ERTS_CGROUP_NONE,
- ERTS_CGROUP_V1,
- ERTS_CGROUP_V2
-};
-
static enum cgroup_version_t
-get_cgroup_path(const char *controller, const char **path) {
- char line_buf[10 << 10];
- FILE *var_file;
-
- var_file = fopen("/proc/self/mountinfo", "r");
-
- if (var_file == NULL) {
+get_cgroup_path(const char *controller,
+ const char **out) {
+ enum cgroup_version_t version;
+ char mount_line[10 << 10];
+ const char *mount_format;
+ const char *child_path;
+ FILE *mount_file;
+
+ mount_file = fopen("/proc/self/mountinfo", "r");
+ if (mount_file == NULL) {
return ERTS_CGROUP_NONE;
}
- while (fgets(line_buf, sizeof(line_buf), var_file)) {
- char mount_path[4 << 10];
- char root_path[4 << 10];
- char fs_flags[512];
- char fs_type[64];
-
+ version = get_cgroup_child_path(controller, &child_path);
+ switch (version) {
+ case ERTS_CGROUP_NONE:
+ fclose(mount_file);
+ return ERTS_CGROUP_NONE;
+ case ERTS_CGROUP_V1:
/* Format:
* [Mount id] [Parent id] [Major] [Minor] [Root] [Mounted at] \
* [Mount flags] ... (options terminated by a single hyphen) ... \
@@ -1125,50 +1140,69 @@ get_cgroup_path(const char *controller, const char **path) {
*
* This fails if any of the fs options contain a hyphen, but this is
* not likely to happen on a cgroup, so we just skip such lines. */
- if (sscanf(line_buf,
- "%*d %*d %*d:%*d %4095s %4095s %*s%*[^-]- "
- "%63s %*s %511[^\n]\n",
- root_path, mount_path,
- fs_type, fs_flags) != 4) {
+ mount_format = "%*d %*d %*d:%*d %4095s %4095s %*s%*[^-]- "
+ "cgroup %*s %511[^\n]\n";
+ break;
+ case ERTS_CGROUP_V2:
+ mount_format = "%*d %*d %*d:%*d %4095s %4095s %*s%*[^-]- "
+ "cgroup2 %*s %511[^\n]\n";
+ break;
+ }
+
+ /* As a controller can only belong to one hierarchy, regardless of
+ * version, we'll go through all mounted filesystems one by one until
+ * the controller is found. */
+ *out = NULL;
+ while (fgets(mount_line, sizeof(mount_line), mount_file)) {
+ char mount_path[4 << 10];
+ char root_path[4 << 10];
+ char fs_flags[512];
+
+ if (sscanf(mount_line,
+ mount_format,
+ root_path,
+ mount_path,
+ fs_flags) != 3) {
continue;
}
- if (!strcmp(fs_type, "cgroup2")) {
+ if (version == ERTS_CGROUP_V2) {
char controllers[256];
+ const char *group_dir;
const char *cgc_path;
- cgc_path = str_combine(mount_path, "/cgroup.controllers");
+ group_dir = str_combine(mount_path, child_path);
+ cgc_path = str_combine(group_dir, "/cgroup.controllers");
+
if (read_file(cgc_path, controllers, sizeof(controllers)) > 0) {
if (csv_contains(controllers, controller, ' ')) {
free((void*)cgc_path);
- fclose(var_file);
-
- *path = strdup(mount_path);
- return ERTS_CGROUP_V2;
+ *out = group_dir;
+ break;
}
}
+
+ free((void*)group_dir);
free((void*)cgc_path);
- } else if (!strcmp(fs_type, "cgroup")) {
+ } else if (version == ERTS_CGROUP_V1) {
if (csv_contains(fs_flags, controller, ',')) {
- const char *base_dir = get_cgroup_v1_base_dir(controller);
-
- if (base_dir) {
- if (strcmp(root_path, base_dir)) {
- *path = str_combine(mount_path, base_dir);
- } else {
- *path = strdup(mount_path);
- }
-
- free((void*)base_dir);
- fclose(var_file);
-
- return ERTS_CGROUP_V1;
+ if (strcmp(root_path, child_path)) {
+ *out = str_combine(mount_path, child_path);
+ } else {
+ *out = strdup(mount_path);
}
+
+ break;
}
}
}
- fclose(var_file);
+ free((void*)child_path);
+ fclose(mount_file);
+
+ if (*out != NULL) {
+ return version;
+ }
return ERTS_CGROUP_NONE;
}
@@ -1176,82 +1210,90 @@ get_cgroup_path(const char *controller, const char **path) {
static int read_cgroup_interface(const char *group_path, const char *if_name,
int arg_count, const char *format, ...) {
const char *var_path;
- int res;
+ FILE *var_file;
var_path = str_combine(group_path, if_name);
- res = 0;
+ var_file = fopen(var_path, "r");
+ free((void*)var_path);
- if (var_path) {
- FILE *var_file;
+ if (var_file != NULL) {
+ va_list va_args;
+ int res;
- var_file = fopen(var_path, "r");
- free((void*)var_path);
+ va_start(va_args, format);
- if (var_file) {
- va_list va_args;
+ res = (vfscanf(var_file, format, va_args) == arg_count);
+ fclose(var_file);
- va_start(va_args, format);
+ va_end(va_args);
- if (vfscanf(var_file, format, va_args) == arg_count) {
- res = 1;
- }
+ return res;
+ }
- va_end(va_args);
+ return 0;
+}
- fclose(var_file);
+static int calculate_cpu_quota(int limit,
+ ssize_t cfs_period_us,
+ ssize_t cfs_quota_us) {
+ if (cfs_period_us > 0 && cfs_quota_us > 0) {
+ size_t quota = cfs_quota_us / cfs_period_us;
+
+ if (quota == 0) {
+ quota = 1;
+ }
+
+ if (quota <= (size_t)limit) {
+ return quota;
}
}
- return res;
+ return limit;
}
/* CPU quotas are read from the cgroup configuration, which can be pretty hairy
* as we need to support both v1 and v2, and it's possible for both versions to
* be active at the same time. */
-
static int
read_cpu_quota(int limit)
{
ssize_t cfs_period_us, cfs_quota_us;
const char *cgroup_path;
- int succeeded;
switch (get_cgroup_path("cpu", &cgroup_path)) {
case ERTS_CGROUP_V1:
- succeeded = read_cgroup_interface(cgroup_path, "/cpu.cfs_quota_us",
- 1, "%zi", &cfs_quota_us) &&
- read_cgroup_interface(cgroup_path, "/cpu.cfs_period_us",
- 1, "%zi", &cfs_period_us);
-
- free((void*)cgroup_path);
+ {
+ int succeeded = read_cgroup_interface(cgroup_path,
+ "/cpu.cfs_quota_us",
+ 1, "%zi", &cfs_quota_us) &&
+ read_cgroup_interface(cgroup_path,
+ "/cpu.cfs_period_us",
+ 1, "%zi", &cfs_period_us);
+ free((void*)cgroup_path);
+
+ if (succeeded) {
+ return calculate_cpu_quota(limit, cfs_quota_us, cfs_period_us);
+ }
+ }
break;
case ERTS_CGROUP_V2:
- succeeded = read_cgroup_interface(cgroup_path, "/cpu.max",
- 2, "%zi %zi", &cfs_quota_us, &cfs_period_us);
-
- free((void*)cgroup_path);
+ if (read_cgroup_interface(cgroup_path, "/cpu.max",
+ 1, "max %zi", &cfs_period_us)) {
+ /* No quota, just return our upper limit. */
+ free((void*)cgroup_path);
+ return limit;
+ } else if (read_cgroup_interface(cgroup_path, "/cpu.max",
+ 2, "%zi %zi",
+ &cfs_quota_us,
+ &cfs_period_us)) {
+ free((void*)cgroup_path);
+ return calculate_cpu_quota(limit, cfs_quota_us, cfs_period_us);
+ }
break;
default:
- succeeded = 0;
break;
}
- if (succeeded) {
- if (cfs_period_us > 0 && cfs_quota_us > 0) {
- size_t quota = cfs_quota_us / cfs_period_us;
-
- if (quota == 0) {
- quota = 1;
- }
-
- if (quota > 0 && quota <= (size_t)limit) {
- return quota;
- }
- }
-
- return limit;
- }
-
return 0;
}
--
2.35.3