File 1001-udev-use-lock-when-selecting-the-highest-priority-de.patch of Package systemd.27119

From 14f7217cd9e060b2187bd19ff5c20945a0694af0 Mon Sep 17 00:00:00 2001
From: Franck Bui <fbui@suse.com>
Date: Wed, 3 Mar 2021 17:34:39 +0100
Subject: [PATCH 1001/1007] udev: use lock when selecting the highest priority
 devlink

Commit 30f6dce62cb3a738b20253f2192270607c31b55b introduced a lock-less
algorithm, which is a kind of spinlock in userspace spinning on a heavy loop
(!?!), that is supposed to fix the race that might happen when multiple
workers/devices claim for the same symlink.

But the algorithm is boggus as every worker will prefer itself and will try to
"steal" the link from another worker with the same priority.

Rather than trying to fix the algorithm and close all possible races, let's use
a much simplest approach which consists in using a lock. This was originally
implemented by Martin Wilck: https://github.com/systemd/systemd/pull/8667 but
somehow upstream preferred the more complex lock-less algo without any proof of
benefits (quite the opposite since there're only drawback AFAICS).

In fact the figures we collected so far tends to show that the lock approach is
even faster than the racy version without any fix.

This patch basically drop the lock-less algo introduced by commit
30f6dce62cb3a738b20253f2192270607c31b55b and reuses Martin's lock approach with
some minor improvements.

Compare to the old implementation (before commit 30f6dce62cb3a738b20253f21), it
changes the moment the symlink are created: it's now down *after* udev DB has
been updated, hence removing the risk for another worker to overwrite a just
created symlink with a higher priority which has not yet been registed in the
DB. Latest versions of upstream calls update_devnode() twice, see
https://github.com/systemd/systemd/pull/19560#issuecomment-836693914 for the
"rationale".

[fbui: fixes bsc#1181192]
---
 src/udev/udev-event.c | 14 +++-----
 src/udev/udev-node.c  | 81 ++++++++++++++++---------------------------
 2 files changed, 35 insertions(+), 60 deletions(-)

diff --git a/src/udev/udev-event.c b/src/udev/udev-event.c
index b28089be71..773610709a 100644
--- a/src/udev/udev-event.c
+++ b/src/udev/udev-event.c
@@ -1039,10 +1039,6 @@ int udev_event_execute_rules(
         if (r < 0)
                 return r;
 
-        r = update_devnode(event);
-        if (r < 0)
-                return r;
-
         /* preserve old, or get new initialization timestamp */
         r = device_ensure_usec_initialized(dev, event->dev_db_clone);
         if (r < 0)
@@ -1057,12 +1053,12 @@ int udev_event_execute_rules(
         if (r < 0)
                 return log_device_debug_errno(dev, r, "Failed to update database under /run/udev/data/: %m");
 
-        device_set_is_initialized(dev);
+        r = update_devnode(event);
+        if (r < 0)
+                return r;
 
-        /* Yes, we run update_devnode() twice, because in the first invocation, that is before update of udev database,
-         * it could happen that two contenders are replacing each other's symlink. Hence we run it again to make sure
-         * symlinks point to devices that claim them with the highest priority. */
-        return update_devnode(event);
+        device_set_is_initialized(dev);
+        return 0;
 }
 
 void udev_event_execute_run(UdevEvent *event, usec_t timeout_usec, int timeout_signal) {
diff --git a/src/udev/udev-node.c b/src/udev/udev-node.c
index 9e52906571..25c83c5658 100644
--- a/src/udev/udev-node.c
+++ b/src/udev/udev-node.c
@@ -17,6 +17,7 @@
 #include "format-util.h"
 #include "fs-util.h"
 #include "hexdecoct.h"
+#include "lockfile-util.h"
 #include "mkdir.h"
 #include "path-util.h"
 #include "selinux-util.h"
@@ -257,10 +258,12 @@ toolong:
 
 /* manage "stack of names" with possibly specified device priorities */
 static int link_update(sd_device *dev, const char *slink_in, bool add) {
+        _cleanup_(release_lock_file) LockFile lf = LOCK_FILE_INIT;
         _cleanup_free_ char *slink = NULL, *filename = NULL, *dirname = NULL;
+        _cleanup_free_ char *target = NULL;
         const char *slink_name, *id;
         char name_enc[NAME_MAX+1];
-        int i, r, retries;
+        int r;
 
         assert(dev);
         assert(slink_in);
@@ -291,69 +294,45 @@ static int link_update(sd_device *dev, const char *slink_in, bool add) {
         if (!filename)
                 return log_oom_debug();
 
+        mkdir_parents(dirname, 0755);
+        r = make_lock_file_for(dirname, LOCK_EX, &lf);
+        if (r < 0)
+                return log_error_errno(r, "failed to lock %s", dirname);
+
         if (!add) {
                 if (unlink(filename) < 0 && errno != ENOENT)
-                        log_device_debug_errno(dev, errno, "Failed to remove %s, ignoring: %m", filename);
+                        log_device_error_errno(dev, errno, "Failed to remove %s, ignoring: %m", filename);
 
                 (void) rmdir(dirname);
         } else {
-                for (unsigned j = 0; j < TOUCH_FILE_MAX_RETRIES; j++) {
-                        /* This may fail with -ENOENT when the parent directory is removed during
-                         * creating the file by another udevd worker. */
-                        r = touch_file(filename, /* parents= */ true, USEC_INFINITY, UID_INVALID, GID_INVALID, 0444);
-                        if (r != -ENOENT)
-                                break;
-                }
+                r = touch_file(filename, true, USEC_INFINITY, UID_INVALID, GID_INVALID, 0444);
                 if (r < 0)
-                        return log_device_debug_errno(dev, r, "Failed to create %s: %m", filename);
+                        return log_device_error_errno(dev, r, "Failed to create %s: %m", filename);
         }
 
-        /* If the database entry is not written yet we will just do one iteration and possibly wrong symlink
-         * will be fixed in the second invocation. */
-        retries = sd_device_get_is_initialized(dev) > 0 ? LINK_UPDATE_MAX_RETRIES : 1;
-
-        for (i = 0; i < retries; i++) {
-                _cleanup_free_ char *target = NULL;
-                struct stat st1 = {}, st2 = {};
-
-                r = stat(dirname, &st1);
-                if (r < 0 && errno != ENOENT)
-                        return log_device_debug_errno(dev, errno, "Failed to stat %s: %m", dirname);
-
-                r = link_find_prioritized(dev, add, dirname, &target);
-                if (r < 0)
-                        return log_device_debug_errno(dev, r, "Failed to determine highest priority for symlink '%s': %m", slink);
-                if (r == 0) {
-                        log_device_debug(dev, "No reference left for '%s', removing", slink);
-
-                        if (unlink(slink) < 0 && errno != ENOENT)
-                                log_device_debug_errno(dev, errno, "Failed to remove '%s', ignoring: %m", slink);
-
-                        (void) rmdir_parents(slink, "/dev");
-                        break;
-                }
+        /* link_find_prioritized() relies on 'dev' but that's not necessary as
+         * 'filename' has been created previously.  */
+        r = link_find_prioritized(dev, add, dirname, &target);
+        if (r < 0)
+                return log_device_error_errno(dev, r, "Failed to find highest priority for symlink '%s': %m", slink);
+        if (r == 0) {
+                assert(!add);
 
-                r = node_symlink(dev, target, slink);
-                if (r < 0)
-                        return r;
-                if (r == 1)
-                        /* We have replaced already existing symlink, possibly there is some other device trying
-                         * to claim the same symlink. Let's do one more iteration to give us a chance to fix
-                         * the error if other device actually claims the symlink with higher priority. */
-                        continue;
+                log_device_debug(dev, "No reference left for '%s', removing", slink);
 
-                /* Skip the second stat() if the first failed, stat_inode_unmodified() would return false regardless. */
-                if ((st1.st_mode & S_IFMT) != 0) {
-                        r = stat(dirname, &st2);
-                        if (r < 0 && errno != ENOENT)
-                                return log_device_debug_errno(dev, errno, "Failed to stat %s: %m", dirname);
+                if (unlink(slink) < 0)
+                        return log_device_error_errno(dev, errno,
+                                                      "Failed to remove '%s', ignoring: %m", slink);
 
-                        if (stat_inode_unmodified(&st1, &st2))
-                                break;
-                }
+                (void) rmdir_parents(slink, "/dev");
+                return 0;
         }
 
-        return i < LINK_UPDATE_MAX_RETRIES ? 0 : -ELOOP;
+        r = node_symlink(dev, target, slink);
+        if (r < 0)
+                (void) unlink(filename);
+
+        return r;
 }
 
 int udev_node_update_old_links(sd_device *dev, sd_device *dev_old) {
-- 
2.31.1

openSUSE Build Service is sponsored by