File bsc#1232276-0001-Fix-libcrmservice-consider-a-monitor-pending-if-Load.patch of Package pacemaker.41116
From 2fa9c4a3611a931a2a37756d19ae5a889ff9486c Mon Sep 17 00:00:00 2001
From: "Gao,Yan" <ygao@suse.com>
Date: Tue, 13 May 2025 12:51:00 +0200
Subject: [PATCH] Fix: libcrmservice: consider a monitor pending if LoadUnit
receives no reply from systemd
... rather than return an error yet.
When systemd is re-executing by `systemctl daemon-reexec`, if a monitor
of a systemd resource happens to be executing at the same time, the
`LoadUnit` method will receive `org.freedesktop.DBus.Error.NoReply` from
dbus in the first place. Previously in a bad case, most of the systemd
resources in the cluster might report monitor errors and get restarted
despite the fact that they were actually running well.
This fixes it by marking such a monitor as PCMK_EXEC_PENDING rather than
PCMK_EXEC_ERROR, so that the monitor can retry with another iteration.
---
lib/services/systemd.c | 23 ++++++++++++++++++++---
1 file changed, 20 insertions(+), 3 deletions(-)
Index: pacemaker-2.1.2+20211124.ada5c3b36/lib/services/systemd.c
===================================================================
--- pacemaker-2.1.2+20211124.ada5c3b36.orig/lib/services/systemd.c
+++ pacemaker-2.1.2+20211124.ada5c3b36/lib/services/systemd.c
@@ -319,6 +319,19 @@ set_result_from_method_error(svc_action_
services__set_result(op, PCMK_OCF_NOT_INSTALLED,
PCMK_EXEC_NOT_INSTALLED, "systemd unit not found");
+
+ /* If systemd happens to be re-executing by `systemctl daemon-reexec` at the
+ * same time, dbus gives an error with the name
+ * `org.freedesktop.DBus.Error.NoReply` and the message "Message recipient
+ * disconnected from message bus without replying".
+ * Consider the monitor pending rather than return an error yet, so that it
+ * can retry with another iteration.
+ */
+ } else if (pcmk__str_any_of(op->action, CRMD_ACTION_STATUS,
+ "status", NULL)
+ && strstr(error->name, DBUS_ERROR_NO_REPLY)
+ && strstr(error->message, "disconnected")) {
+ services__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL);
}
crm_err("DBus request for %s of systemd unit %s for resource %s failed: %s",
@@ -373,8 +386,12 @@ execute_after_loadunit(DBusMessage *repl
invoke_unit_by_path(op, path);
} else if (!(op->synchronous)) {
- services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
- "No DBus object found for systemd unit");
+ if (!pcmk__str_any_of(op->action, CRMD_ACTION_STATUS,
+ "status", NULL)
+ || op->status != PCMK_EXEC_PENDING) {
+ services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
+ "No DBus object found for systemd unit");
+ }
services__finalize_async_op(op);
}
}