File 0003-Fix-watchdog-fencing-correctly-derive-timeout-with-t.patch of Package pacemaker.29727
From c4eb45a986f8865fc5e69350fd5b9f4b056d9d69 Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Tue, 14 Feb 2023 11:57:17 +0100
Subject: [PATCH 3/3] Fix: watchdog-fencing: correctly derive timeout with topology
Up to now, the timeout for watchdog-fencing was only added to
the overall timeout if the node to be fenced was visible and
replied to the query. With a topology, it is now added for the
watchdog device regardless of whether a reply was received.
---
daemons/fenced/fenced_remote.c | 28 +++++++++++++++++++++++++---
1 file changed, 25 insertions(+), 3 deletions(-)
Index: pacemaker-2.1.2+20211124.ada5c3b36/daemons/fenced/fenced_remote.c
===================================================================
--- pacemaker-2.1.2+20211124.ada5c3b36.orig/daemons/fenced/fenced_remote.c
+++ pacemaker-2.1.2+20211124.ada5c3b36/daemons/fenced/fenced_remote.c
@@ -878,9 +878,10 @@ advance_topology_level(remote_fencing_op
return pcmk_rc_ok;
}
- crm_notice("All fencing options targeting %s for client %s@%s failed "
- CRM_XS " id=%.8s",
- op->target, op->client_name, op->originator, op->id);
+ crm_info("All %sfencing options targeting %s for client %s@%s failed "
+ CRM_XS " id=%.8s",
+ (stonith_watchdog_timeout_ms > 0)?"non-watchdog ":"",
+ op->target, op->client_name, op->originator, op->id);
return ENODEV;
}
@@ -1311,8 +1312,17 @@ stonith_choose_peer(remote_fencing_op_t
&& pcmk_is_set(op->call_options, st_opt_topology)
&& (advance_topology_level(op, false) == pcmk_rc_ok));
- crm_notice("Couldn't find anyone to fence (%s) %s using %s",
- op->action, op->target, (device? device : "any device"));
+ if ((stonith_watchdog_timeout_ms > 0)
+ && pcmk__str_any_of(op->action, "off", "reboot", "poweroff", NULL)
+ && pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none)
+ && node_does_watchdog_fencing(op->target)) {
+ crm_info("Couldn't contact watchdog-fencing target-node (%s)",
+ op->target);
+ /* check_watchdog_fencing_and_wait will log additional info */
+ } else {
+ crm_notice("Couldn't find anyone to fence (%s) %s using %s",
+ op->action, op->target, (device? device : "any device"));
+ }
return NULL;
}
@@ -1402,6 +1412,18 @@ get_op_total_timeout(const remote_fencin
continue;
}
for (device_list = tp->levels[i]; device_list; device_list = device_list->next) {
+ /* For the watchdog device, add its timeout to the budget
+ whether or not we got a reply
+ */
+ if ((stonith_watchdog_timeout_ms > 0)
+ && pcmk__str_any_of(op->action, "off", "reboot", "poweroff", NULL)
+ && pcmk__str_eq(device_list->data, STONITH_WATCHDOG_ID,
+ pcmk__str_none)
+ && node_does_watchdog_fencing(op->target)) {
+ total_timeout += stonith_watchdog_timeout_ms / 1000;
+ continue;
+ }
+
for (iter = op->query_results; iter != NULL; iter = iter->next) {
const st_query_result_t *peer = iter->data;