File bsc#1177212-0001-Fix-scheduler-don-t-schedule-a-dangling-migration-st.patch of Package pacemaker.26925
From e8704c172c5e68aae00abec68f153d0f74159650 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 18 Aug 2020 14:15:31 -0500
Subject: [PATCH 1/2] Fix: scheduler: don't schedule a dangling migration stop
 if one already occurred

See the 2020-08-18 thread on the users@clusterlabs.org list
"why is node fenced ?". If a node had a dangling migration, later had a
successful stop to recover, and is now shutting down, the scheduler would
schedule another stop, which would be unrunnable due to the shutdown, causing
unnecessary fencing.
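
As a rough illustration (not pacemaker code; the struct, field names, and
call-ID values below are invented), the decision being fixed boils down to
comparing resource-history call IDs on the source node: a dangling-migration
stop should only be scheduled when no successful stop, start, or migrate_from
is newer than the failed migrate_to.

    /* Hypothetical, self-contained model of the check */
    #include <stdbool.h>
    #include <stdio.h>

    struct source_history {
        int migrate_to;   /* call ID of the failed migrate_to */
        int stop;         /* newest successful stop (-1 if none) */
        int start;        /* newest successful start (-1 if none) */
        int migrate_from; /* newest successful migrate_from (-1 if none) */
    };

    static bool needs_dangling_stop(const struct source_history *h)
    {
        /* any newer successful operation means the resource was already
         * recovered on the source, so no extra stop should be scheduled */
        return !(h->stop > h->migrate_to
                 || h->start > h->migrate_to
                 || h->migrate_from > h->migrate_to);
    }

    int main(void)
    {
        /* scenario from the report: migrate_to failed (call 10), and a
         * later successful stop (call 12) already recovered the resource */
        struct source_history h = { 10, 12, -1, -1 };
        printf("schedule dangling stop? %s\n",
               needs_dangling_stop(&h) ? "yes" : "no");   /* prints "no" */
        return 0;
    }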
---
 lib/pengine/unpack.c | 38 ++++++++++++++++++++++----------------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index 3ea6b2d03..a09061f0d 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -2627,6 +2627,17 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
     }
 }
 
+// Is there an action_name in node_name's rsc history newer than call_id?
+static bool
+newer_op(pe_resource_t *rsc, const char *action_name, const char *node_name,
+         int call_id, pe_working_set_t *data_set)
+{
+    xmlNode *action = find_lrm_op(rsc->id, action_name, node_name, NULL, TRUE,
+                                  data_set);
+
+    return pe__call_id(action) > call_id;
+}
+
 static void
 unpack_migrate_to_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
                           pe_working_set_t *data_set)
@@ -2657,7 +2668,7 @@ unpack_migrate_to_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
     target_migrate_from_id = pe__call_id(target_migrate_from);
 
     if ((target_stop == NULL) || (target_stop_id < target_migrate_from_id)) {
-        /* There was no stop on the source, or a stop that happened before a
+        /* There was no stop on the target, or a stop that happened before a
          * migrate_from, so assume the resource is still active on the target
          * (if it is up).
          */
@@ -2675,24 +2686,19 @@ unpack_migrate_to_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
      * scheduled or attempted).
      *
      * That means this could be a "dangling" migration. But first, check
-     * whether there is a newer migrate_from or start on the source node --
-     * it's possible the failed migration was followed by a successful
-     * full restart or migration in the reverse direction, in which case we
-     * don't want to force it to stop.
+     * whether there is a newer successful stop, start, or migrate_from on
+     * the source node -- it's possible the failed migration was followed by
+     * a successful stop, full restart, or migration in the reverse
+     * direction, in which case we don't want to force a stop.
      */
-    xmlNode *source_migrate_from = NULL;
-    xmlNode *source_start = NULL;
     int source_migrate_to_id = pe__call_id(xml_op);
 
-    source_migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, source,
-                                      NULL, TRUE, data_set);
-    if (pe__call_id(source_migrate_from) > source_migrate_to_id) {
-        return;
-    }
-
-    source_start = find_lrm_op(rsc->id, CRMD_ACTION_START, source, NULL,
-                               TRUE, data_set);
-    if (pe__call_id(source_start) > source_migrate_to_id) {
+    if (newer_op(rsc, CRMD_ACTION_MIGRATED, source, source_migrate_to_id,
+                 data_set)
+        || newer_op(rsc, CRMD_ACTION_START, source, source_migrate_to_id,
+                    data_set)
+        || newer_op(rsc, CRMD_ACTION_STOP, source, source_migrate_to_id,
+                    data_set)) {
         return;
     }
 
--
2.26.2