File bsc#1177212-0007-Fix-scheduler-properly-detect-dangling-migrations.patch of Package pacemaker.26124
From c03ce99db95d99e0b4220d354d90f07252f2af3e Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 5 Jul 2019 19:00:45 -0500
Subject: [PATCH 7/8] Fix: scheduler: properly detect dangling migrations

Previously, a failed migrate_to with a stop but no migrate_from on the target
would always lead to the source being considered "dangling" (which would force
a stop on it).

However, that is incorrect if there was a later successful start or migration
back to the node. In that case, the scheduler would get into a loop of stopping
the resource, starting it at the next transition, then stopping it again at the
next transition.

Now, the source is not marked "dangling" in that case.
---
lib/pengine/unpack.c | 29 ++++++++++++++++++++++++++---
1 file changed, 26 insertions(+), 3 deletions(-)
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index 152b43e1f..5e69e0ab7 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -2567,10 +2567,33 @@ unpack_migrate_to_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
}
} else if (target_migrate_from == NULL) {
- /* There was a stop, but no migrate_from. The stop could have happened
- * before migrate_from was even scheduled, so mark it as dangling so we
- * can force a stop later.
+ /* We know there was a stop on the target, but there may not have been a
+ * migrate_from (the stop could have happened before migrate_from was
+ * scheduled or attempted).
+ *
+ * That means this could be a "dangling" migration. But first, check
+ * whether there is a newer migrate_from or start on the source node --
+ * it's possible the failed migration was followed by a successful
+ * full restart or migration in the reverse direction, in which case we
+ * don't want to force it to stop.
*/
+ xmlNode *source_migrate_from = NULL;
+ xmlNode *source_start = NULL;
+ int source_migrate_to_id = pe__call_id(xml_op);
+
+ source_migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, source,
+ NULL, TRUE, data_set);
+ if (pe__call_id(source_migrate_from) > source_migrate_to_id) {
+ return;
+ }
+
+ source_start = find_lrm_op(rsc->id, CRMD_ACTION_START, source, NULL,
+ TRUE, data_set);
+ if (pe__call_id(source_start) > source_migrate_to_id) {
+ return;
+ }
+
+ // Mark node as having dangling migration so we can force a stop later
rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
}
}
--
2.26.2