File bsc#1177212-0000-Low-libpe_status-handle-pending-migrations-correctly.patch of Package pacemaker.26927
From d22554288f7f04e09f5df1172a617b735617de37 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 22 May 2018 11:00:22 -0500
Subject: [PATCH] Low: libpe_status: handle pending migrations correctly
This is mainly a refactor of unpack_rsc_migration() for readability.
The one significant change is that previously, a migrate_from operation that
was *recorded* as pending (record-pending=true) was treated differently from an
unrecorded pending migrate_from (record-pending=false).
---
lib/pengine/unpack.c | 162 ++++++++++++++++++++++++-------------------
1 file changed, 91 insertions(+), 71 deletions(-)
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index b4ebf936e..85b0f684a 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -2281,94 +2281,114 @@ find_lrm_op(const char *resource, const char *op, const char *node, const char *
return get_xpath_object(xpath, data_set->input, LOG_DEBUG);
}
+static bool
+stop_happened_after(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
+ pe_working_set_t *data_set)
+{
+ xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->id,
+ NULL, data_set);
+
+ if (stop_op) {
+ int stop_id = 0;
+ int task_id = 0;
+
+ crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
+ crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id);
+ if (stop_id > task_id) {
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
static void
unpack_rsc_migration(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set)
{
-
- /*
- * The normal sequence is (now): migrate_to(Src) -> migrate_from(Tgt) -> stop(Src)
- *
- * So if a migrate_to is followed by a stop, then we don't need to care what
- * happened on the target node
+ /* A successful migration sequence is:
+ * migrate_to on source node
+ * migrate_from on target node
+ * stop on source node
*
- * Without the stop, we need to look for a successful migrate_from.
- * This would also imply we're no longer running on the source
+ * If a migrate_to is followed by a stop, the entire migration (successful
+ * or failed) is complete, and we don't care what happened on the target.
*
- * Without the stop, and without a migrate_from op we make sure the resource
- * gets stopped on both source and target (assuming the target is up)
+ * If no migrate_from has happened, the migration is considered to be
+ * "partial". If the migrate_from failed, make sure the resource gets
+ * stopped on both source and target (if up).
*
+ * If the migrate_to and migrate_from both succeeded (which also implies the
+ * resource is no longer running on the source), but there is no stop, the
+ * migration is considered to be "dangling".
*/
- int stop_id = 0;
- int task_id = 0;
- xmlNode *stop_op =
- find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->id, NULL, data_set);
-
- if (stop_op) {
- crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
+ int from_rc = 0;
+ int from_status = 0;
+ const char *migrate_source = NULL;
+ const char *migrate_target = NULL;
+ pe_node_t *target = NULL;
+ pe_node_t *source = NULL;
+ xmlNode *migrate_from = NULL;
+
+ if (stop_happened_after(rsc, node, xml_op, data_set)) {
+ return;
}
- crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id);
+ // Clones are not allowed to migrate, so role can't be master
+ rsc->role = RSC_ROLE_STARTED;
- if (stop_op == NULL || stop_id < task_id) {
- int from_rc = 0, from_status = 0;
- const char *migrate_source =
- crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
- const char *migrate_target =
- crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
+ migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
+ migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
- node_t *target = pe_find_node(data_set->nodes, migrate_target);
- node_t *source = pe_find_node(data_set->nodes, migrate_source);
- xmlNode *migrate_from =
- find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source,
- data_set);
+ target = pe_find_node(data_set->nodes, migrate_target);
+ source = pe_find_node(data_set->nodes, migrate_source);
- rsc->role = RSC_ROLE_STARTED; /* can be master? */
- if (migrate_from) {
- crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc);
- crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status);
- pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d",
- ID(migrate_from), migrate_target, from_status, from_rc);
- }
-
- if (migrate_from && from_rc == PCMK_OCF_OK
- && from_status == PCMK_LRM_OP_DONE) {
- pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op),
- migrate_source);
+ // Check whether there was a migrate_from action
+ migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target,
+ migrate_source, data_set);
+ if (migrate_from) {
+ crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc);
+ crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status);
+ pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d",
+ ID(migrate_from), migrate_target, from_status, from_rc);
+ }
- /* all good
- * just need to arrange for the stop action to get sent
- * but _without_ affecting the target somehow
- */
- rsc->role = RSC_ROLE_STOPPED;
- rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
+ if (migrate_from && from_rc == PCMK_OCF_OK
+ && from_status == PCMK_LRM_OP_DONE) {
+ /* The migrate_to and migrate_from both succeeded, so mark the migration
+ * as "dangling". This will be used to schedule a stop action on the
+ * source without affecting the target.
+ */
+ pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op),
+ migrate_source);
+ rsc->role = RSC_ROLE_STOPPED;
+ rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
- } else if (migrate_from) { /* Failed */
- if (target && target->details->online) {
- pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target,
- target->details->online);
- native_add_running(rsc, target, data_set);
- }
+ } else if (migrate_from && (from_status != PCMK_LRM_OP_PENDING)) { // Failed
+ if (target && target->details->online) {
+ pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target,
+ target->details->online);
+ native_add_running(rsc, target, data_set);
+ }
- } else { /* Pending or complete but erased */
- if (target && target->details->online) {
- pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target,
- target->details->online);
+ } else { // Pending, or complete but erased
+ if (target && target->details->online) {
+ pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target,
+ target->details->online);
- native_add_running(rsc, target, data_set);
- if (source && source->details->online) {
- /* If we make it here we have a partial migration. The migrate_to
- * has completed but the migrate_from on the target has not. Hold on
- * to the target and source on the resource. Later on if we detect that
- * the resource is still going to run on that target, we may continue
- * the migration */
- rsc->partial_migration_target = target;
- rsc->partial_migration_source = source;
- }
- } else {
- /* Consider it failed here - forces a restart, prevents migration */
- set_bit(rsc->flags, pe_rsc_failed);
- clear_bit(rsc->flags, pe_rsc_allow_migrate);
+ native_add_running(rsc, target, data_set);
+ if (source && source->details->online) {
+ /* This is a partial migration: the migrate_to completed
+ * successfully on the source, but the migrate_from has not
+ * completed. Remember the source and target; if the newly
+ * chosen target remains the same when we schedule actions
+ * later, we may continue with the migration.
+ */
+ rsc->partial_migration_target = target;
+ rsc->partial_migration_source = source;
}
+ } else {
+ /* Consider it failed here - forces a restart, prevents migration */
+ set_bit(rsc->flags, pe_rsc_failed);
+ clear_bit(rsc->flags, pe_rsc_allow_migrate);
}
}
}
--
2.26.2