File pacemaker-pengine-resources-on-remote-nodes-not-restarted-until-necessary.patch of Package pacemaker.8397

commit 3f72a028f9a12a6623a7b88733a6fe7ecc879230
Author: Andrew Beekhof <andrew@beekhof.net>
Date:   Wed May 24 11:21:44 2017 +1000

    PE: Assume resources on remote nodes do not need to be restarted until absolutely necessary
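
    Previously, most actions on a remote node were unconditionally
    ordered after a (re)start of the node's connection resource with
    pe_order_implies_then, so recovering the connection forced a restart
    of every resource running on that node.  Now the connection's state
    is classified first, and restarts are only implied when the
    connection is genuinely dead.  In outline (simplified from
    apply_remote_ordering() in the hunk below):

        if (remote_rsc->next_role == RSC_ROLE_STOPPED
            || remote_rsc->allocated_to == NULL) {
            state = remote_state_dead;     /* nowhere left to run it */
        } else if (cluster_node == NULL) {
            state = remote_state_unknown;  /* recoverable, not placed yet */
        } else if (cluster_node->details->unclean
                   || cluster_node->details->online == FALSE) {
            state = remote_state_resting;  /* host is gone; recover first */
        } else if (g_list_length(remote_rsc->running_on) > 1
                   && remote_rsc->partial_migration_source
                   && remote_rsc->partial_migration_target) {
            state = remote_state_resting;  /* mid-migration; wait */
        } else {
            state = remote_state_alive;
        }

    Only remote_state_dead forces dependent starts and promotes to be
    recovered (pe_order_implies_then); in the other states they merely
    wait for the connection to be runnable (pe_order_runnable_left).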

Index: pacemaker/pengine/allocate.c
===================================================================
--- pacemaker.orig/pengine/allocate.c
+++ pacemaker/pengine/allocate.c
@@ -1726,6 +1726,249 @@ rsc_order_first(resource_t * lh_rsc, ord
 extern gboolean update_action(action_t * action);
 extern void update_colo_start_chain(action_t * action);
 
+enum remote_connection_state
+{
+    remote_state_unknown = 0,
+    remote_state_alive = 1,
+    remote_state_resting = 2,
+    remote_state_dead = 3
+};
+
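+/* Return TRUE if the action is recurring, i.e. has a nonzero interval */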
+static int
+is_recurring_action(action_t *action)
+{
+    const char *interval_s = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL);
+    int interval = crm_parse_int(interval_s, "0");
+    if(interval > 0) {
+        return TRUE;
+    }
+    return FALSE;
+}
+
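+/* Order an action on a guest node (a remote node backed by a container
+ * or VM) relative to both the container and the connection resource */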
+static void
+apply_container_ordering(action_t *action, pe_working_set_t *data_set)
+{
+    /* VMs are also classified as containers for these purposes: in
+     * both cases a 'thing' is running on a real or remote cluster
+     * node.
+     *
+     * This allows us to be smarter about the type and extent of
+     * recovery actions required in various scenarios.
+     */
+    resource_t *remote_rsc = NULL;
+    resource_t *container = NULL;
+    enum action_tasks task = text2task(action->task);
+
+    if (action->rsc == NULL) {
+        return;
+    }
+
+    CRM_ASSERT(action->node);
+    CRM_ASSERT(is_remote_node(action->node));
+    CRM_ASSERT(action->node->details->remote_rsc);
+
+    remote_rsc = action->node->details->remote_rsc;
+    CRM_ASSERT(remote_rsc);
+
+    container = remote_rsc->container;
+    CRM_ASSERT(container);
+
+    crm_trace("%s %s %s %s %d", action->uuid, action->task, remote_rsc->id, container->id, is_set(container->flags, pe_rsc_failed));
+    switch (task) {
+        case start_rsc:
+        case action_promote:
+            /* Force resource recovery if the container is recovered */
+            custom_action_order(container, generate_op_key(container->id, RSC_START, 0), NULL,
+                                action->rsc, NULL, action,
+                                pe_order_preserve | pe_order_implies_then | pe_order_runnable_left, data_set);
+
+            /* Wait for the connection resource to be up too */
+            custom_action_order(remote_rsc, generate_op_key(remote_rsc->id, RSC_START, 0), NULL,
+                                action->rsc, NULL, action,
+                                pe_order_preserve | pe_order_runnable_left, data_set);
+            break;
+        case stop_rsc:
+            if(is_set(container->flags, pe_rsc_failed)) {
+                /* When the container representing a guest node fails,
+                 * the stop action for all the resources living in
+                 * that container is implied by the container
+                 * stopping. This is similar to how fencing operations
+                 * work for cluster nodes.
+                 */
+                custom_action_order(container, generate_op_key(container->id, RSC_STOP, 0), NULL,
+                                    action->rsc, NULL, action,
+                                    pe_order_preserve | pe_order_implies_then | pe_order_runnable_left, data_set);
+                pe_set_action_bit(action, pe_action_pseudo);
+            } else {
+                /* Otherwise, ensure the operation happens before the connection is brought down */
+                custom_action_order(action->rsc, NULL, action,
+                                    remote_rsc, generate_op_key(remote_rsc->id, RSC_STOP, 0), NULL,
+                                    pe_order_preserve, data_set);
+            }
+            break;
+        case action_demote:
+            if(is_set(container->flags, pe_rsc_failed)) {
+                /* Just like a stop, the demote is implied by the
+                 * container having failed/stopped
+                 *
+                 * If we really wanted to, we would order the demote
+                 * after the stop, IFF the container's current role was
+                 * stopped (otherwise we would re-introduce an ordering
+                 * loop)
+                 */
+                pe_set_action_bit(action, pe_action_pseudo);
+            } else {
+                /* Otherwise, ensure the operation happens before the connection is brought down */
+                custom_action_order(action->rsc, NULL, action,
+                                    remote_rsc, generate_op_key(remote_rsc->id, RSC_STOP, 0), NULL,
+                                    pe_order_preserve, data_set);
+            }
+            break;
+        default:
+            /* Wait for the connection resource to be up */
+            if (is_recurring_action(action)) {
+                /* In case we ever get the recovery logic wrong, force
+                 * recurring monitors to be restarted, even if only
+                 * the connection was re-established
+                 */
+                custom_action_order(remote_rsc, generate_op_key(remote_rsc->id, RSC_START, 0), NULL,
+                                    action->rsc, NULL, action,
+                                    pe_order_preserve | pe_order_runnable_left | pe_order_implies_then, data_set);
+            } else {
+                custom_action_order(remote_rsc, generate_op_key(remote_rsc->id, RSC_START, 0), NULL,
+                                    action->rsc, NULL, action,
+                                    pe_order_preserve | pe_order_runnable_left, data_set);
+            }
+            break;
+    }
+}
+
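+/* Order an action on a remote node that has no container (e.g. a
+ * baremetal node) relative to the node's connection resource */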
+static void
+apply_remote_ordering(action_t *action, pe_working_set_t *data_set)
+{
+    resource_t *remote_rsc = NULL;
+    node_t *cluster_node = NULL;
+    enum action_tasks task = text2task(action->task);
+    enum remote_connection_state state = remote_state_unknown;
+
+    if (action->rsc == NULL) {
+        return;
+    }
+
+    CRM_ASSERT(action->node);
+    CRM_ASSERT(is_remote_node(action->node));
+    CRM_ASSERT(action->node->details->remote_rsc);
+
+    remote_rsc = action->node->details->remote_rsc;
+    CRM_ASSERT(remote_rsc);
+
+    if(remote_rsc->running_on) {
+        cluster_node = remote_rsc->running_on->data;
+    }
+
+    /* If the cluster node that the remote connection resource resides on
+     * is unclean or went offline, we can't process any operations on
+     * that remote node until after the connection starts elsewhere.
+     */
+    if(remote_rsc->next_role == RSC_ROLE_STOPPED || remote_rsc->allocated_to == NULL) {
+        /* There is nowhere left to run the connection resource.
+         * We must assume the target has failed.
+         */
+        state = remote_state_dead;
+
+    } else if (cluster_node == NULL) {
+        /* Connection is recoverable but not currently running anywhere; see if we can recover it first */
+        state = remote_state_unknown;
+
+    } else if(cluster_node->details->unclean == TRUE
+              || cluster_node->details->online == FALSE) {
+        /* Connection is running on a dead node; see if we can recover it first */
+        state = remote_state_resting;
+
+    } else if (g_list_length(remote_rsc->running_on) > 1
+               && remote_rsc->partial_migration_source
+               && remote_rsc->partial_migration_target) {
+        /* We're in the middle of migrating a connection resource;
+         * wait until after the resource migrates before performing
+         * any actions.
+         */
+        state = remote_state_resting;
+
+    } else {
+        state = remote_state_alive;
+    }
+
+    crm_trace("%s %s %d", action->uuid, action->task, state);
+    switch (task) {
+        case start_rsc:
+        case action_promote:
+            if(state == remote_state_dead) {
+                /* Wait for the connection resource to be up and force recovery */
+                custom_action_order(remote_rsc, generate_op_key(remote_rsc->id, RSC_START, 0), NULL,
+                                    action->rsc, NULL, action,
+                                    pe_order_preserve | pe_order_implies_then | pe_order_runnable_left, data_set);
+            } else {
+                /* Ensure the connection resource is up and assume everything is as we left it */
+                custom_action_order(remote_rsc, generate_op_key(remote_rsc->id, RSC_START, 0), NULL,
+                                    action->rsc, NULL, action,
+                                    pe_order_preserve | pe_order_runnable_left, data_set);
+            }
+            break;
+        case stop_rsc:
+            /* Handle special case with remote node where stop actions need to be
+             * ordered after the connection resource starts somewhere else.
+             */
+            if(state == remote_state_resting) {
+                /* Wait for the connection resource to be up and assume everything is as we left it */
+                custom_action_order(remote_rsc, generate_op_key(remote_rsc->id, RSC_START, 0), NULL,
+                                    action->rsc, NULL, action,
+                                    pe_order_preserve | pe_order_runnable_left, data_set);
+            } else {
+                custom_action_order(action->rsc, NULL, action,
+                                    remote_rsc, generate_op_key(remote_rsc->id, RSC_STOP, 0), NULL,
+                                    pe_order_preserve | pe_order_implies_first, data_set);
+            }
+            break;
+        case action_demote:
+
+            /* If the connection is being torn down, we don't want
+             * to build a constraint between a resource's demotion and
+             * the connection resource starting, because the connection
+             * resource cannot start. The connection might already be up,
+             * but the "start" action would not be allowed, which in turn
+             * would block the demotion of any resources living on the node.
+             */
+
+            if(state == remote_state_resting || state == remote_state_unknown) {
+                custom_action_order(remote_rsc, generate_op_key(remote_rsc->id, RSC_START, 0), NULL,
+                                    action->rsc, NULL, action,
+                                    pe_order_preserve, data_set);
+            } /* Otherwise we can rely on the stop ordering */
+            break;
+        default:
+            /* Wait for the connection resource to be up */
+            if (is_recurring_action(action)) {
+                /* In case we ever get the recovery logic wrong, force
+                 * recurring monitors to be restarted, even if only
+                 * the connection was re-established
+                 */
+                custom_action_order(remote_rsc, generate_op_key(remote_rsc->id, RSC_START, 0), NULL,
+                                    action->rsc, NULL, action,
+                                    pe_order_preserve | pe_order_runnable_left | pe_order_implies_then, data_set);
+            } else {
+                custom_action_order(remote_rsc, generate_op_key(remote_rsc->id, RSC_START, 0), NULL,
+                                    action->rsc, NULL, action,
+                                    pe_order_preserve | pe_order_runnable_left, data_set);
+            }
+            break;
+    }
+}
+
 static void
 apply_remote_node_ordering(pe_working_set_t *data_set)
 {
@@ -1734,10 +1972,9 @@ apply_remote_node_ordering(pe_working_se
     if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) {
         return;
     }
+
     for (; gIter != NULL; gIter = gIter->next) {
         action_t *action = (action_t *) gIter->data;
-        resource_t *remote_rsc = NULL;
-        resource_t *container = NULL;
 
         if (action->rsc == NULL) {
             continue;
@@ -1761,7 +1998,7 @@ apply_remote_node_ordering(pe_working_se
                 pe_order_optional,
                 data_set);
 
-                continue;
+            continue;
         }
 
         /* detect if the action occurs on a remote node. if so create
@@ -1772,134 +2009,13 @@ apply_remote_node_ordering(pe_working_se
             is_remote_node(action->node) == FALSE ||
             action->node->details->remote_rsc == NULL ||
             is_set(action->flags, pe_action_pseudo)) {
-            continue;
-        }
-
-        remote_rsc = action->node->details->remote_rsc;
-        container = remote_rsc->container;
-
-        if (safe_str_eq(action->task, "monitor") ||
-            safe_str_eq(action->task, "start") ||
-            safe_str_eq(action->task, "promote") ||
-            safe_str_eq(action->task, "notify") ||
-            safe_str_eq(action->task, CRM_OP_LRM_REFRESH) ||
-            safe_str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT) ||
-            safe_str_eq(action->task, "delete")) {
-
-            custom_action_order(remote_rsc,
-                generate_op_key(remote_rsc->id, RSC_START, 0),
-                NULL,
-                action->rsc,
-                NULL,
-                action,
-                pe_order_preserve | pe_order_implies_then | pe_order_runnable_left,
-                data_set);
+            crm_trace("Nothing required for %s", action->uuid);
 
-        } else if (safe_str_eq(action->task, "demote")) {
+        } else if(action->node->details->remote_rsc->container) {
+            apply_container_ordering(action, data_set);
 
-            /* If the connection is being torn down, we don't want
-             * to build a constraint between a resource's demotion and
-             * the connection resource starting... because the connection
-             * resource can not start. The connection might already be up,
-             * but the START action would not be allowed which in turn would
-             * block the demotion of any resournces living in the remote-node.
-             *
-             * In this case, only build the constraint between the demotion and
-             * the connection's stop action. This allows the connection and all the
-             * resources within the remote-node to be torn down properly. */
-            if (remote_rsc->next_role == RSC_ROLE_STOPPED) {
-                custom_action_order(action->rsc,
-                    NULL,
-                    action,
-                    remote_rsc,
-                    generate_op_key(remote_rsc->id, RSC_STOP, 0),
-                    NULL,
-                    pe_order_preserve | pe_order_implies_first,
-                    data_set);
-            } else if(container == NULL) {
-                custom_action_order(remote_rsc,
-                    generate_op_key(remote_rsc->id, RSC_START, 0),
-                    NULL,
-                    action->rsc,
-                    NULL,
-                    action,
-                    pe_order_preserve | pe_order_implies_then | pe_order_runnable_left,
-                    data_set);
-            }
-
-            if(container && is_set(container->flags, pe_rsc_failed)) {
-                /* Just like a stop, the demote is implied by the
-                 * container having failed/stopped
-                 *
-                 * If we really wanted to we would order the demote
-                 * after the stop, IFF the containers current role was
-                 * stopped (otherwise we re-introduce an ordering
-                 * loop)
-                 */
-                pe_set_action_bit(action, pe_action_pseudo);
-            }
-
-        } else if (safe_str_eq(action->task, "stop") &&
-                   container &&
-                   is_set(container->flags, pe_rsc_failed)) {
-
-            /* when the container representing a remote node fails, the stop
-             * action for all the resources living in that container is implied
-             * by the container stopping.  This is similar to how fencing operations
-             * work for cluster nodes. */
-            pe_set_action_bit(action, pe_action_pseudo);
-            custom_action_order(container,
-                generate_op_key(container->id, RSC_STOP, 0),
-                NULL,
-                action->rsc,
-                NULL,
-                action,
-                pe_order_preserve | pe_order_implies_then | pe_order_runnable_left,
-                data_set);
-        } else if (safe_str_eq(action->task, "stop")) {
-            gboolean after_start = FALSE;
-
-            /* handle special case with baremetal remote where stop actions need to be
-             * ordered after the connection resource starts somewhere else. */
-            if (is_baremetal_remote_node(action->node)) {
-                node_t *cluster_node = remote_rsc->running_on ? remote_rsc->running_on->data : NULL;
-
-                /* if the current cluster node a baremetal connection resource
-                 * is residing on is unclean or went offline we can't process any
-                 * operations on that remote node until after it starts somewhere else. */
-                if (cluster_node == NULL ||
-                    cluster_node->details->unclean == TRUE ||
-                    cluster_node->details->online == FALSE) {
-                    after_start = TRUE;
-                } else if (g_list_length(remote_rsc->running_on) > 1 &&
-                           remote_rsc->partial_migration_source &&
-                            remote_rsc->partial_migration_target) {
-                    /* if we're caught in the middle of migrating a connection resource,
-                     * then we have to wait until after the resource migrates before performing
-                     * any actions. */
-                    after_start = TRUE;
-                }
-            }
-
-            if (after_start) {
-                custom_action_order(remote_rsc,
-                    generate_op_key(remote_rsc->id, RSC_START, 0),
-                    NULL,
-                    action->rsc,
-                    NULL,
-                    action,
-                    pe_order_preserve | pe_order_implies_then | pe_order_runnable_left,
-                    data_set);
-            } else {
-                custom_action_order(action->rsc,
-                    NULL,
-                    action,
-                    remote_rsc,
-                    generate_op_key(remote_rsc->id, RSC_STOP, 0),
-                    NULL,
-                    pe_order_preserve | pe_order_implies_first,
-                    data_set);
-            }
+        } else {
+            apply_remote_ordering(action, data_set);
         }
     }
 }