File pacemaker-pengine-transition-loop-start-then-stop-unfencing.patch of Package pacemaker.3577

commit 48622e7462f8a9bbb94d9cc925133f3afaa52629
Author: Ken Gaillot <kgaillot@redhat.com>
Date:   Mon Jul 18 16:25:56 2016 -0500

    Fix: pengine: avoid transition loop for start-then-stop + unfencing
    
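    This is only a partial fix: the unfencing ordering is now created per
    resource instance and limited to the same node, but a transition loop
    is still possible when a user ordering constraint starts another
    resource (which is ordered after unfencing) before stopping this one.
    See the @TODO comment added in pengine/native.c.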

diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h
index 94aa832..c376c73 100644
--- a/include/crm/pengine/status.h
+++ b/include/crm/pengine/status.h
@@ -206,7 +206,7 @@ struct node_s {
 #  define pe_rsc_needs_quorum	 0x10000000ULL
 #  define pe_rsc_needs_fencing	 0x20000000ULL
 #  define pe_rsc_needs_unfencing 0x40000000ULL
-#  define pe_rsc_have_unfencing  0x80000000ULL
+#  define pe_rsc_have_unfencing  0x80000000ULL /* obsolete (not set or used by cluster) */
 
 enum pe_graph_flags {
     pe_graph_none = 0x00000,
diff --git a/pengine/native.c b/pengine/native.c
index 9f659ef..9d9a2da 100644
--- a/pengine/native.c
+++ b/pengine/native.c
@@ -1342,30 +1342,41 @@ native_internal_constraints(resource_t * rsc, pe_working_set_t * data_set)
 
     if (is_stonith == FALSE
         && is_set(data_set->flags, pe_flag_enable_unfencing)
-        && is_set(rsc->flags, pe_rsc_needs_unfencing)
-        && is_not_set(rsc->flags, pe_rsc_have_unfencing)) {
+        && is_set(rsc->flags, pe_rsc_needs_unfencing)) {
         /* Check if the node needs to be unfenced first */
         node_t *node = NULL;
         GHashTableIter iter;
 
-        if(rsc != top) {
-            /* Only create these constraints once, rsc is almost certainly cloned */
-            set_bit_recursive(top, pe_rsc_have_unfencing);
-        }
-
         g_hash_table_iter_init(&iter, rsc->allowed_nodes);
         while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
             action_t *unfence = pe_fence_op(node, "on", TRUE, data_set);
 
-            custom_action_order(top, generate_op_key(top->id, top == rsc?RSC_STOP:RSC_STOPPED, 0), NULL,
-                                NULL, strdup(unfence->uuid), unfence,
-                                pe_order_optional, data_set);
+            crm_debug("Ordering any stops of %s before %s, and any starts after",
+                      rsc->id, unfence->uuid);
 
-            crm_debug("Stopping %s prior to unfencing %s", top->id, unfence->uuid);
+            /*
+             * It would be more efficient to order clone resources once,
+             * rather than order each instance, but ordering the instance
+             * allows us to avoid unnecessary dependencies that might conflict
+             * with user constraints.
+             *
+             * @TODO: This constraint can still produce a transition loop if the
+             * resource has a stop scheduled on the node being unfenced, and
+             * there is a user ordering constraint to start some other resource
+             * (which will be ordered after the unfence) before stopping this
+             * resource. An example is "start some slow-starting cloned service
+             * before stopping an associated virtual IP that may be moving to
+             * it":
+             *       stop this -> unfencing -> start that -> stop this
+             */
+            custom_action_order(rsc, stop_key(rsc), NULL,
+                                NULL, strdup(unfence->uuid), unfence,
+                                pe_order_optional|pe_order_same_node, data_set);
 
             custom_action_order(NULL, strdup(unfence->uuid), unfence,
-                                top, generate_op_key(top->id, RSC_START, 0), NULL,
-                                pe_order_implies_then_on_node, data_set);
+                                rsc, start_key(rsc), NULL,
+                                pe_order_implies_then_on_node|pe_order_same_node,
+                                data_set);
         }
     }
 
openSUSE Build Service is sponsored by