File pacemaker-pengine-pseudo-fence-guest-node-recovery.patch of Package pacemaker.14737
commit beab7718e14a54f1b50d7c5ff4b0086e09332da3
Author: Ken Gaillot <kgaillot@redhat.com>
Date: Fri Apr 15 13:10:17 2016 -0500
Fix: pengine: create a pseudo-fence for guest node recovery
If a guest node needs to be recovered, the PE would previously order actions
in relation to the stop action for the guest's container resource, if one
was scheduled.
This had problems: for implied stops due to fencing the guest's host, there
would be no stop action, so no ordering could be done; ordering in relation to
the stop action made stonith_constraints() mistakenly assume that the host node
(the node for the stop action) was the fence target, and thus mistakenly mark
the wrong stops/demotes as implied; and, clone notifications for fence events
would not get called for guest node recoveries, whether explicit or implied.
Now, a fence pseudo-event is created for guest node recovery, regardless of
whether there is an explicit stop action scheduled for the container. This
addresses all those issues, and will allow the crmd to detect
implied stops.
This also allows us to simplify the implied stop/demote detection, since we
will check the pseudo-op for implied actions -- we don't need to check the
real fence op for implied actions on guest nodes.
Index: pacemaker/crmd/te_utils.c
===================================================================
--- pacemaker.orig/crmd/te_utils.c
+++ pacemaker/crmd/te_utils.c
@@ -331,6 +331,14 @@ tengine_stonith_notify(stonith_t * st, s
/* The DC always sends updates */
send_stonith_update(NULL, st_event->target, uuid);
+ /* @TODO Ideally, at this point, we'd check whether the fenced node
+ * hosted any guest nodes, and call remote_node_down() for them.
+ * Unfortunately, the crmd doesn't have a simple, reliable way to
+ * map hosts to guests. It might be possible to track this in the
+ * peer cache via crm_remote_peer_cache_refresh(). For now, we rely
+ * on the PE creating fence pseudo-events for the guests.
+ */
+
if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) {
/* Abort the current transition graph if it wasn't us
Index: pacemaker/pengine/allocate.c
===================================================================
--- pacemaker.orig/pengine/allocate.c
+++ pacemaker/pengine/allocate.c
@@ -1337,6 +1337,69 @@ any_managed_resources(pe_working_set_t *
return FALSE;
}
+/*!
+ * \internal
+ * \brief Create pseudo-op for guest node fence, and order relative to it
+ *
+ * \param[in] node Guest node to fence
+ * \param[in] done STONITH_DONE operation (ordered after the pseudo-fence)
+ * \param[in] data_set Working set of CIB state
+ */
+static void
+fence_guest(pe_node_t *node, pe_action_t *done, pe_working_set_t *data_set)
+{
+ resource_t *container = node->details->remote_rsc->container;
+ pe_action_t *stop = NULL;
+ pe_action_t *stonith_op = NULL;
+
+ /* The fence action is just a label; we don't do anything differently for
+ * off vs. reboot. We specify it explicitly, rather than let it default to
+ * the cluster's default action, because we are not _initiating_ fencing --
+ * we are creating a pseudo-event to describe fencing that is already
+ * occurring by other means (container recovery).
+ */
+ const char *fence_action = "off";
+
+ /* Check whether the guest's container resource has any explicit stop or
+ * start (the stop may be implied by fencing of the guest's host).
+ */
+ if (container) {
+ stop = find_first_action(container->actions, NULL, CRMD_ACTION_STOP, NULL);
+
+ if (find_first_action(container->actions, NULL, CRMD_ACTION_START, NULL)) {
+ fence_action = "reboot";
+ }
+ }
+
+ /* Create a fence pseudo-event, so we have an event to order actions
+ * against, and crmd can always detect it.
+ */
+ stonith_op = pe_fence_op(node, fence_action, FALSE, data_set);
+ update_action_flags(stonith_op, pe_action_pseudo | pe_action_runnable);
+
+ /* We want to imply stops/demotes after the guest is stopped, not wait until
+ * it is restarted, so we always order pseudo-fencing after stop, not start
+ * (even though start might be closer to what is done for a real reboot).
+ */
+ if (stop) {
+ order_actions(stop, stonith_op,
+ pe_order_runnable_left|pe_order_implies_then);
+ crm_info("Implying guest node %s is down (action %d) "
+ "after container %s is stopped (action %d)",
+ node->details->uname, stonith_op->id,
+ container->id, stop->id);
+ } else {
+ crm_info("Implying guest node %s is down (action %d) ",
+ node->details->uname, stonith_op->id);
+ }
+
+ /* @TODO: Order pseudo-fence after any (optional) fence of guest's host */
+
+ /* Order/imply other actions relative to pseudo-fence as with real fence */
+ stonith_constraints(node, stonith_op, data_set);
+ order_actions(stonith_op, done, pe_order_implies_then);
+}
+
/*
* Create dependencies for stonith and shutdown operations
*/
@@ -1365,21 +1429,12 @@ stage6(pe_working_set_t * data_set)
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
- /* remote-nodes associated with a container resource (such as a vm) are not fenced */
+ /* Guest nodes are "fenced" by recovering their container resource,
+ * so handle them separately.
+ */
if (is_container_remote_node(node)) {
if (node->details->remote_requires_reset && need_stonith) {
- resource_t *container = node->details->remote_rsc->container;
- char *key = stop_key(container);
- GListPtr stop_list = find_actions(container->actions, key, NULL);
-
- crm_info("Implying node %s is down when container %s is stopped (%p)",
- node->details->uname, container->id, stop_list);
- if(stop_list) {
- stonith_constraints(node, stop_list->data, data_set);
- }
-
- g_list_free(stop_list);
- free(key);
+ fence_guest(node, done, data_set);
}
continue;
}
Index: pacemaker/pengine/graph.c
===================================================================
--- pacemaker.orig/pengine/graph.c
+++ pacemaker/pengine/graph.c
@@ -713,13 +713,7 @@ stonith_constraints(node_t * node, actio
CRM_CHECK(stonith_op != NULL, return FALSE);
for (r = data_set->resources; r != NULL; r = r->next) {
- resource_t *rsc = (resource_t *) r->data;
-
- if ((stonith_op->rsc == NULL)
- || ((stonith_op->rsc != rsc) && (stonith_op->rsc != rsc->container))) {
-
- rsc_stonith_ordering(rsc, stonith_op, data_set);
- }
+ rsc_stonith_ordering((resource_t *) r->data, stonith_op, data_set);
}
return TRUE;
}
@@ -886,7 +880,11 @@ action2xml(action_t * action, gboolean a
}
if (safe_str_eq(action->task, CRM_OP_FENCE)) {
- action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT);
+ /* All fences need node info; guest node fences are pseudo-events */
+ action_xml = create_xml_node(NULL,
+ is_set(action->flags, pe_action_pseudo)?
+ XML_GRAPH_TAG_PSEUDO_EVENT :
+ XML_GRAPH_TAG_CRM_EVENT);
} else if (safe_str_eq(action->task, CRM_OP_SHUTDOWN)) {
action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT);
Index: pacemaker/pengine/native.c
===================================================================
--- pacemaker.orig/pengine/native.c
+++ pacemaker/pengine/native.c
@@ -2903,48 +2903,6 @@ native_start_constraints(resource_t * rs
}
}
-/* User data to pass to guest node iterator */
-struct action_list_s {
- GListPtr search_list; /* list of actions to search */
- GListPtr result_list; /* list of matching actions for this node */
- const char *key; /* action key to match */
-};
-
-/*!
- * \internal
- * \brief Prepend a node's actions matching a key to a list
- *
- * \param[in] node Guest node
- * \param[in/out] data User data
- */
-static void prepend_node_actions(const node_t *node, void *data)
-{
- GListPtr actions;
- struct action_list_s *info = (struct action_list_s *) data;
-
- actions = find_actions(info->search_list, info->key, node);
- info->result_list = g_list_concat(actions, info->result_list);
-}
-
-static GListPtr
-find_fence_target_node_actions(GListPtr search_list, const char *key, node_t *fence_target, pe_working_set_t *data_set)
-{
- struct action_list_s action_list;
-
- /* Actions on the target that match the key are implied by the fencing */
- action_list.search_list = search_list;
- action_list.result_list = find_actions(search_list, key, fence_target);
- action_list.key = key;
-
- /*
- * If the target is a host for any guest nodes, actions on those nodes
- * that match the key are also implied by the fencing.
- */
- pe_foreach_guest_node(data_set, fence_target, prepend_node_actions, &action_list);
-
- return action_list.result_list;
-}
-
static void
native_stop_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set)
{
@@ -2964,8 +2922,7 @@ native_stop_constraints(resource_t * rsc
/* Get a list of stop actions potentially implied by the fencing */
key = stop_key(rsc);
- action_list = find_fence_target_node_actions(rsc->actions, key, target,
- data_set);
+ action_list = find_actions(rsc->actions, key, target);
free(key);
for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
@@ -3071,8 +3028,7 @@ native_stop_constraints(resource_t * rsc
/* Get a list of demote actions potentially implied by the fencing */
key = demote_key(rsc);
- action_list = find_fence_target_node_actions(rsc->actions, key, target,
- data_set);
+ action_list = find_actions(rsc->actions, key, target);
free(key);
for (gIter = action_list; gIter != NULL; gIter = gIter->next) {