File pacemaker-pengine-dont-clear-same-fail-count-twice.patch of Package pacemaker.14737
commit 357cd703e99fbcf1a371f34966accaf5322b1c50
Author: Ken Gaillot <kgaillot@redhat.com>
Date: Wed Feb 22 14:14:48 2017 -0600
Fix: pengine,libpe_status: don't clear same fail-count twice
Previously, pengine and libpe_status were inconsistent when generating
a key to use for a fail-count op. This could lead to two identical ops
being scheduled, one of which would time out (during which time the resource
would not be recovered). Now, they consistently use generate_op_key().
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index 67372739e..a35764392 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -2934,10 +2934,10 @@ static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNod
}
if (clear_failcount) {
- action_t *clear_op = NULL;
+ char *key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0);
+ action_t *clear_op = custom_action(rsc, key, CRM_OP_CLEAR_FAILCOUNT,
+ node, FALSE, TRUE, data_set);
- clear_op = custom_action(rsc, crm_concat(rsc->id, CRM_OP_CLEAR_FAILCOUNT, '_'),
- CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set);
add_hash_param(clear_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
}
diff --git a/pengine/allocate.c b/pengine/allocate.c
index 74b57fbb1..4e8d68df8 100644
--- a/pengine/allocate.c
+++ b/pengine/allocate.c
@@ -596,7 +596,7 @@ static gboolean
failcount_clear_action_exists(node_t * node, resource_t * rsc)
{
gboolean rc = FALSE;
- char *key = crm_concat(rsc->id, CRM_OP_CLEAR_FAILCOUNT, '_');
+ char *key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0);
GListPtr list = find_actions_exact(rsc->actions, key, node);
if (list) {
@@ -1195,10 +1195,9 @@ cleanup_orphans(resource_t * rsc, pe_working_set_t * data_set)
node_t *node = (node_t *) gIter->data;
if (node->details->online && get_failcount(node, rsc, NULL, data_set)) {
- action_t *clear_op = NULL;
-
- clear_op = custom_action(rsc, crm_concat(rsc->id, CRM_OP_CLEAR_FAILCOUNT, '_'),
- CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set);
+ char *key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0);
+ action_t *clear_op = custom_action(rsc, key, CRM_OP_CLEAR_FAILCOUNT,
+ node, FALSE, TRUE, data_set);
add_hash_param(clear_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
pe_rsc_info(rsc, "Clearing failcount (%d) for orphaned resource %s on %s (%s)",