File pacemaker-pengine-properly-cancel-recurring-monitors.patch of Package pacemaker.19778
commit 81f7635571e67c413e1a4977fddec3f394e5478f
Author: Ken Gaillot <kgaillot@redhat.com>
Date: Wed Mar 7 15:40:50 2018 -0600
Fix: pengine: properly cancel recurring monitors
Previously, RecurringOp() and RecurringOp_Stopped() would create a cancel op
and set its action interval meta-attribute to the configured interval
specification. However, that meta-attribute is interpreted as milliseconds.
This factors the shared code out into a new pe_cancel_op() function, which
reduces code duplication across those functions and CancelXmlOp() and fixes the issue.
Index: pacemaker-1.1.18+20180406.19c7be5c7/pengine/allocate.c
===================================================================
--- pacemaker-1.1.18+20180406.19c7be5c7.orig/pengine/allocate.c
+++ pacemaker-1.1.18+20180406.19c7be5c7/pengine/allocate.c
@@ -222,7 +222,6 @@ CancelXmlOp(resource_t * rsc, xmlNode *
int interval = 0;
action_t *cancel = NULL;
- char *key = NULL;
const char *task = NULL;
const char *call_id = NULL;
const char *interval_s = NULL;
@@ -236,27 +235,13 @@ CancelXmlOp(resource_t * rsc, xmlNode *
interval = crm_parse_int(interval_s, "0");
- /* we need to reconstruct the key because of the way we used to construct resource IDs */
- key = generate_op_key(rsc->id, task, interval);
+ crm_info("Action %s_%s_%u on %s will be stopped: %s",
+ rsc->id, task, interval,
+ active_node->details->uname, (reason? reason : "unknown"));
- crm_info("Action %s on %s will be stopped: %s",
- key, active_node->details->uname, reason ? reason : "unknown");
-
- /* TODO: This looks highly dangerous if we ever try to schedule 'key' too */
- cancel = custom_action(rsc, strdup(key), RSC_CANCEL, active_node, FALSE, TRUE, data_set);
-
- free(cancel->task);
- free(cancel->cancel_task);
- cancel->task = strdup(RSC_CANCEL);
- cancel->cancel_task = strdup(task);
-
- add_hash_param(cancel->meta, XML_LRM_ATTR_TASK, task);
+ cancel = pe_cancel_op(rsc, task, interval, active_node, data_set);
add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id);
- add_hash_param(cancel->meta, XML_LRM_ATTR_INTERVAL, interval_s);
-
custom_action_order(rsc, stop_key(rsc), NULL, rsc, NULL, cancel, pe_order_optional, data_set);
- free(key);
- key = NULL;
}
static gboolean
Index: pacemaker-1.1.18+20180406.19c7be5c7/pengine/native.c
===================================================================
--- pacemaker-1.1.18+20180406.19c7be5c7.orig/pengine/native.c
+++ pacemaker-1.1.18+20180406.19c7be5c7/pengine/native.c
@@ -736,47 +736,36 @@ RecurringOp(resource_t * rsc, action_t *
const char *result = "Ignoring";
if (is_optional) {
- char *local_key = strdup(key);
+ char *after_key = NULL;
+ action_t *cancel_op = NULL;
+ // It's running, so cancel it
log_level = LOG_INFO;
result = "Cancelling";
- /* it's running : cancel it */
-
- mon = custom_action(rsc, local_key, RSC_CANCEL, node, FALSE, TRUE, data_set);
-
- free(mon->task);
- free(mon->cancel_task);
- mon->task = strdup(RSC_CANCEL);
- mon->cancel_task = strdup(name);
- add_hash_param(mon->meta, XML_LRM_ATTR_INTERVAL, interval);
- add_hash_param(mon->meta, XML_LRM_ATTR_TASK, name);
-
- local_key = NULL;
+ cancel_op = pe_cancel_op(rsc, name, interval_ms, node, data_set);
switch (rsc->role) {
case RSC_ROLE_SLAVE:
case RSC_ROLE_STARTED:
if (rsc->next_role == RSC_ROLE_MASTER) {
- local_key = promote_key(rsc);
+ after_key = promote_key(rsc);
} else if (rsc->next_role == RSC_ROLE_STOPPED) {
- local_key = stop_key(rsc);
+ after_key = stop_key(rsc);
}
break;
case RSC_ROLE_MASTER:
- local_key = demote_key(rsc);
+ after_key = demote_key(rsc);
break;
default:
break;
}
- if (local_key) {
- custom_action_order(rsc, NULL, mon, rsc, local_key, NULL,
+ if (after_key) {
+ custom_action_order(rsc, NULL, cancel_op, rsc, after_key, NULL,
pe_order_runnable_left, data_set);
}
-
- mon = NULL;
}
do_crm_log(log_level, "%s action %s (%s vs. %s)",
@@ -915,20 +904,10 @@ RecurringOp_Stopped(resource_t * rsc, ac
possible_matches = find_actions_exact(rsc->actions, key, node);
if (possible_matches) {
action_t *cancel_op = NULL;
- char *local_key = strdup(key);
g_list_free(possible_matches);
- cancel_op = custom_action(rsc, local_key, RSC_CANCEL, node, FALSE, TRUE, data_set);
-
- free(cancel_op->task);
- free(cancel_op->cancel_task);
- cancel_op->task = strdup(RSC_CANCEL);
- cancel_op->cancel_task = strdup(name);
- add_hash_param(cancel_op->meta, XML_LRM_ATTR_INTERVAL, interval);
- add_hash_param(cancel_op->meta, XML_LRM_ATTR_TASK, name);
-
- local_key = NULL;
+ cancel_op = pe_cancel_op(rsc, name, interval_ms, node, data_set);
if (rsc->next_role == RSC_ROLE_STARTED || rsc->next_role == RSC_ROLE_SLAVE) {
/* rsc->role == RSC_ROLE_STOPPED: cancel the monitor before start */
Index: pacemaker-1.1.18+20180406.19c7be5c7/pengine/utils.c
===================================================================
--- pacemaker-1.1.18+20180406.19c7be5c7.orig/pengine/utils.c
+++ pacemaker-1.1.18+20180406.19c7be5c7/pengine/utils.c
@@ -440,3 +440,41 @@ create_pseudo_resource_op(resource_t * r
}
return action;
}
+
+/*!
+ * \internal
+ * \brief Create an LRMD cancel op for a resource action
+ *
+ * \param[in] rsc Resource of action to cancel
+ * \param[in] task Name of action to cancel
+ * \param[in] interval_ms Interval of action to cancel (in milliseconds)
+ * \param[in] node Node of action to cancel
+ * \param[in] data_set Working set of cluster
+ *
+ * \return Created op (task set to RSC_CANCEL, cancel_task set to a copy of \p task)
+ */
+pe_action_t *
+pe_cancel_op(resource_t *rsc, const char *task, guint interval_ms,
+ pe_node_t *node, pe_working_set_t *data_set)
+{
+ pe_action_t *cancel_op;
+ char *interval_ms_s = crm_strdup_printf("%u", interval_ms);
+
+ // @TODO dangerous if possible to schedule another action with this key
+ char *key = generate_op_key(rsc->id, task, interval_ms);
+ // NOTE(review): key is not freed here; presumably custom_action() takes ownership - confirm
+ cancel_op = custom_action(rsc, key, RSC_CANCEL, node, FALSE, TRUE,
+ data_set);
+
+ free(cancel_op->task);
+ cancel_op->task = strdup(RSC_CANCEL);
+
+ free(cancel_op->cancel_task);
+ cancel_op->cancel_task = strdup(task);
+ // Store the cancelled task's name and its interval (formatted with "%u") as meta-attributes
+ add_hash_param(cancel_op->meta, XML_LRM_ATTR_TASK, task);
+ add_hash_param(cancel_op->meta, XML_LRM_ATTR_INTERVAL, interval_ms_s);
+ free(interval_ms_s);
+
+ return cancel_op;
+}
Index: pacemaker-1.1.18+20180406.19c7be5c7/pengine/utils.h
===================================================================
--- pacemaker-1.1.18+20180406.19c7be5c7.orig/pengine/utils.h
+++ pacemaker-1.1.18+20180406.19c7be5c7/pengine/utils.h
@@ -76,6 +76,9 @@ extern void calculate_utilization(GHashT
extern void process_utilization(resource_t * rsc, node_t ** prefer, pe_working_set_t * data_set);
pe_action_t *create_pseudo_resource_op(resource_t * rsc, const char *task, bool optional, bool runnable, pe_working_set_t *data_set);
+pe_action_t *pe_cancel_op(resource_t *rsc, const char *task, // task: name of action to cancel
+ guint interval_ms, pe_node_t *node, // interval_ms: interval in milliseconds
+ pe_working_set_t *data_set);
# define STONITH_UP "stonith_up"
# define STONITH_DONE "stonith_complete"