File jsc#ECO-1611-0007-Feature-fencer-handle-any-enforced-fencing-delay.patch of Package pacemaker.15718
From 3dec9302e50acc8b4a97b2a3cd9211b4421879c1 Mon Sep 17 00:00:00 2001
From: "Gao,Yan" <ygao@suse.com>
Date: Wed, 18 Mar 2020 15:48:32 +0100
Subject: [PATCH 7/9] Feature: fencer: handle any enforced fencing delay
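An enforced fencing delay (any F_STONITH_DELAY value >= 0 carried in the
request; -1 means "not enforced") takes precedence over any
pcmk_delay_base/pcmk_delay_max configured for the corresponding fencing
resources. This includes an enforced delay of 0, which suppresses the
configured delay.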
The enforced fencing delay is applied only to the first device in the
first fencing topology level; it is reset to 0 before any later device
or level is executed.
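Consistently use g_timeout_add_seconds() for pcmk_delay_base/max as
well, so start_delay and both configured delays are now handled in
whole seconds rather than milliseconds.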
---
daemons/fenced/fenced_commands.c | 71 ++++++++++++++++++-------------
daemons/fenced/fenced_remote.c | 19 +++++++++
daemons/fenced/pacemaker-fenced.h | 4 ++
3 files changed, 64 insertions(+), 30 deletions(-)
Index: pacemaker-2.0.1+20190417.13d370ca9/daemons/fenced/fenced_commands.c
===================================================================
--- pacemaker-2.0.1+20190417.13d370ca9.orig/daemons/fenced/fenced_commands.c
+++ pacemaker-2.0.1+20190417.13d370ca9/daemons/fenced/fenced_commands.c
@@ -80,7 +80,7 @@ typedef struct async_command_s {
int default_timeout; /* seconds */
int timeout; /* seconds */
- int start_delay; /* milliseconds */
+ int start_delay; /* seconds */
int delay_id;
char *op;
@@ -122,7 +122,7 @@ static int
get_action_delay_max(stonith_device_t * device, const char * action)
{
const char *value = NULL;
- int delay_max_ms = 0;
+ int delay_max = 0;
if (safe_str_neq(action, "off") && safe_str_neq(action, "reboot")) {
return 0;
@@ -130,17 +130,17 @@ get_action_delay_max(stonith_device_t *
value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_MAX);
if (value) {
- delay_max_ms = crm_get_msec(value);
+ delay_max = crm_parse_interval_spec(value) / 1000;
}
- return delay_max_ms;
+ return delay_max;
}
static int
get_action_delay_base(stonith_device_t * device, const char * action)
{
const char *value = NULL;
- int delay_base_ms = 0;
+ int delay_base = 0;
if (safe_str_neq(action, "off") && safe_str_neq(action, "reboot")) {
return 0;
@@ -148,10 +148,10 @@ get_action_delay_base(stonith_device_t *
value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_BASE);
if (value) {
- delay_base_ms = crm_get_msec(value);
+ delay_base = crm_parse_interval_spec(value) / 1000;
}
- return delay_base_ms;
+ return delay_base;
}
/*!
@@ -242,6 +242,9 @@ create_async_command(xmlNode * msg)
crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options));
crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout));
cmd->timeout = cmd->default_timeout;
+ // Default value -1 means no enforced fencing delay
+ cmd->start_delay = -1;
+ crm_element_value_int(msg, F_STONITH_DELAY, &(cmd->start_delay));
cmd->origin = crm_element_value_copy(msg, F_ORIG);
cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID);
@@ -330,7 +333,7 @@ stonith_device_execute(stonith_device_t
if (pending_op && pending_op->delay_id) {
crm_trace
- ("Operation '%s'%s%s on %s was asked to run too early, waiting for start_delay timeout of %dms",
+ ("Operation '%s'%s%s on %s was asked to run too early, waiting for start_delay timeout of %ds",
pending_op->action, pending_op->victim ? " targeting " : "",
pending_op->victim ? pending_op->victim : "",
device->id, pending_op->start_delay);
@@ -447,6 +450,7 @@ schedule_stonith_command(async_command_t
{
int delay_max = 0;
int delay_base = 0;
+ bool delay_enforced = (cmd->start_delay >= 0);
CRM_CHECK(cmd != NULL, return);
CRM_CHECK(device != NULL, return);
@@ -479,30 +483,37 @@ schedule_stonith_command(async_command_t
device->pending_ops = g_list_append(device->pending_ops, cmd);
mainloop_set_trigger(device->work);
- delay_max = get_action_delay_max(device, cmd->action);
- delay_base = get_action_delay_base(device, cmd->action);
- if (delay_max == 0) {
- delay_max = delay_base;
- }
- if (delay_max < delay_base) {
- crm_warn("Base-delay (%dms) is larger than max-delay (%dms) "
- "for %s on %s - limiting to max-delay",
- delay_base, delay_max, cmd->action, device->id);
- delay_base = delay_max;
- }
- if (delay_max > 0) {
- // coverity[dont_call] We're not using rand() for security
- cmd->start_delay =
- ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0)
- + delay_base;
- crm_notice("Delaying '%s' action%s%s on %s for %dms (timeout=%ds, base=%dms, "
- "max=%dms)",
- cmd->action,
- cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
- device->id, cmd->start_delay, cmd->timeout,
- delay_base, delay_max);
+ // No enforced fencing delay
+ if (delay_enforced == FALSE) {
+ delay_max = get_action_delay_max(device, cmd->action);
+ delay_base = get_action_delay_base(device, cmd->action);
+ if (delay_max == 0) {
+ delay_max = delay_base;
+ }
+ if (delay_max < delay_base) {
+ crm_warn("Base-delay (%ds) is larger than max-delay (%ds) "
+ "for %s on %s - limiting to max-delay",
+ delay_base, delay_max, cmd->action, device->id);
+ delay_base = delay_max;
+ }
+ if (delay_max > 0) {
+ // coverity[dont_call] We're not using rand() for security
+ cmd->start_delay =
+ ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0)
+ + delay_base;
+ }
+ }
+
+ if (cmd->start_delay > 0) {
+ crm_notice("Delaying '%s' action%s%s on %s for %s%ds (timeout=%ds, base=%ds, "
+ "max=%ds)",
+ cmd->action,
+ cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
+ device->id, delay_enforced ? "enforced " : "",
+ cmd->start_delay, cmd->timeout,
+ delay_base, delay_max);
cmd->delay_id =
- g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
+ g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd);
}
}
Index: pacemaker-2.0.1+20190417.13d370ca9/daemons/fenced/fenced_remote.c
===================================================================
--- pacemaker-2.0.1+20190417.13d370ca9.orig/daemons/fenced/fenced_remote.c
+++ pacemaker-2.0.1+20190417.13d370ca9/daemons/fenced/fenced_remote.c
@@ -814,6 +814,11 @@ stonith_topology_next(remote_fencing_op_
op->client_name, op->originator, op->id);
set_op_device_list(op, tp->levels[op->level]);
+ // The enforced delay has been applied for the first fencing level
+ if (op->level > 1 && op->delay > 0) {
+ op->delay = 0;
+ }
+
if (g_list_next(op->devices_list) && safe_str_eq(op->action, "reboot")) {
/* A reboot has been requested for a topology level with multiple
* devices. Instead of rebooting the devices sequentially, we will
@@ -969,6 +974,10 @@ create_remote_stonith_op(const char *cli
crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout));
+ // Default value -1 means no enforced fencing delay
+ op->delay = -1;
+ crm_element_value_int(request, F_STONITH_DELAY, &(op->delay));
+
if (peer && dev) {
op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID);
} else {
@@ -1416,6 +1425,12 @@ advance_op_topology(remote_fencing_op_t
/* Necessary devices remain, so execute the next one */
crm_trace("Next for %s on behalf of %s@%s (rc was %d)",
op->target, op->originator, op->client_name, rc);
+
+ // The enforced delay has been applied for the first device
+ if (op->delay > 0) {
+ op->delay = 0;
+ }
+
call_remote_stonith(op, NULL);
} else {
/* We're done with all devices and phases, so finalize operation */
@@ -1470,6 +1485,10 @@ call_remote_stonith(remote_fencing_op_t
crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout);
crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options);
+ if (op->delay >= 0) {
+ crm_xml_add_int(remote_op, F_STONITH_DELAY, op->delay);
+ }
+
if (device) {
timeout_one = TIMEOUT_MULTIPLY_FACTOR *
get_device_timeout(op, peer, device);
Index: pacemaker-2.0.1+20190417.13d370ca9/daemons/fenced/pacemaker-fenced.h
===================================================================
--- pacemaker-2.0.1+20190417.13d370ca9.orig/daemons/fenced/pacemaker-fenced.h
+++ pacemaker-2.0.1+20190417.13d370ca9/daemons/fenced/pacemaker-fenced.h
@@ -105,6 +105,10 @@ typedef struct remote_fencing_op_s {
* values associated with the devices this fencing operation may call */
gint total_timeout;
+ /*! Enforced fencing delay.
+ * Default value -1 means no enforced fencing delay. */
+ int delay;
+
/*! Delegate is the node being asked to perform a fencing action
* on behalf of the node that owns the remote operation. Some operations
* will involve multiple delegates. This value represents the final delegate