File bug-938545_pacemaker-fencing-concurrent-device-active-ops.patch of Package pacemaker.1630
commit 951f781b2684c223ada2aab5559b66c2a60040eb
Author: Gao,Yan <ygao@suse.com>
Date: Fri Jan 22 21:14:47 2016 +0100
Fix: fencing: Correctly track active stonith actions
When executing a stonith action, stonith_action_async_done() can
re-invoke internal_stonith_action_execute(). In that case, it will
introduce a new pid for the action, which can not be tracked by the
invoker of stonith_action_execute_async().
This commit fixes it by adding stonith_device_t *active_on to
async_command_t.
diff --git a/fencing/commands.c b/fencing/commands.c
index 7016809..2ea223d 100644
--- a/fencing/commands.c
+++ b/fencing/commands.c
@@ -116,6 +116,8 @@ typedef struct async_command_s {
/*! If the operation timed out, this is the last signal
* we sent to the process to get it to terminate */
int last_timeout_signo;
+
+ stonith_device_t *active_on;
} async_command_t;
static xmlNode *stonith_construct_async_reply(async_command_t * cmd, const char *output,
@@ -270,20 +272,26 @@ get_action_limit(stonith_device_t * device)
return action_limit;
}
-static void
-device_add_active_pid(stonith_device_t * device, GPid pid)
+static int
+get_active_cmds(stonith_device_t * device)
{
- CRM_CHECK(device != NULL, return);
+ int counter = 0;
+ GListPtr gIter = NULL;
+ GListPtr gIterNext = NULL;
- device->active_pids = g_list_append(device->active_pids, GINT_TO_POINTER(pid));
-}
+ CRM_CHECK(device != NULL, return 0);
-static void
-device_remove_active_pid(stonith_device_t * device, GPid pid)
-{
- CRM_CHECK(device != NULL, return);
+ for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) {
+ async_command_t *cmd = gIter->data;
- device->active_pids = g_list_remove(device->active_pids, GINT_TO_POINTER(pid));
+ gIterNext = gIter->next;
+
+ if (cmd->active_on == device) {
+ counter++;
+ }
+ }
+
+ return counter;
}
static gboolean
@@ -293,16 +301,16 @@ stonith_device_execute(stonith_device_t * device)
const char *action_str = NULL;
async_command_t *cmd = NULL;
stonith_action_t *action = NULL;
- guint active_pids = 0;
+ int active_cmds = 0;
int action_limit = 0;
CRM_CHECK(device != NULL, return FALSE);
- active_pids = g_list_length(device->active_pids);
+ active_cmds = get_active_cmds(device);
action_limit = get_action_limit(device);
- if (action_limit > -1 && active_pids >= action_limit) {
- crm_trace("%s is over its action limit of %d (%u active pid%s)",
- device->id, action_limit, active_pids, active_pids > 1 ? "s" : "");
+ if (action_limit > -1 && active_cmds >= action_limit) {
+ crm_trace("%s is over its action limit of %d (%u active action%s)",
+ device->id, action_limit, active_cmds, active_cmds > 1 ? "s" : "");
return TRUE;
}
@@ -379,7 +387,7 @@ stonith_device_execute(stonith_device_t * device)
crm_debug("Operation %s%s%s on %s now running with pid=%d, timeout=%ds",
cmd->action, cmd->victim ? " for node " : "", cmd->victim ? cmd->victim : "",
device->id, exec_rc, cmd->timeout);
- device_add_active_pid(device, exec_rc);
+ cmd->active_on = device;
} else {
crm_warn("Operation %s%s%s on %s failed: %s (%d)",
@@ -470,7 +478,6 @@ free_device(gpointer data)
free_async_command(cmd);
}
g_list_free(device->pending_ops);
- g_list_free(device->active_pids);
g_list_free_full(device->targets, free);
@@ -913,7 +920,6 @@ status_search_cb(GPid pid, int rc, const char *output, gpointer user_data)
return;
}
- device_remove_active_pid(dev, pid);
mainloop_set_trigger(dev->work);
if (rc == 1 /* unknown */ ) {
@@ -950,7 +956,6 @@ dynamic_list_search_cb(GPid pid, int rc, const char *output, gpointer user_data)
return;
}
- device_remove_active_pid(dev, pid);
mainloop_set_trigger(dev->work);
/* If we successfully got the targets earlier, don't disable. */
@@ -1925,8 +1930,9 @@ unfence_cb(GPid pid, int rc, const char *output, gpointer user_data)
log_operation(cmd, rc, pid, NULL, output);
+ cmd->active_on = NULL;
+
if(dev) {
- device_remove_active_pid(dev, pid);
mainloop_set_trigger(dev->work);
} else {
crm_trace("Device %s does not exist", cmd->device);
@@ -1968,10 +1974,11 @@ st_child_done(GPid pid, int rc, const char *output, gpointer user_data)
CRM_CHECK(cmd != NULL, return);
+ cmd->active_on = NULL;
+
/* The device is ready to do something else now */
device = g_hash_table_lookup(device_list, cmd->device);
if (device) {
- device_remove_active_pid(device, pid);
if (rc == pcmk_ok &&
(safe_str_eq(cmd->action, "list") ||
safe_str_eq(cmd->action, "monitor") || safe_str_eq(cmd->action, "status"))) {
diff --git a/fencing/internal.h b/fencing/internal.h
index 1bd8e37..5538a3a 100644
--- a/fencing/internal.h
+++ b/fencing/internal.h
@@ -34,7 +34,6 @@ typedef struct stonith_device_s {
/* whether the cluster should automatically unfence nodes with the device */
gboolean automatic_unfencing;
guint priority;
- GListPtr active_pids;
enum st_device_flags flags;