File bug-938545_pacemaker-fencing-concurrent-device-active-ops.patch of Package pacemaker.9287

commit 951f781b2684c223ada2aab5559b66c2a60040eb
Author: Gao,Yan <ygao@suse.com>
Date:   Fri Jan 22 21:14:47 2016 +0100

    Fix: fencing: Correctly track active stonith actions
    
    When executing a stonith action, stonith_action_async_done() can
    re-invoke internal_stonith_action_execute(). In that case, a new pid
    is created for the action, and the invoker of
    stonith_action_execute_async() cannot track it.
    
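    This commit fixes it by adding stonith_device_t *active_on to
    async_command_t, and by counting a device's active operations from
    the commands whose active_on points at it instead of from a list of
    pids.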

Index: pacemaker/fencing/commands.c
===================================================================
--- pacemaker.orig/fencing/commands.c
+++ pacemaker/fencing/commands.c
@@ -109,6 +109,8 @@ typedef struct async_command_s {
     /*! If the operation timed out, this is the last signal
      *  we sent to the process to get it to terminate */
     int last_timeout_signo;
+
+    stonith_device_t *active_on;
 } async_command_t;
 
 static xmlNode *stonith_construct_async_reply(async_command_t * cmd, const char *output,
@@ -248,20 +250,26 @@ get_action_limit(stonith_device_t * devi
     return action_limit;
 }
 
-static void
-device_add_active_pid(stonith_device_t * device, GPid pid)
+static int
+get_active_cmds(stonith_device_t * device)
 {
-    CRM_CHECK(device != NULL, return);
+    int counter = 0;
+    GListPtr gIter = NULL;
+    GListPtr gIterNext = NULL;
 
-    device->active_pids = g_list_append(device->active_pids, GINT_TO_POINTER(pid));
-}
+    CRM_CHECK(device != NULL, return 0);
 
-static void
-device_remove_active_pid(stonith_device_t * device, GPid pid)
-{
-    CRM_CHECK(device != NULL, return);
+    for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) {
+        async_command_t *cmd = gIter->data;
+
+        gIterNext = gIter->next;
+
+        if (cmd->active_on == device) {
+            counter++;
+        }
+    }
 
-    device->active_pids = g_list_remove(device->active_pids, GINT_TO_POINTER(pid));
+    return counter;
 }
 
 static gboolean
@@ -271,16 +279,16 @@ stonith_device_execute(stonith_device_t
     const char *action_str = NULL;
     async_command_t *cmd = NULL;
     stonith_action_t *action = NULL;
-    guint active_pids = 0;
+    int active_cmds = 0;
     int action_limit = 0;
 
     CRM_CHECK(device != NULL, return FALSE);
 
-    active_pids = g_list_length(device->active_pids);
+    active_cmds = get_active_cmds(device);
     action_limit = get_action_limit(device);
-    if (action_limit > -1 && active_pids >= action_limit) {
-        crm_trace("%s is over its action limit of %d (%u active pid%s)",
-                  device->id, action_limit, active_pids, active_pids > 1 ? "s" : "");
+    if (action_limit > -1 && active_cmds >= action_limit) {
+        crm_trace("%s is over its action limit of %d (%d active action%s)",
+                  device->id, action_limit, active_cmds, active_cmds > 1 ? "s" : "");
         return TRUE;
     }
 
@@ -357,7 +365,7 @@ stonith_device_execute(stonith_device_t
         crm_debug("Operation %s%s%s on %s now running with pid=%d, timeout=%ds",
                   cmd->action, cmd->victim ? " for node " : "", cmd->victim ? cmd->victim : "",
                   device->id, exec_rc, cmd->timeout);
-        device_add_active_pid(device, exec_rc);
+        cmd->active_on = device;
 
     } else {
         crm_warn("Operation %s%s%s on %s failed: %s (%d)",
@@ -448,7 +456,6 @@ free_device(gpointer data)
         free_async_command(cmd);
     }
     g_list_free(device->pending_ops);
-    g_list_free(device->active_pids);
 
     g_list_free_full(device->targets, free);
 
@@ -894,7 +901,6 @@ status_search_cb(GPid pid, int rc, const
         return;
     }
 
-    device_remove_active_pid(dev, pid);
     mainloop_set_trigger(dev->work);
 
     if (rc == 1 /* unkown */ ) {
@@ -931,7 +937,6 @@ dynamic_list_search_cb(GPid pid, int rc,
         return;
     }
 
-    device_remove_active_pid(dev, pid);
     mainloop_set_trigger(dev->work);
 
     /* If we successfully got the targets earlier, don't disable. */
@@ -1627,8 +1632,9 @@ unfence_cb(GPid pid, int rc, const char
 
     log_operation(cmd, rc, pid, NULL, output);
 
+    cmd->active_on = NULL;
+
     if(dev) {
-        device_remove_active_pid(dev, pid);
         mainloop_set_trigger(dev->work);
     } else {
         crm_trace("Device %s does not exist", cmd->device);
@@ -1673,10 +1679,11 @@ st_child_done(GPid pid, int rc, const ch
 
     active_children--;
 
+    cmd->active_on = NULL;
+
     /* The device is ready to do something else now */
     device = g_hash_table_lookup(device_list, cmd->device);
     if (device) {
-        device_remove_active_pid(device, pid);
         if (rc == pcmk_ok &&
             (safe_str_eq(cmd->action, "list") ||
              safe_str_eq(cmd->action, "monitor") || safe_str_eq(cmd->action, "status"))) {
Index: pacemaker/fencing/internal.h
===================================================================
--- pacemaker.orig/fencing/internal.h
+++ pacemaker/fencing/internal.h
@@ -33,7 +33,6 @@ typedef struct stonith_device_s {
     /* should nodeid parameter for victim be included in agent arguments */
     gboolean include_nodeid;
     guint priority;
-    GListPtr active_pids;
 
     enum st_device_flags flags;
 
openSUSE Build Service is sponsored by