File pacemaker-crmd-validate-CIB-diffs-better.patch of Package pacemaker.openSUSE_Leap_42.3_Update

commit 8c286edcef4e2b1dcaf4c2560574324903fa18ef
Author: Ken Gaillot <kgaillot@redhat.com>
Date:   Mon Feb 26 11:44:19 2018 -0600

    Low: crmd: validate CIB diffs better
    
    This contains refactoring and logging changes to make the diff processing code
    more readable, validate CIB diffs better, and log diff issues consistently.

diff --git a/crmd/te_callbacks.c b/crmd/te_callbacks.c
index fd75368cb..976e57e62 100644
--- a/crmd/te_callbacks.c
+++ b/crmd/te_callbacks.c
@@ -66,7 +66,7 @@ update_stonith_max_attempts(const char* value)
     }
 }
 static void
-te_legacy_update_diff(const char *event, xmlNode * diff)
+te_update_diff_v1(const char *event, xmlNode *diff)
 {
     int lpc, max;
     xmlXPathObject *xpathObj = NULL;
@@ -144,46 +144,40 @@ te_legacy_update_diff(const char *event, xmlNode * diff)
     freeXpathObject(xpathObj);
 
     /*
-     * Check for and fast-track the processing of LRM refreshes
-     * In large clusters this can result in _huge_ speedups
+     * Updates by, or in response to, TE actions will never contain updates
+     * for more than one resource at a time, so such updates indicate an
+     * LRM refresh.
      *
-     * Unfortunately we can only do so when there are no pending actions
-     * Otherwise we could miss updates we're waiting for and stall
+     * In that case, start a new transition rather than check each result
+     * individually, which can result in _huge_ speedups in large clusters.
      *
+     * Unfortunately, we can only do so when there are no pending actions.
+     * Otherwise, we could mistakenly throw away those results here, and
+     * the cluster will stall waiting for them and time out the operation.
      */
-    xpathObj = NULL;
     if (transition_graph->pending == 0) {
-        xpathObj =
-            xpath_search(diff,
-                         "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//"
-                         XML_LRM_TAG_RESOURCE);
-    }
-
-    max = numXpathResults(xpathObj);
-    if (max > 1) {
-        /* Updates by, or in response to, TE actions will never contain updates
-         * for more than one resource at a time
-         */
-        crm_debug("Detected LRM refresh - %d resources updated: Skipping all resource events", max);
-        crm_log_xml_trace(diff, "lrm-refresh");
-        abort_transition(INFINITY, tg_restart, "LRM Refresh", NULL);
-        goto bail;
+        xpathObj = xpath_search(diff,
+                                "//" F_CIB_UPDATE_RESULT
+                                "//" XML_TAG_DIFF_ADDED
+                                "//" XML_LRM_TAG_RESOURCE);
+        max = numXpathResults(xpathObj);
+        if (max > 1) {
+            crm_debug("Ignoring resource operation updates due to LRM refresh of %d resources",
+                      max);
+            crm_log_xml_trace(diff, "lrm-refresh");
+            abort_transition(INFINITY, tg_restart, "LRM Refresh", NULL);
+            goto bail;
+        }
+        freeXpathObject(xpathObj);
     }
-    freeXpathObject(xpathObj);
 
     /* Process operation updates */
     xpathObj =
         xpath_search(diff,
                      "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP);
-    if (numXpathResults(xpathObj)) {
-/*
-    <status>
-       <node_state id="node1" state=CRMD_JOINSTATE_MEMBER exp_state="active">
-          <lrm>
-             <lrm_resources>
-        	<rsc_state id="" rsc_id="rsc4" node_id="node1" rsc_state="stopped"/>
-*/
-        int lpc = 0, max = numXpathResults(xpathObj);
+    max = numXpathResults(xpathObj);
+    if (max > 0) {
+        int lpc = 0;
 
         for (lpc = 0; lpc < max; lpc++) {
             xmlNode *rsc_op = getXpathResult(xpathObj, lpc);
@@ -241,38 +235,50 @@ te_legacy_update_diff(const char *event, xmlNode * diff)
     freeXpathObject(xpathObj);
 }
 
-static void process_resource_updates(
-    const char *node, xmlNode *xml, xmlNode *change, const char *op, const char *xpath) 
+static void
+process_lrm_resource_diff(xmlNode *lrm_resource, const char *node)
+{
+    for (xmlNode *rsc_op = __xml_first_child(lrm_resource); rsc_op != NULL;
+         rsc_op = __xml_next(rsc_op)) {
+        process_graph_event(rsc_op, node);
+    }
+}
+
+static void
+process_resource_updates(const char *node, xmlNode *xml, xmlNode *change,
+                         const char *op, const char *xpath)
 {
     xmlNode *cIter = NULL;
     xmlNode *rsc = NULL;
-    xmlNode *rsc_op = NULL;
     int num_resources = 0;
 
-    if(xml == NULL) {
+    if (xml == NULL) {
         return;
 
-    } else if(strcmp((const char*)xml->name, XML_CIB_TAG_LRM) == 0) {
+    } else if (strcmp((const char*)xml->name, XML_CIB_TAG_LRM) == 0) {
         xml = first_named_child(xml, XML_LRM_TAG_RESOURCES);
         crm_trace("Got %p in %s", xml, XML_CIB_TAG_LRM);
     }
 
     CRM_ASSERT(strcmp((const char*)xml->name, XML_LRM_TAG_RESOURCES) == 0);
 
-    for(cIter = xml->children; cIter; cIter = cIter->next) {
+    for (cIter = xml->children; cIter; cIter = cIter->next) {
         num_resources++;
     }
 
-    if(num_resources > 1) {
+    if (num_resources > 1) {
         /*
-         * Check for and fast-track the processing of LRM refreshes
-         * In large clusters this can result in _huge_ speedups
+         * Updates by, or in response to, TE actions will never contain updates
+         * for more than one resource at a time, so such updates indicate an
+         * LRM refresh.
          *
-         * Unfortunately we can only do so when there are no pending actions
-         * Otherwise we could miss updates we're waiting for and stall
+         * In that case, start a new transition rather than check each result
+         * individually, which can result in _huge_ speedups in large clusters.
          *
+         * Unfortunately, we can only do so when there are no pending actions.
+         * Otherwise, we could mistakenly throw away those results here, and
+         * the cluster will stall waiting for them and time out the operation.
          */
-
         crm_debug("Detected LRM refresh - %d resources updated", num_resources);
         crm_log_xml_trace(change, "lrm-refresh");
         abort_transition(INFINITY, tg_restart, "LRM Refresh", NULL);
@@ -281,10 +287,7 @@ static void process_resource_updates(
 
     for (rsc = __xml_first_child(xml); rsc != NULL; rsc = __xml_next(rsc)) {
         crm_trace("Processing %s", ID(rsc));
-        for (rsc_op = __xml_first_child(rsc); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) {
-            crm_trace("Processing %s", ID(rsc_op));
-            process_graph_event(rsc_op, node);
-        }
+        process_lrm_resource_diff(rsc, node);
     }
 }
 
@@ -361,84 +364,142 @@ abort_unless_down(const char *xpath, const char *op, xmlNode *change,
     free(node_uuid);
 }
 
-void
-te_update_diff(const char *event, xmlNode * msg)
+static void
+process_op_deletion(const char *xpath, xmlNode *change)
 {
-    int rc = -EINVAL;
-    int format = 1;
-    xmlNode *change = NULL;
-    const char *op = NULL;
-
-    xmlNode *diff = NULL;
+    char *mutable_key = strdup(xpath);
+    char *key;
+    char *node_uuid;
+    crm_action_t *cancel = NULL;
+
+    // Extract the part of xpath between last pair of single quotes
+    key = strrchr(mutable_key, '\'');
+    if (key != NULL) {
+        *key = '\0';
+        key = strrchr(mutable_key, '\'');
+    }
+    if (key == NULL) {
+        crm_warn("Ignoring malformed CIB update (resource deletion of %s)",
+                 xpath);
+        free(mutable_key);
+        return;
+    }
+    ++key;
 
-    int p_add[] = { 0, 0, 0 };
-    int p_del[] = { 0, 0, 0 };
+    node_uuid = extract_node_uuid(xpath);
+    cancel = get_cancel_action(key, node_uuid);
+    if (cancel) {
+        crm_info("Cancellation of %s on %s confirmed (%d)",
+                 key, node_uuid, cancel->id);
+        stop_te_timer(cancel->timer);
+        te_action_confirmed(cancel);
+        update_graph(transition_graph, cancel);
+        trigger_graph();
+    } else {
+        abort_transition(INFINITY, tg_restart, "Resource operation removal",
+                         change);
+    }
+    free(mutable_key);
+    free(node_uuid);
+}
 
-    CRM_CHECK(msg != NULL, return);
-    crm_element_value_int(msg, F_CIB_RC, &rc);
+static void
+process_delete_diff(const char *xpath, const char *op, xmlNode *change)
+{
+    if (strstr(xpath, "/" XML_LRM_TAG_RSC_OP "[")) {
+        process_op_deletion(xpath, change);
 
-    if (transition_graph == NULL) {
-        crm_trace("No graph");
-        return;
+    } else if (strstr(xpath, "/" XML_CIB_TAG_LRM "[")) {
+        abort_unless_down(xpath, op, change, "Resource state removal");
 
-    } else if (rc < pcmk_ok) {
-        crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc));
-        return;
+    } else if (strstr(xpath, "/" XML_CIB_TAG_STATE "[")) {
+        abort_unless_down(xpath, op, change, "Node state removal");
 
-    } else if (transition_graph->complete == TRUE
-               && fsa_state != S_IDLE
-               && fsa_state != S_TRANSITION_ENGINE && fsa_state != S_POLICY_ENGINE) {
-        crm_trace("Filter state=%s, complete=%d", fsa_state2string(fsa_state),
-                  transition_graph->complete);
-        return;
+    } else {
+        crm_trace("Ignoring delete of %s", xpath);
     }
+}
 
-    op = crm_element_value(msg, F_CIB_OPERATION);
-    diff = get_message_xml(msg, F_CIB_UPDATE_RESULT);
+static void
+process_node_state_diff(xmlNode *state, xmlNode *change, const char *op,
+                        const char *xpath)
+{
+    xmlNode *lrm = first_named_child(state, XML_CIB_TAG_LRM);
 
-    xml_patch_versions(diff, p_add, p_del);
-    crm_debug("Processing (%s) diff: %d.%d.%d -> %d.%d.%d (%s)", op,
-              p_del[0], p_del[1], p_del[2], p_add[0], p_add[1], p_add[2],
-              fsa_state2string(fsa_state));
+    process_resource_updates(ID(state), lrm, change, op, xpath);
+}
 
-    crm_element_value_int(diff, "format", &format);
-    switch(format) {
-        case 1:
-            te_legacy_update_diff(event, diff);
-            return;
-        case 2:
-            /* Cool, we know what to do here */
-            crm_log_xml_trace(diff, "Patch:Raw");
-            break;
-        default:
-            crm_warn("Unknown patch format: %d", format);
-            return;
+static void
+process_status_diff(xmlNode *status, xmlNode *change, const char *op,
+                    const char *xpath)
+{
+    for (xmlNode *state = __xml_first_child(status); state != NULL;
+         state = __xml_next(state)) {
+        process_node_state_diff(state, change, op, xpath);
+    }
+}
+
+static void
+process_cib_diff(xmlNode *cib, xmlNode *change, const char *op,
+                 const char *xpath)
+{
+    xmlNode *status = first_named_child(cib, XML_CIB_TAG_STATUS);
+    xmlNode *config = first_named_child(cib, XML_CIB_TAG_CONFIGURATION);
+
+    if (status) {
+        process_status_diff(status, change, op, xpath);
     }
+    if (config) {
+        abort_transition(INFINITY, tg_restart,
+                         "Non-status-only change", change);
+    }
+}
 
-    for (change = __xml_first_child(diff); change != NULL; change = __xml_next(change)) {
+static void
+te_update_diff_v2(xmlNode *diff)
+{
+    crm_log_xml_trace(diff, "Patch:Raw");
+
+    for (xmlNode *change = __xml_first_child(diff); change != NULL;
+         change = __xml_next(change)) {
+
+        xmlNode *match = NULL;
         const char *name = NULL;
-        const char *op = crm_element_value(change, XML_DIFF_OP);
         const char *xpath = crm_element_value(change, XML_DIFF_PATH);
-        xmlNode *match = NULL;
-        const char *node = NULL;
 
-        if(op == NULL) {
+        // Possible ops: create, modify, delete, move
+        const char *op = crm_element_value(change, XML_DIFF_OP);
+
+        // Ignore uninteresting updates
+        if (op == NULL) {
             continue;
 
-        } else if(strcmp(op, "create") == 0) {
-            match = change->children;
+        } else if (xpath == NULL) {
+            crm_trace("Ignoring %s change for version field", op);
+            continue;
 
-        } else if(strcmp(op, "move") == 0) {
+        } else if (strcmp(op, "move") == 0) {
+            crm_trace("Ignoring move change at %s", xpath);
             continue;
+        }
 
-        } else if(strcmp(op, "modify") == 0) {
+        // Find the result of create/modify ops
+        if (strcmp(op, "create") == 0) {
+            match = change->children;
+
+        } else if (strcmp(op, "modify") == 0) {
             match = first_named_child(change, XML_DIFF_RESULT);
             if(match) {
                 match = match->children;
             }
+
+        } else if (strcmp(op, "delete") != 0) {
+            crm_warn("Ignoring malformed CIB update (%s operation on %s is unrecognized)",
+                     op, xpath);
+            continue;
         }
 
-        if(match) {
+        if (match) {
             if (match->type == XML_COMMENT_NODE) {
                 crm_trace("Ignoring %s operation for comment at %s", op, xpath);
                 continue;
@@ -449,130 +510,117 @@ te_update_diff(const char *event, xmlNode * msg)
         crm_trace("Handling %s operation for %s%s%s",
                   op, (xpath? xpath : "CIB"),
                   (name? " matched by " : ""), (name? name : ""));
-        if(xpath == NULL) {
-            /* Version field, ignore */
 
-        } else if(strstr(xpath, "/cib/configuration")) {
-            abort_transition(INFINITY, tg_restart, "Configuration change", change);
-            break; /* Won't be packaged with any resource operations we may be waiting for */
+        if (strstr(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION)) {
+            abort_transition(INFINITY, tg_restart, "Configuration change",
+                             change);
+            break; // Won't be packaged with operation results we may be waiting for
 
-        } else if(strstr(xpath, "/"XML_CIB_TAG_TICKETS) || safe_str_eq(name, XML_CIB_TAG_TICKETS)) {
+        } else if (strstr(xpath, "/" XML_CIB_TAG_TICKETS)
+                   || safe_str_eq(name, XML_CIB_TAG_TICKETS)) {
             abort_transition(INFINITY, tg_restart, "Ticket attribute change", change);
-            break; /* Won't be packaged with any resource operations we may be waiting for */
+            break; // Won't be packaged with operation results we may be waiting for
 
-        } else if(strstr(xpath, "/"XML_TAG_TRANSIENT_NODEATTRS"[") || safe_str_eq(name, XML_TAG_TRANSIENT_NODEATTRS)) {
+        } else if (strstr(xpath, "/" XML_TAG_TRANSIENT_NODEATTRS "[")
+                   || safe_str_eq(name, XML_TAG_TRANSIENT_NODEATTRS)) {
             abort_unless_down(xpath, op, change, "Transient attribute change");
-            break; /* Won't be packaged with any resource operations we may be waiting for */
-
-        } else if(strstr(xpath, "/"XML_LRM_TAG_RSC_OP"[") && safe_str_eq(op, "delete")) {
-            crm_action_t *cancel = NULL;
-            char *mutable_key = strdup(xpath);
-            char *key, *node_uuid;
-
-            /* Extract the part of xpath between last pair of single quotes */
-            key = strrchr(mutable_key, '\'');
-            if (key != NULL) {
-                *key = '\0';
-                key = strrchr(mutable_key, '\'');
-            }
-            if (key == NULL) {
-                crm_warn("Ignoring malformed CIB update (resource deletion)");
-                free(mutable_key);
-                continue;
-            }
-            ++key;
-
-            node_uuid = extract_node_uuid(xpath);
-            cancel = get_cancel_action(key, node_uuid);
-            if (cancel == NULL) {
-                abort_transition(INFINITY, tg_restart, "Resource operation removal", change);
+            break; // Won't be packaged with operation results we may be waiting for
 
-            } else {
-                crm_info("Cancellation of %s on %s confirmed (%d)", key, node_uuid, cancel->id);
-                stop_te_timer(cancel->timer);
-                te_action_confirmed(cancel);
-
-                update_graph(transition_graph, cancel);
-                trigger_graph();
-
-            }
-            free(mutable_key);
-            free(node_uuid);
-
-        } else if(strstr(xpath, "/"XML_CIB_TAG_LRM"[") && safe_str_eq(op, "delete")) {
-            abort_unless_down(xpath, op, change, "Resource state removal");
-
-        } else if(strstr(xpath, "/"XML_CIB_TAG_STATE"[") && safe_str_eq(op, "delete")) {
-            abort_unless_down(xpath, op, change, "Node state removal");
-
-        } else if(name == NULL) {
-            crm_debug("No result for %s operation to %s", op, xpath);
-            CRM_ASSERT(strcmp(op, "delete") == 0 || strcmp(op, "move") == 0);
-
-        } else if(strcmp(name, XML_TAG_CIB) == 0) {
-            xmlNode *state = NULL;
-            xmlNode *status = first_named_child(match, XML_CIB_TAG_STATUS);
-            xmlNode *config = first_named_child(match, XML_CIB_TAG_CONFIGURATION);
-
-            for (state = __xml_first_child(status); state != NULL; state = __xml_next(state)) {
-                xmlNode *lrm = first_named_child(state, XML_CIB_TAG_LRM);
-
-                node = ID(state);
-                process_resource_updates(node, lrm, change, op, xpath);
-            }
-
-            if(config) {
-                abort_transition(INFINITY, tg_restart, "Non-status-only change", change);
-            }
+        } else if (strcmp(op, "delete") == 0) {
+            process_delete_diff(xpath, op, change);
 
-        } else if(strcmp(name, XML_CIB_TAG_STATUS) == 0) {
-            xmlNode *state = NULL;
+        } else if (name == NULL) {
+            crm_warn("Ignoring malformed CIB update (%s at %s has no result)",
+                     op, xpath);
 
-            for (state = __xml_first_child(match); state != NULL; state = __xml_next(state)) {
-                xmlNode *lrm = first_named_child(state, XML_CIB_TAG_LRM);
-
-                node = ID(state);
-                process_resource_updates(node, lrm, change, op, xpath);
-            }
+        } else if (strcmp(name, XML_TAG_CIB) == 0) {
+            process_cib_diff(match, change, op, xpath);
 
-        } else if(strcmp(name, XML_CIB_TAG_STATE) == 0) {
-            xmlNode *lrm = first_named_child(match, XML_CIB_TAG_LRM);
+        } else if (strcmp(name, XML_CIB_TAG_STATUS) == 0) {
+            process_status_diff(match, change, op, xpath);
 
-            node = ID(match);
-            process_resource_updates(node, lrm, change, op, xpath);
+        } else if (strcmp(name, XML_CIB_TAG_STATE) == 0) {
+            process_node_state_diff(match, change, op, xpath);
 
-        } else if(strcmp(name, XML_CIB_TAG_LRM) == 0) {
-            node = ID(match);
-            process_resource_updates(node, match, change, op, xpath);
+        } else if (strcmp(name, XML_CIB_TAG_LRM) == 0) {
+            process_resource_updates(ID(match), match, change, op, xpath);
 
-        } else if(strcmp(name, XML_LRM_TAG_RESOURCES) == 0) {
+        } else if (strcmp(name, XML_LRM_TAG_RESOURCES) == 0) {
             char *local_node = get_node_from_xpath(xpath);
 
             process_resource_updates(local_node, match, change, op, xpath);
             free(local_node);
 
-        } else if(strcmp(name, XML_LRM_TAG_RESOURCE) == 0) {
-
-            xmlNode *rsc_op;
+        } else if (strcmp(name, XML_LRM_TAG_RESOURCE) == 0) {
             char *local_node = get_node_from_xpath(xpath);
 
-            for (rsc_op = __xml_first_child(match); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) {
-                process_graph_event(rsc_op, local_node);
-            }
+            process_lrm_resource_diff(match, local_node);
             free(local_node);
 
-        } else if(strcmp(name, XML_LRM_TAG_RSC_OP) == 0) {
+        } else if (strcmp(name, XML_LRM_TAG_RSC_OP) == 0) {
             char *local_node = get_node_from_xpath(xpath);
 
             process_graph_event(match, local_node);
             free(local_node);
 
         } else {
-            crm_err("Ignoring %s operation for %s %p, %s", op, xpath, match, name);
+            crm_warn("Ignoring malformed CIB update (%s at %s has unrecognized result %s)",
+                     op, xpath, name);
         }
     }
 }
 
+void
+te_update_diff(const char *event, xmlNode * msg)
+{
+    xmlNode *diff = NULL;
+    const char *op = NULL;
+    int rc = -EINVAL;
+    int format = 1;
+    int p_add[] = { 0, 0, 0 };
+    int p_del[] = { 0, 0, 0 };
+
+    CRM_CHECK(msg != NULL, return);
+    crm_element_value_int(msg, F_CIB_RC, &rc);
+
+    if (transition_graph == NULL) {
+        crm_trace("No graph");
+        return;
+
+    } else if (rc < pcmk_ok) {
+        crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc));
+        return;
+
+    } else if (transition_graph->complete
+               && fsa_state != S_IDLE
+               && fsa_state != S_TRANSITION_ENGINE
+               && fsa_state != S_POLICY_ENGINE) {
+        crm_trace("Filter state=%s, complete=%d", fsa_state2string(fsa_state),
+                  transition_graph->complete);
+        return;
+    }
+
+    op = crm_element_value(msg, F_CIB_OPERATION);
+    diff = get_message_xml(msg, F_CIB_UPDATE_RESULT);
+
+    xml_patch_versions(diff, p_add, p_del);
+    crm_debug("Processing (%s) diff: %d.%d.%d -> %d.%d.%d (%s)", op,
+              p_del[0], p_del[1], p_del[2], p_add[0], p_add[1], p_add[2],
+              fsa_state2string(fsa_state));
+
+    crm_element_value_int(diff, "format", &format);
+    switch (format) {
+        case 1:
+            te_update_diff_v1(event, diff);
+            break;
+        case 2:
+            te_update_diff_v2(diff);
+            break;
+        default:
+            crm_warn("Ignoring malformed CIB update (unknown patch format %d)",
+                     format);
+    }
+}
 
 gboolean
 process_te_message(xmlNode * msg, xmlNode * xml_data)
openSUSE Build Service is sponsored by