File pacemaker-attrd-crmd-erase-attributes-at-attrd-start-up.patch of Package pacemaker.14737

commit c9d1c3cd34f05c245a54809fe4eafaf1cae4161b
Author: Ken Gaillot <kgaillot@redhat.com>
Date:   Tue Aug 15 11:36:07 2017 -0500

    Fix: attrd,crmd: erase attributes at attrd start-up, not first join
    
    Previously, the crmd cleared the local node's transient attributes from the CIB
    the first time it joined the cluster, to handle the case where a node restarts
    so quickly that the cluster layer doesn't notice.
    
    However, that left a race where any attributes set between attrd's start-up and
    crmd's first join would be lost.
    
    Now, attrd handles the clearing when it starts. This makes more sense, since
    attrd's attribute memory is empty at this point, and it is making sure the CIB
    is in sync.

Index: pacemaker/attrd/main.c
===================================================================
--- pacemaker.orig/attrd/main.c
+++ pacemaker/attrd/main.c
@@ -133,6 +133,48 @@ attrd_cib_destroy_cb(gpointer user_data)
     return;
 }
 
+static void
+attrd_erase_cb(xmlNode *msg, int call_id, int rc, xmlNode *output,
+               void *user_data)
+{ // Callback for the erase request: logs its result (user_data is the xpath)
+    do_crm_log_unlikely((rc? LOG_NOTICE : LOG_DEBUG), // notice if rc is nonzero
+                        "Cleared transient attributes: %s "
+                        CRM_XS " xpath=%s rc=%d",
+                        pcmk_strerror(rc), (char *) user_data, rc);
+}
+
+#define XPATH_TRANSIENT "//node_state[@uname='%s']/" XML_TAG_TRANSIENT_NODEATTRS
+
+/*!
+ * \internal
+ * \brief Wipe all transient attributes for this node from the CIB
+ *
+ * Clear any previous transient node attributes from the CIB. This is
+ * normally done by the DC's crmd when this node leaves the cluster, but
+ * this handles the case where the node restarted so quickly that the
+ * cluster layer didn't notice. The result is logged by attrd_erase_cb().
+ *
+ * \todo If attrd respawns after crashing (see PCMK_respawned), ideally we'd
+ *       skip this and sync our attributes from the writer. However, currently
+ *       we reject any values for us that the writer has, in
+ *       attrd_peer_update().
+ */
+static void
+attrd_erase_attrs(void)
+{
+    int call_id;
+    char *xpath = crm_strdup_printf(XPATH_TRANSIENT, attrd_cluster->uname);
+
+    crm_info("Clearing transient attributes from CIB " CRM_XS " xpath=%s",
+             xpath);
+    // xpath becomes the callback's user data; free() is registered to release it
+    call_id = the_cib->cmds->delete(the_cib, xpath, NULL,
+                                    cib_quorum_override | cib_xpath);
+    the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, xpath,
+                                          "attrd_erase_cb", attrd_erase_cb,
+                                          free);
+}
+
 static int
 attrd_cib_connect(int max_retry)
 {
@@ -175,6 +217,9 @@ attrd_cib_connect(int max_retry)
         goto cleanup;
     }
 
+    // We have no attribute values in memory, wipe the CIB to match
+    attrd_erase_attrs();
+
     return pcmk_ok;
 
   cleanup:
Index: pacemaker/crmd/join_client.c
===================================================================
--- pacemaker.orig/crmd/join_client.c
+++ pacemaker/crmd/join_client.c
@@ -281,12 +281,9 @@ do_cl_join_finalize_respond(long long ac
              *   of anonymous clones and end up with multiple active
              *   instances on the machine.
              */
-            erase_status_tag(fsa_our_uname, XML_TAG_TRANSIENT_NODEATTRS, 0);
 
             /* Just in case attrd was still around too */
             if (is_not_set(fsa_input_register, R_SHUTDOWN)) {
-                update_attrd(fsa_our_uname, "terminate", NULL, NULL, FALSE);
-                update_attrd(fsa_our_uname, XML_CIB_ATTR_SHUTDOWN, "0", NULL, FALSE);
             }
         }
 
openSUSE Build Service is sponsored by