File bsc#1198767-0002-Fix-scheduler-Do-not-fence-a-pending-node-that-doesn.patch of Package pacemaker.38495

From 00dd1989fdad9ceafbc2e385d5171ddb18b1dda9 Mon Sep 17 00:00:00 2001
From: "Gao,Yan" <ygao@suse.com>
Date: Tue, 21 Feb 2023 17:08:11 +0100
Subject: [PATCH 2/3] Fix: scheduler: Do not fence a pending node that doesn't
 have an uname in node state yet

If a joining peer causes the cluster to acquire quorum from corosync
while it has not yet joined the CPG membership of pacemaker-controld,
the node_state entry created for it may not have a uname yet.
Previously, in that case, the node would be considered `UNCLEAN
(offline)` and be unnecessarily fenced before it had a chance to join
CPG.

The fix resolves that by recognizing the node as `pending` and waiting
for it to join CPG.
---
 lib/pengine/unpack.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

Index: pacemaker-2.0.5+20201202.ba59be712/lib/pengine/unpack.c
===================================================================
--- pacemaker-2.0.5+20201202.ba59be712.orig/lib/pengine/unpack.c
+++ pacemaker-2.0.5+20201202.ba59be712/lib/pengine/unpack.c
@@ -1156,10 +1156,17 @@ unpack_status(xmlNode * status, pe_worki
             this_node = pe_find_node_any(data_set->nodes, id, uname);
 
             if (uname == NULL) {
-                /* error */
-                continue;
+                /* If a joining peer causes the cluster to acquire quorum from
+                 * corosync while it has not yet joined the CPG membership of
+                 * pacemaker-controld, the node_state entry created for it may
+                 * not have a uname yet. Recognize such a node as `pending` and
+                 * wait for it to join CPG.
+                 */
+                crm_trace("Handling " XML_CIB_TAG_STATE " entry with id=\"%s\" without "
+                          XML_ATTR_UNAME, id);
+            }
 
-            } else if (this_node == NULL) {
+            if (this_node == NULL) {
                 pcmk__config_warn("Ignoring recorded node status for '%s' "
                                   "because no longer in configuration", uname);
                 continue;
@@ -1174,7 +1181,7 @@ unpack_status(xmlNode * status, pe_worki
                 continue;
             }
 
-            crm_trace("Processing node id=%s, uname=%s", id, uname);
+            crm_trace("Processing node id=%s, uname=%s", id, crm_str(uname));
 
             /* Mark the node as provisionally clean
              * - at least we have seen it in the current cluster's lifetime
@@ -1185,19 +1192,19 @@ unpack_status(xmlNode * status, pe_worki
             add_node_attrs(attrs, this_node, TRUE, data_set);
 
             if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
-                crm_info("Node %s is in standby-mode", this_node->details->uname);
+                crm_info("Node %s is in standby-mode", crm_str(this_node->details->uname));
                 this_node->details->standby = TRUE;
             }
 
             if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance"))) {
-                crm_info("Node %s is in maintenance-mode", this_node->details->uname);
+                crm_info("Node %s is in maintenance-mode", crm_str(this_node->details->uname));
                 this_node->details->maintenance = TRUE;
             }
 
             resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
             if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
                 crm_warn("ignoring %s attribute on node %s, disabling resource discovery is not allowed on cluster nodes",
-                    XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname);
+                    XML_NODE_ATTR_RSC_DISCOVERY, crm_str(this_node->details->uname));
             }
 
             crm_trace("determining node state");
Places

File bsc#1198767-0002-Fix-scheduler-Do-not-fence-a-pending-node-that-doesn.patch of Package pacemaker.38495

Places