A new user interface for you! Read more...

File fenced_use_post_join_delay_after_cluster_join.patch of Package cluster

commit d8a76837ecd24a5f7ab9a2ebf72745e6c6752c0b
Author: David Teigland <teigland@redhat.com>
Date:   Tue Aug 17 16:42:30 2010 -0500

    fenced: use post_join_delay after cluster join
    
    When the cluster has lost quorum due to a node failure,
    the next event is generally a cluster node join which
    gives the cluster quorum again.  With quorum, fenced
    begins fencing any failed nodes, applying post_fail_delay
    since the last cpg event was a node failure.  In this
    case, however, post_join_delay is more appropriate, since
    the nodes about to be fenced are likely to be joining soon.
    Detect this case where a node joins the cluster giving it
    quorum, and use post_join_delay.
    
    bz 624844
    
    Signed-off-by: David Teigland <teigland@redhat.com>

diff --git a/fence/fenced/fd.h b/fence/fenced/fd.h
index 39a34ad..a5a78bf 100644
--- a/fence/fenced/fd.h
+++ b/fence/fenced/fd.h
@@ -64,6 +64,7 @@ extern int daemon_quit;
 extern int cluster_down;
 extern struct list_head domains;
 extern int cluster_quorate;
+extern int cluster_quorate_from_last_update;
 extern uint32_t cluster_ringid_seq;
 extern uint64_t quorate_time;
 extern int our_nodeid;
diff --git a/fence/fenced/main.c b/fence/fenced/main.c
index a371dc8..e5ab568 100644
--- a/fence/fenced/main.c
+++ b/fence/fenced/main.c
@@ -1069,6 +1069,7 @@ int daemon_quit;
 int cluster_down;
 struct list_head domains;
 int cluster_quorate;
+int cluster_quorate_from_last_update;
 uint32_t cluster_ringid_seq;
 uint64_t quorate_time;
 int our_nodeid;
diff --git a/fence/fenced/member_cman.c b/fence/fenced/member_cman.c
index b9d8341..0919b8e 100644
--- a/fence/fenced/member_cman.c
+++ b/fence/fenced/member_cman.c
@@ -150,6 +150,7 @@ static void update_cluster(void)
 {
 	cman_cluster_t info;
 	int quorate = cluster_quorate;
+	int removed = 0, added = 0;
 	int i, rv;
 
 	rv = cman_get_cluster(ch, &info);
@@ -183,6 +184,7 @@ static void update_cluster(void)
 				  old_nodes[i].cn_nodeid, cluster_ringid_seq);
 
 			node_history_cluster_remove(old_nodes[i].cn_nodeid);
+			removed++;
 		}
 	}
 
@@ -194,8 +196,18 @@ static void update_cluster(void)
 				  cman_nodes[i].cn_nodeid, cluster_ringid_seq);
 
 			node_history_cluster_add(cman_nodes[i].cn_nodeid);
+			added++;
 		}
 	}
+
+	if (removed) {
+		cluster_quorate_from_last_update = 0;
+	} else if (added) {
+		if (!quorate && cluster_quorate)
+			cluster_quorate_from_last_update = 1;
+		else
+			cluster_quorate_from_last_update = 0;
+	}
 }
 
 /* Note: in fence delay loop we aren't processing callbacks so won't
diff --git a/fence/fenced/recover.c b/fence/fenced/recover.c
index d3bf35f..a7ca047 100644
--- a/fence/fenced/recover.c
+++ b/fence/fenced/recover.c
@@ -181,7 +181,7 @@ void delay_fencing(struct fd *fd, int node_join)
 	if (list_empty(&fd->victims))
 		return;
 
-	if (node_join) {
+	if (node_join || cluster_quorate_from_last_update) {
 		delay = cfgd_post_join_delay;
 		delay_type = "post_join_delay";
 	} else {
@@ -189,6 +189,9 @@ void delay_fencing(struct fd *fd, int node_join)
 		delay_type = "post_fail_delay";
 	}
 
+	log_debug("delay %s %d quorate_from_last_update %d",
+		  delay_type, delay, cluster_quorate_from_last_update);
+
 	if (delay == 0)
 		goto out;