File pacemaker-daemons-improve-connection-loss-messages.patch of Package pacemaker.10805
commit d526fc33b58596c1552550ddedd45ebdf89b755b
Author: Ken Gaillot <kgaillot@redhat.com>
Date: Sat Apr 28 10:09:06 2018 -0500
Log: daemons: improve connection loss messages
Make the messages consistent across daemons, and log them at critical severity
when the connection loss causes the daemon to shut down.
Index: pacemaker-1.1.18+20180430.b12c320f5/cts/patterns.py
===================================================================
--- pacemaker-1.1.18+20180430.b12c320f5.orig/cts/patterns.py
+++ pacemaker-1.1.18+20180430.b12c320f5/cts/patterns.py
@@ -238,11 +238,15 @@ class crm_corosync(BasePatterns):
]
self.components["corosync"] = [
- r"pacemakerd.*error:.*Connection destroyed",
- r"attrd.*:\s*(crit|error):.*Lost connection to (Corosync|CIB) service",
- r"stonith.*:\s*(Corosync connection terminated|Shutting down)",
- r"cib.*:\s*Corosync connection lost!\s+Exiting.",
- r"crmd.*:\s*(connection terminated|Disconnected from Corosync)",
+ # We expect each daemon to lose its cluster connection.
+ # However, if the CIB manager loses its cluster connection first,
+ # another daemon may lose its connection to the CIB manager and
+ # exit before it loses its own cluster connection.
+ r"pacemakerd.*:\s*(crit|error):.*Lost connection to cluster layer",
+ r"attrd.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)",
+ r"cib.*:\s*(crit|error):.*Lost connection to cluster layer",
+ r"crmd.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)",
+ r"stonith.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)",
r"pengine.*Scheduling Node .* for STONITH",
r"crmd.*:\s*Peer .* was terminated \(.*\) by .* for .*:\s*OK",
]
Index: pacemaker-1.1.18+20180430.b12c320f5/attrd/main.c
===================================================================
--- pacemaker-1.1.18+20180430.b12c320f5.orig/attrd/main.c
+++ pacemaker-1.1.18+20180430.b12c320f5/attrd/main.c
@@ -86,7 +86,7 @@ attrd_cpg_destroy(gpointer unused)
crm_info("Corosync disconnection complete");
} else {
- crm_crit("Lost connection to Corosync service!");
+ crm_crit("Lost connection to cluster layer, shutting down");
attrd_exit_status = CRM_EX_DISCONNECT;
attrd_shutdown(0);
}
@@ -113,7 +113,7 @@ attrd_cib_destroy_cb(gpointer user_data)
} else {
/* eventually this should trigger a reconnect, not a shutdown */
- crm_err("Lost connection to CIB service!");
+ crm_crit("Lost connection to the CIB manager, shutting down");
attrd_exit_status = CRM_EX_DISCONNECT;
attrd_shutdown(0);
}
Index: pacemaker-1.1.18+20180430.b12c320f5/cib/main.c
===================================================================
--- pacemaker-1.1.18+20180430.b12c320f5.orig/cib/main.c
+++ pacemaker-1.1.18+20180430.b12c320f5/cib/main.c
@@ -259,7 +259,7 @@ cib_cs_destroy(gpointer user_data)
if (cib_shutdown_flag) {
crm_info("Corosync disconnection complete");
} else {
- crm_err("Corosync connection lost! Exiting.");
+ crm_crit("Lost connection to cluster layer, shutting down");
terminate_cib(__FUNCTION__, CRM_EX_DISCONNECT);
}
}
Index: pacemaker-1.1.18+20180430.b12c320f5/crmd/cib.c
===================================================================
--- pacemaker-1.1.18+20180430.b12c320f5.orig/crmd/cib.c
+++ pacemaker-1.1.18+20180430.b12c320f5/crmd/cib.c
@@ -74,7 +74,7 @@ do_cib_control(long long action,
return;
}
- crm_info("Disconnecting CIB");
+ crm_info("Disconnecting from the CIB manager");
clear_bit(fsa_input_register, R_CIB_CONNECTED);
fsa_cib_conn->cmds->del_notify_callback(fsa_cib_conn, T_CIB_DIFF_NOTIFY, do_cib_updated);
@@ -83,14 +83,14 @@ do_cib_control(long long action,
fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local);
fsa_cib_conn->cmds->signoff(fsa_cib_conn);
}
- crm_notice("Disconnected from the CIB");
+ crm_notice("Disconnected from the CIB manager");
}
if (action & A_CIB_START) {
int rc = pcmk_ok;
if (cur_state == S_STOPPING) {
- crm_err("Ignoring request to start the CIB after shutdown");
+ crm_err("Ignoring request to connect to the CIB manager after shutdown");
return;
}
Index: pacemaker-1.1.18+20180430.b12c320f5/crmd/callbacks.c
===================================================================
--- pacemaker-1.1.18+20180430.b12c320f5.orig/crmd/callbacks.c
+++ pacemaker-1.1.18+20180430.b12c320f5/crmd/callbacks.c
@@ -270,8 +270,8 @@ crmd_cib_connection_destroy(gpointer use
return;
}
- /* eventually this will trigger a reconnect, not a shutdown */
- crm_err("Connection to the CIB terminated...");
+ // @TODO This should trigger a reconnect, not a shutdown
+ crm_crit("Lost connection to the CIB manager, shutting down");
register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
clear_bit(fsa_input_register, R_CIB_CONNECTED);
Index: pacemaker-1.1.18+20180430.b12c320f5/crmd/corosync.c
===================================================================
--- pacemaker-1.1.18+20180430.b12c320f5.orig/crmd/corosync.c
+++ pacemaker-1.1.18+20180430.b12c320f5/crmd/corosync.c
@@ -95,7 +95,7 @@ static void
crmd_cs_destroy(gpointer user_data)
{
if (is_not_set(fsa_input_register, R_HA_DISCONNECTED)) {
- crm_err("Corosync connection lost");
+ crm_crit("Lost connection to cluster layer, shutting down");
crmd_exit(CRM_EX_DISCONNECT);
} else {
Index: pacemaker-1.1.18+20180430.b12c320f5/fencing/main.c
===================================================================
--- pacemaker-1.1.18+20180430.b12c320f5.orig/fencing/main.c
+++ pacemaker-1.1.18+20180430.b12c320f5/fencing/main.c
@@ -214,7 +214,7 @@ stonith_peer_ais_callback(cpg_handle_t h
static void
stonith_peer_cs_destroy(gpointer user_data)
{
- crm_err("Corosync connection terminated");
+ crm_crit("Lost connection to cluster layer, shutting down");
stonith_shutdown(0);
}
#endif
@@ -1122,7 +1122,7 @@ cib_connection_destroy(gpointer user_dat
crm_info("Connection to the CIB closed.");
return;
} else {
- crm_notice("Connection to the CIB terminated. Shutting down.");
+ crm_crit("Lost connection to the CIB manager, shutting down");
}
if (cib_api) {
cib_api->cmds->signoff(cib_api);
Index: pacemaker-1.1.18+20180430.b12c320f5/daemons/pacemakerd/pacemakerd.c
===================================================================
--- pacemaker-1.1.18+20180430.b12c320f5.orig/daemons/pacemakerd/pacemakerd.c
+++ pacemaker-1.1.18+20180430.b12c320f5/daemons/pacemakerd/pacemakerd.c
@@ -781,7 +781,7 @@ init_children_processes(void)
static void
mcp_cpg_destroy(gpointer user_data)
{
- crm_err("Connection destroyed");
+ crm_crit("Lost connection to cluster layer, shutting down");
crm_exit(CRM_EX_DISCONNECT);
}