File pacemaker-crmd-liblrmd-libcrmcommon-improve-remote-node-disconnection-logs.patch of Package pacemaker.14737
commit 326a24626cf789850196dd4443f21ece356f2df2
Author: Ken Gaillot <kgaillot@redhat.com>
Date: Fri Mar 2 15:51:18 2018 -0600
Log: crmd,liblrmd,libcrmcommon: improve remote node disconnection logs
Index: pacemaker/crmd/remote_lrmd_ra.c
===================================================================
--- pacemaker.orig/crmd/remote_lrmd_ra.c
+++ pacemaker/crmd/remote_lrmd_ra.c
@@ -466,7 +466,8 @@ monitor_timeout_cb(gpointer data)
lrm_state = lrm_state_find(cmd->rsc_id);
- crm_info("Poke async response timed out for node %s (%p)", cmd->rsc_id, lrm_state);
+ crm_info("Timed out waiting for remote poke response from %s%s",
+ cmd->rsc_id, (lrm_state? "" : " (no LRM state)"));
cmd->monitor_timeout_id = 0;
cmd->op_status = PCMK_LRM_OP_TIMEOUT;
cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
@@ -565,11 +566,13 @@ remote_lrm_op_callback(lrmd_event_data_t
(ra_data->active == TRUE)) {
if (!remote_ra_is_in_maintenance(lrm_state)) {
- crm_err("Unexpected disconnect on remote-node %s", lrm_state->node_name);
+ crm_err("Lost connection to Pacemaker Remote node %s",
+ lrm_state->node_name);
ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
} else {
- crm_notice("Disconnect on unmanaged remote-node %s", lrm_state->node_name);
+ crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
+ lrm_state->node_name);
/* Do roughly what a 'stop' on the remote-resource would do */
handle_remote_ra_stop(lrm_state, NULL);
remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
Index: pacemaker/cts/CTStests.py
===================================================================
--- pacemaker.orig/cts/CTStests.py
+++ pacemaker/cts/CTStests.py
@@ -3096,7 +3096,8 @@ class RemoteStonithd(RemoteDriver):
def errorstoignore(self):
ignore_pats = [
- r"Unexpected disconnect on remote-node",
+ r"Lost connection to Pacemaker Remote node",
+ r"Software caused connection abort",
r"crmd.*:\s+error.*: Operation remote_.*_monitor",
r"crmd.*:\s+error.*: Result of monitor operation for remote_.*",
r"pengine.*:\s+Recover remote_.*\s*\(.*\)",
Index: pacemaker/lib/common/remote.c
===================================================================
--- pacemaker.orig/lib/common/remote.c
+++ pacemaker/lib/common/remote.c
@@ -245,8 +245,9 @@ crm_send_tls(gnutls_session_t * session,
(unsigned long long) len);
} else if (rc < 0) {
- crm_err("Connection terminated: %s " CRM_XS " rc=%d",
- gnutls_strerror(rc), rc);
+ // Caller can log as error if necessary
+ crm_info("TLS connection terminated: %s " CRM_XS " rc=%d",
+ gnutls_strerror(rc), rc);
rc = -ECONNABORTED;
break;
@@ -289,7 +290,9 @@ crm_send_plaintext(int sock, const char
crm_trace("Retry");
goto retry;
default:
- crm_perror(LOG_ERR, "Could only write %d of the remaining %d bytes", rc, (int)len);
+ crm_perror(LOG_INFO,
+ "Could only write %d of the remaining %llu bytes",
+ rc, (unsigned long long) len);
break;
}
@@ -311,22 +314,19 @@ crm_send_plaintext(int sock, const char
static int
crm_remote_sendv(crm_remote_t * remote, struct iovec * iov, int iovs)
{
- int lpc = 0;
- int rc = -ESOCKTNOSUPPORT;
-
- for(; lpc < iovs; lpc++) {
+ int rc = 0;
+ for (int lpc = 0; (lpc < iovs) && (rc >= 0); lpc++) {
#ifdef HAVE_GNUTLS_GNUTLS_H
if (remote->tls_session) {
rc = crm_send_tls(remote->tls_session, iov[lpc].iov_base, iov[lpc].iov_len);
- } else if (remote->tcp_socket) {
-#else
- if (remote->tcp_socket) {
+ continue;
+ }
#endif
+ if (remote->tcp_socket) {
rc = crm_send_plaintext(remote->tcp_socket, iov[lpc].iov_base, iov[lpc].iov_len);
-
} else {
- crm_err("Unsupported connection type");
+ rc = -ESOCKTNOSUPPORT;
}
}
return rc;
Index: pacemaker/lib/lrmd/lrmd_client.c
===================================================================
--- pacemaker.orig/lib/lrmd/lrmd_client.c
+++ pacemaker/lib/lrmd/lrmd_client.c
@@ -355,11 +355,11 @@ lrmd_tls_dispatch(gpointer userdata)
int disconnected = 0;
if (lrmd_tls_connected(lrmd) == FALSE) {
- crm_trace("tls dispatch triggered after disconnect");
+ crm_trace("TLS dispatch triggered after disconnect");
return 0;
}
- crm_trace("tls_dispatch triggered");
+ crm_trace("TLS dispatch triggered");
/* First check if there are any pending notifies to process that came
* while we were waiting for replies earlier. */
@@ -396,7 +396,7 @@ lrmd_tls_dispatch(gpointer userdata)
int reply_id = 0;
crm_element_value_int(xml, F_LRMD_CALLID, &reply_id);
/* if this happens, we want to know about it */
- crm_err("Got outdated reply %d", reply_id);
+ crm_err("Got outdated remote LRM reply %d", reply_id);
}
}
free_xml(xml);
@@ -404,7 +404,8 @@ lrmd_tls_dispatch(gpointer userdata)
}
if (disconnected) {
- crm_info("Server disconnected while reading remote server msg.");
+ crm_info("Lost %s LRM connection while reading data",
+ (native->remote_nodename? native->remote_nodename : "local"));
lrmd_tls_disconnect(lrmd);
return 0;
}
@@ -1403,7 +1404,9 @@ lrmd_api_disconnect(lrmd_t * lrmd)
{
lrmd_private_t *native = lrmd->private;
- crm_info("Disconnecting from %d lrmd service", native->type);
+ crm_info("Disconnecting %s LRM connection to %s",
+ crm_client_type_text(native->type),
+ (native->remote_nodename? native->remote_nodename : "local"));
switch (native->type) {
case CRM_CLIENT_IPC:
lrmd_ipc_disconnect(lrmd);
Index: pacemaker/lrmd/lrmd.c
===================================================================
--- pacemaker.orig/lrmd/lrmd.c
+++ pacemaker/lrmd/lrmd.c
@@ -386,7 +386,8 @@ send_reply(crm_client_t * client, int rc
free_xml(reply);
if (send_rc < 0) {
- crm_warn("LRMD reply to %s failed: %d", client->name, send_rc);
+ crm_warn("Reply to client %s failed: %s " CRM_XS " %d",
+ client->name, pcmk_strerror(send_rc), send_rc);
}
}
Index: pacemaker/lrmd/tls_backend.c
===================================================================
--- pacemaker.orig/lrmd/tls_backend.c
+++ pacemaker/lrmd/tls_backend.c
@@ -91,7 +91,7 @@ lrmd_remote_client_msg(gpointer data)
/* no msg to read */
return 0;
} else if (rc < 0) {
- crm_info("Client disconnected during remote client read");
+ crm_info("Client disconnected while polling it");
return -1;
}
@@ -126,7 +126,7 @@ lrmd_remote_client_msg(gpointer data)
}
if (disconnected) {
- crm_info("Client disconnect detected in tls msg dispatcher.");
+ crm_info("Client disconnected while reading from it");
return -1;
}
@@ -142,6 +142,10 @@ lrmd_remote_client_destroy(gpointer user
return;
}
+ crm_notice("Cleaning up after remote client %s disconnected "
+ CRM_XS " id=%s",
+ (client->name? client->name : ""), client->id);
+
ipc_proxy_remove_provider(client);
/* if this is the last remote connection, stop recurring
@@ -150,9 +154,6 @@ lrmd_remote_client_destroy(gpointer user
client_disconnect_cleanup(NULL);
}
- crm_notice("LRMD client disconnecting remote client - name: %s id: %s",
- client->name ? client->name : "<unknown>", client->id);
-
if (client->remote->tls_session) {
void *sock_ptr;
int csock;