File pacemaker-crmd-liblrmd-libcrmcommon-improve-remote-node-disconnection-logs.patch of Package pacemaker.14737

commit 326a24626cf789850196dd4443f21ece356f2df2
Author: Ken Gaillot <kgaillot@redhat.com>
Date:   Fri Mar 2 15:51:18 2018 -0600

    Log: crmd,liblrmd,libcrmcommon: improve remote node disconnection logs

Index: pacemaker/crmd/remote_lrmd_ra.c
===================================================================
--- pacemaker.orig/crmd/remote_lrmd_ra.c
+++ pacemaker/crmd/remote_lrmd_ra.c
@@ -466,7 +466,8 @@ monitor_timeout_cb(gpointer data)
 
     lrm_state = lrm_state_find(cmd->rsc_id);
 
-    crm_info("Poke async response timed out for node %s (%p)", cmd->rsc_id, lrm_state);
+    crm_info("Timed out waiting for remote poke response from %s%s",
+             cmd->rsc_id, (lrm_state? "" : " (no LRM state)"));
     cmd->monitor_timeout_id = 0;
     cmd->op_status = PCMK_LRM_OP_TIMEOUT;
     cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
@@ -565,11 +566,13 @@ remote_lrm_op_callback(lrmd_event_data_t
         (ra_data->active == TRUE)) {
 
         if (!remote_ra_is_in_maintenance(lrm_state)) {
-            crm_err("Unexpected disconnect on remote-node %s", lrm_state->node_name);
+            crm_err("Lost connection to Pacemaker Remote node %s",
+                    lrm_state->node_name);
             ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
             ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
         } else {
-            crm_notice("Disconnect on unmanaged remote-node %s", lrm_state->node_name);
+            crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
+                       lrm_state->node_name);
             /* Do roughly what a 'stop' on the remote-resource would do */
             handle_remote_ra_stop(lrm_state, NULL);
             remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
Index: pacemaker/cts/CTStests.py
===================================================================
--- pacemaker.orig/cts/CTStests.py
+++ pacemaker/cts/CTStests.py
@@ -3096,7 +3096,8 @@ class RemoteStonithd(RemoteDriver):
 
     def errorstoignore(self):
         ignore_pats = [
-            r"Unexpected disconnect on remote-node",
+            r"Lost connection to Pacemaker Remote node",
+            r"Software caused connection abort",
             r"crmd.*:\s+error.*: Operation remote_.*_monitor",
             r"crmd.*:\s+error.*: Result of monitor operation for remote_.*",
             r"pengine.*:\s+Recover remote_.*\s*\(.*\)",
Index: pacemaker/lib/common/remote.c
===================================================================
--- pacemaker.orig/lib/common/remote.c
+++ pacemaker/lib/common/remote.c
@@ -245,8 +245,9 @@ crm_send_tls(gnutls_session_t * session,
                       (unsigned long long) len);
 
         } else if (rc < 0) {
-            crm_err("Connection terminated: %s " CRM_XS " rc=%d",
-                    gnutls_strerror(rc), rc);
+            // Caller can log as error if necessary
+            crm_info("TLS connection terminated: %s " CRM_XS " rc=%d",
+                     gnutls_strerror(rc), rc);
             rc = -ECONNABORTED;
             break;
 
@@ -289,7 +290,9 @@ crm_send_plaintext(int sock, const char
                 crm_trace("Retry");
                 goto retry;
             default:
-                crm_perror(LOG_ERR, "Could only write %d of the remaining %d bytes", rc, (int)len);
+                crm_perror(LOG_INFO,
+                           "Could only write %d of the remaining %llu bytes",
+                           rc, (unsigned long long) len);
                 break;
         }
 
@@ -311,22 +314,19 @@ crm_send_plaintext(int sock, const char
 static int
 crm_remote_sendv(crm_remote_t * remote, struct iovec * iov, int iovs)
 {
-    int lpc = 0;
-    int rc = -ESOCKTNOSUPPORT;
-
-    for(; lpc < iovs; lpc++) {
+    int rc = 0;
 
+    for (int lpc = 0; (lpc < iovs) && (rc >= 0); lpc++) {
 #ifdef HAVE_GNUTLS_GNUTLS_H
         if (remote->tls_session) {
             rc = crm_send_tls(remote->tls_session, iov[lpc].iov_base, iov[lpc].iov_len);
-        } else if (remote->tcp_socket) {
-#else
-        if (remote->tcp_socket) {
+            continue;
+        }
 #endif
+        if (remote->tcp_socket) {
             rc = crm_send_plaintext(remote->tcp_socket, iov[lpc].iov_base, iov[lpc].iov_len);
-
         } else {
-            crm_err("Unsupported connection type");
+            rc = -ESOCKTNOSUPPORT;
         }
     }
     return rc;
Index: pacemaker/lib/lrmd/lrmd_client.c
===================================================================
--- pacemaker.orig/lib/lrmd/lrmd_client.c
+++ pacemaker/lib/lrmd/lrmd_client.c
@@ -355,11 +355,11 @@ lrmd_tls_dispatch(gpointer userdata)
     int disconnected = 0;
 
     if (lrmd_tls_connected(lrmd) == FALSE) {
-        crm_trace("tls dispatch triggered after disconnect");
+        crm_trace("TLS dispatch triggered after disconnect");
         return 0;
     }
 
-    crm_trace("tls_dispatch triggered");
+    crm_trace("TLS dispatch triggered");
 
     /* First check if there are any pending notifies to process that came
      * while we were waiting for replies earlier. */
@@ -396,7 +396,7 @@ lrmd_tls_dispatch(gpointer userdata)
                 int reply_id = 0;
                 crm_element_value_int(xml, F_LRMD_CALLID, &reply_id);
                 /* if this happens, we want to know about it */
-                crm_err("Got outdated reply %d", reply_id);
+                crm_err("Got outdated remote LRM reply %d", reply_id);
             }
         }
         free_xml(xml);
@@ -404,7 +404,8 @@ lrmd_tls_dispatch(gpointer userdata)
     }
 
     if (disconnected) {
-        crm_info("Server disconnected while reading remote server msg.");
+        crm_info("Lost %s LRM connection while reading data",
+                 (native->remote_nodename? native->remote_nodename : "local"));
         lrmd_tls_disconnect(lrmd);
         return 0;
     }
@@ -1403,7 +1404,9 @@ lrmd_api_disconnect(lrmd_t * lrmd)
 {
     lrmd_private_t *native = lrmd->private;
 
-    crm_info("Disconnecting from %d lrmd service", native->type);
+    crm_info("Disconnecting %s LRM connection to %s",
+             crm_client_type_text(native->type),
+             (native->remote_nodename? native->remote_nodename : "local"));
     switch (native->type) {
         case CRM_CLIENT_IPC:
             lrmd_ipc_disconnect(lrmd);
Index: pacemaker/lrmd/lrmd.c
===================================================================
--- pacemaker.orig/lrmd/lrmd.c
+++ pacemaker/lrmd/lrmd.c
@@ -386,7 +386,8 @@ send_reply(crm_client_t * client, int rc
 
     free_xml(reply);
     if (send_rc < 0) {
-        crm_warn("LRMD reply to %s failed: %d", client->name, send_rc);
+        crm_warn("Reply to client %s failed: %s " CRM_XS " %d",
+                 client->name, pcmk_strerror(send_rc), send_rc);
     }
 }
 
Index: pacemaker/lrmd/tls_backend.c
===================================================================
--- pacemaker.orig/lrmd/tls_backend.c
+++ pacemaker/lrmd/tls_backend.c
@@ -91,7 +91,7 @@ lrmd_remote_client_msg(gpointer data)
         /* no msg to read */
         return 0;
     } else if (rc < 0) {
-        crm_info("Client disconnected during remote client read");
+        crm_info("Client disconnected while polling it");
         return -1;
     }
 
@@ -126,7 +126,7 @@ lrmd_remote_client_msg(gpointer data)
     }
 
     if (disconnected) {
-        crm_info("Client disconnect detected in tls msg dispatcher.");
+        crm_info("Client disconnected while reading from it");
         return -1;
     }
 
@@ -142,6 +142,10 @@ lrmd_remote_client_destroy(gpointer user
         return;
     }
 
+    crm_notice("Cleaning up after remote client %s disconnected "
+               CRM_XS " id=%s",
+               (client->name? client->name : ""), client->id);
+
     ipc_proxy_remove_provider(client);
 
     /* if this is the last remote connection, stop recurring
@@ -150,9 +154,6 @@ lrmd_remote_client_destroy(gpointer user
         client_disconnect_cleanup(NULL);
     }
 
-    crm_notice("LRMD client disconnecting remote client - name: %s id: %s",
-               client->name ? client->name : "<unknown>", client->id);
-
     if (client->remote->tls_session) {
         void *sock_ptr;
         int csock;
openSUSE Build Service is sponsored by