File Fix-ust-UST-communication-can-return-EAGAIN.patch of Package lttng-tools.32487

From be355079dc40848a2b654b1f0871dfa99a4876de Mon Sep 17 00:00:00 2001
From: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
Date: Tue, 22 Jun 2021 22:17:03 -0400
Subject: [PATCH] Fix: ust: UST communication can return -EAGAIN
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
References: bsc#1218508
Git-commit: be355079dc40848a2b654b1f0871dfa99a4876de

Observed issue
==============

The following scenario lead to an abort on event creation. The
problem manifest itself when an application is unresponsive. Note that
the default timeout for ust communication is 5 seconds.

  # Start an instrumented app
  ./app
  gdb lttng-sessiond
  # put a breakpoint on ustctl_create_event.
  lttng create my_session
  lttng enable-event -u -a
  lttng start
  # The tracepoint should hit. Do not continue.
  kill -s SIGSTOP $(pgrep app)
  # Continue lttng-sessiond.
  # lttng-sessiond will abort.

Note that for UST this is not an expected behaviour. Expected
communication failure with a single app should not invalidate the
complete channel, compromise its setup or result in an abort.

Note that a similar scenario for the following ustctl call sites also
lead to scenario where failure of a single app lead to error reporting
and/or error propagation to upper level object.

Problematic callsites:
   ustctl_set_exclusion
   ustctl_set_filter
   ustctl_disable_channel

These callsites are also fixed by this patch.

Cause
=====

For an unresponsive application, EAGAIN is returned and is treated as an
"unknown" hard error.

In this particular case the abort() call was introduced by commit:
88e3c2f5610b9ac89b0923d448fee34140fc46fb [1]. It is not clear if this is
a leftover from debugging session since this is the only callsite where
an abort is issued on communication failure via ustctl.

Solution
========

Handle EAGAIN coming from ustctl_* and treat it the same way a
dying application is handled. The only minor difference is that we WARN
on communication time out. Albeit not the most useful thing for a CLI
client, it could help overall user of lttng-sessiond in time out
situation.

Most call site already handled "unknown" error correctly. For those call
site we simply end up bringing more info in regards to the timeout
issue instead of mentioning that "-11" was returned.

Note, the reclamation of "app" is handled by the poll loop and
ust_app_unregister since the socket is shutdown by lttng-ust internally
on error, including EAGAIN.

Note that the application will try to register itself back to the
lttng-sessiond based on its configuration.

Known drawbacks
=========
None

Note
==========

Some logging call sites used the ppid of the app instead of the pid.
Those have been changed to pid.

References
==========
[1] https://github.com/lttng/lttng-tools/commit/88e3c2f5610b9ac89b0923d448fee34140fc46fb

Fixes: #1384

Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
Change-Id: If364b5d48e7fd2b664276a0fb1b7eec2c45ed683
Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
Signed-off-by: Jérémie Galarneau <jeremie.galarneau@efficios.com>

Acked-by: Tony Jones <tonyj@suse.com>
---
 src/bin/lttng-sessiond/ust-app.c      |  529 +++++++++++++++++++++-------------
 src/bin/lttng-sessiond/ust-consumer.c |   26 +
 2 files changed, 353 insertions(+), 202 deletions(-)

--- a/src/bin/lttng-sessiond/ust-app.c
+++ b/src/bin/lttng-sessiond/ust-app.c
@@ -276,9 +276,18 @@ void delete_ust_app_ctx(int sock, struct
 		pthread_mutex_lock(&app->sock_lock);
 		ret = ustctl_release_object(sock, ua_ctx->obj);
 		pthread_mutex_unlock(&app->sock_lock);
-		if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-			ERR("UST app sock %d release ctx obj handle %d failed with ret %d",
-					sock, ua_ctx->obj->handle, ret);
+		if (ret < 0) {
+			if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+				DBG3("UST app release ctx failed. Application is dead: pid = %d, sock = %d",
+						app->pid, app->sock);
+			} else if (ret == -EAGAIN) {
+				WARN("UST app release ctx failed. Communication time out: pid = %d, sock = %d",
+						app->pid, app->sock);
+			} else {
+				ERR("UST app release ctx obj handle %d failed with ret %d: pid = %d, sock = %d",
+						ua_ctx->obj->handle, ret,
+						app->pid, app->sock);
+			}
 		}
 		free(ua_ctx->obj);
 	}
@@ -304,9 +313,17 @@ void delete_ust_app_event(int sock, stru
 		pthread_mutex_lock(&app->sock_lock);
 		ret = ustctl_release_object(sock, ua_event->obj);
 		pthread_mutex_unlock(&app->sock_lock);
-		if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-			ERR("UST app sock %d release event obj failed with ret %d",
-					sock, ret);
+		if (ret < 0) {
+			if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+				DBG3("UST app release event failed. Application is dead: pid = %d, sock = %d",
+						app->pid, app->sock);
+			} else if (ret == -EAGAIN) {
+				WARN("UST app release event failed. Communication time out: pid = %d, sock = %d",
+						app->pid, app->sock);
+			} else {
+				ERR("UST app release event obj failed with ret %d: pid = %d, sock = %d",
+						ret, app->pid, app->sock);
+			}
 		}
 		free(ua_event->obj);
 	}
@@ -329,9 +346,17 @@ static int release_ust_app_stream(int so
 		pthread_mutex_lock(&app->sock_lock);
 		ret = ustctl_release_object(sock, stream->obj);
 		pthread_mutex_unlock(&app->sock_lock);
-		if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-			ERR("UST app sock %d release stream obj failed with ret %d",
-					sock, ret);
+		if (ret < 0) {
+			if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+				DBG3("UST app release stream failed. Application is dead: pid = %d, sock = %d",
+						app->pid, app->sock);
+			} else if (ret == -EAGAIN) {
+				WARN("UST app release stream failed. Communication time out: pid = %d, sock = %d",
+						app->pid, app->sock);
+			} else {
+				ERR("UST app release stream obj failed with ret %d: pid = %d, sock = %d",
+						ret, app->pid, app->sock);
+			}
 		}
 		lttng_fd_put(LTTNG_FD_APPS, 2);
 		free(stream->obj);
@@ -505,9 +530,20 @@ void delete_ust_app_channel(int sock, st
 		pthread_mutex_lock(&app->sock_lock);
 		ret = ustctl_release_object(sock, ua_chan->obj);
 		pthread_mutex_unlock(&app->sock_lock);
-		if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-			ERR("UST app sock %d release channel obj failed with ret %d",
-					sock, ret);
+		if (ret < 0) {
+			if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+				DBG3("UST app channel %s release failed. Application is dead: pid = %d, sock = %d",
+						ua_chan->name, app->pid,
+						app->sock);
+			} else if (ret == -EAGAIN) {
+				WARN("UST app channel %s release failed. Communication time out: pid = %d, sock = %d",
+						ua_chan->name, app->pid,
+						app->sock);
+			} else {
+				ERR("UST app channel %s release failed with ret %d: pid = %d, sock = %d",
+						ua_chan->name, ret, app->pid,
+						app->sock);
+			}
 		}
 		lttng_fd_put(LTTNG_FD_APPS, 1);
 		free(ua_chan->obj);
@@ -870,10 +906,19 @@ void delete_ust_app_session(int sock, st
 		pthread_mutex_lock(&app->sock_lock);
 		ret = ustctl_release_handle(sock, ua_sess->handle);
 		pthread_mutex_unlock(&app->sock_lock);
-		if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-			ERR("UST app sock %d release session handle failed with ret %d",
-					sock, ret);
+		if (ret < 0) {
+			if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+				DBG3("UST app release session handle failed. Application is dead: pid = %d, sock = %d",
+						app->pid, app->sock);
+			} else if (ret == -EAGAIN) {
+				WARN("UST app release session handle failed. Communication time out: pid = %d, sock = %d",
+						app->pid, app->sock);
+			} else {
+				ERR("UST app release session handle failed with ret %d: pid = %d, sock = %d",
+						ret, app->pid, app->sock);
+			}
 		}
+
 		/* Remove session from application UST object descriptor. */
 		iter.iter.node = &ua_sess->ust_objd_node.node;
 		ret = lttng_ht_del(app->ust_sessions_objd, &iter);
@@ -1315,17 +1360,17 @@ int create_ust_channel_context(struct us
 			ua_chan->obj, &ua_ctx->obj);
 	pthread_mutex_unlock(&app->sock_lock);
 	if (ret < 0) {
-		if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-			ERR("UST app create channel context failed for app (pid: %d) "
-					"with ret %d", app->pid, ret);
-		} else {
-			/*
-			 * This is normal behavior, an application can die during the
-			 * creation process. Don't report an error so the execution can
-			 * continue normally.
-			 */
+		if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+			ret = 0;
+			DBG3("UST app create channel context failed. Application is dead: pid = %d, sock = %d",
+					app->pid, app->sock);
+		} else if (ret == -EAGAIN) {
 			ret = 0;
-			DBG3("UST app add context failed. Application is dead.");
+			WARN("UST app create channel context failed. Communication time out: pid = %d, sock = %d",
+					app->pid, app->sock);
+		} else {
+			ERR("UST app create channel context failed with ret %d: pid = %d, sock = %d",
+					ret, app->pid, app->sock);
 		}
 		goto error;
 	}
@@ -1367,17 +1412,17 @@ int set_ust_event_filter(struct ust_app_
 			ua_event->obj);
 	pthread_mutex_unlock(&app->sock_lock);
 	if (ret < 0) {
-		if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-			ERR("UST app event %s filter failed for app (pid: %d) "
-					"with ret %d", ua_event->attr.name, app->pid, ret);
-		} else {
-			/*
-			 * This is normal behavior, an application can die during the
-			 * creation process. Don't report an error so the execution can
-			 * continue normally.
-			 */
+		if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
 			ret = 0;
-			DBG3("UST app filter event failed. Application is dead.");
+			DBG3("UST app event filter failed. Application is dead: pid = %d, sock = %d",
+					app->pid, app->sock);
+		} else if (ret == -EAGAIN) {
+			ret = 0;
+			WARN("UST app event filter failed. Communication time out: pid = %d, sock = %d",
+					app->pid, app->sock);
+		} else {
+			ERR("UST app event filter failed with ret %d: pid = %d, sock = %d, object = %p",
+					ret, app->pid, app->sock, ua_event->obj);
 		}
 		goto error;
 	}
@@ -1438,17 +1483,17 @@ int set_ust_event_exclusion(struct ust_a
 	ret = ustctl_set_exclusion(app->sock, ust_exclusion, ua_event->obj);
 	pthread_mutex_unlock(&app->sock_lock);
 	if (ret < 0) {
-		if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-			ERR("UST app event %s exclusions failed for app (pid: %d) "
-					"with ret %d", ua_event->attr.name, app->pid, ret);
-		} else {
-			/*
-			 * This is normal behavior, an application can die during the
-			 * creation process. Don't report an error so the execution can
-			 * continue normally.
-			 */
+		if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
 			ret = 0;
-			DBG3("UST app event exclusion failed. Application is dead.");
+			DBG3("UST app event exclusion failed. Application is dead: pid = %d, sock = %d",
+					app->pid, app->sock);
+		} else if (ret == -EAGAIN) {
+			ret = 0;
+			WARN("UST app event exclusion failed. Communication time out: pid = %d, sock = %d",
+					app->pid, app->sock);
+		} else {
+			ERR("UST app event exclusion failed with ret %d: pid = %d, sock = %d, object = %p",
+					ret, app->pid, app->sock, ua_event->obj);
 		}
 		goto error;
 	}
@@ -1475,19 +1520,18 @@ static int disable_ust_event(struct ust_
 	ret = ustctl_disable(app->sock, ua_event->obj);
 	pthread_mutex_unlock(&app->sock_lock);
 	if (ret < 0) {
-		if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-			ERR("UST app event %s disable failed for app (pid: %d) "
-					"and session handle %d with ret %d",
-					ua_event->attr.name, app->pid, ua_sess->handle, ret);
-		} else {
-			/*
-			 * This is normal behavior, an application can die during the
-			 * creation process. Don't report an error so the execution can
-			 * continue normally.
-			 */
+		if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
 			ret = 0;
-			DBG3("UST app disable event failed. Application is dead.");
-		}
+			DBG3("UST app disable event failed. Application is dead: pid = %d, sock = %d",
+					app->pid, app->sock);
+		} else if (ret == -EAGAIN) {
+ 			ret = 0;
+			WARN("UST app disable event failed. Communication time out: pid = %d, sock = %d",
+					app->pid, app->sock);
+		} else {
+			ERR("UST app disable event failed with ret %d: pid = %d, sock = %d, object = %p",
+					ret, app->pid, app->sock, ua_event->obj);
+ 		}
 		goto error;
 	}
 
@@ -1513,23 +1557,23 @@ static int disable_ust_channel(struct us
 	ret = ustctl_disable(app->sock, ua_chan->obj);
 	pthread_mutex_unlock(&app->sock_lock);
 	if (ret < 0) {
-		if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-			ERR("UST app channel %s disable failed for app (pid: %d) "
-					"and session handle %d with ret %d",
-					ua_chan->name, app->pid, ua_sess->handle, ret);
-		} else {
-			/*
-			 * This is normal behavior, an application can die during the
-			 * creation process. Don't report an error so the execution can
-			 * continue normally.
-			 */
+		if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
 			ret = 0;
-			DBG3("UST app disable channel failed. Application is dead.");
+			DBG3("UST app disable channel failed. Application is dead: pid = %d, sock = %d",
+					app->pid, app->sock);
+		} else if (ret == -EAGAIN) {
+			ret = 0;
+			WARN("UST app disable channel failed. Communication time out: pid = %d, sock = %d",
+					app->pid, app->sock);
+		} else {
+			ERR("UST app channel %s disable failed, session handle %d, with ret %d: pid = %d, sock = %d",
+					ua_chan->name, ua_sess->handle, ret,
+					app->pid, app->sock);
 		}
 		goto error;
 	}
 
-	DBG2("UST app channel %s disabled successfully for app (pid: %d)",
+	DBG2("UST app channel %s disabled successfully for app: pid = %d",
 			ua_chan->name, app->pid);
 
 error:
@@ -1551,25 +1595,25 @@ static int enable_ust_channel(struct ust
 	ret = ustctl_enable(app->sock, ua_chan->obj);
 	pthread_mutex_unlock(&app->sock_lock);
 	if (ret < 0) {
-		if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-			ERR("UST app channel %s enable failed for app (pid: %d) "
-					"and session handle %d with ret %d",
-					ua_chan->name, app->pid, ua_sess->handle, ret);
-		} else {
-			/*
-			 * This is normal behavior, an application can die during the
-			 * creation process. Don't report an error so the execution can
-			 * continue normally.
-			 */
+		if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
 			ret = 0;
-			DBG3("UST app enable channel failed. Application is dead.");
+			DBG3("UST app channel %s enable failed. Application is dead: pid = %d, sock = %d",
+					ua_chan->name, app->pid, app->sock);
+		} else if (ret == -EAGAIN) {
+			ret = 0;
+			WARN("UST app channel %s enable failed. Communication time out: pid = %d, sock = %d",
+					ua_chan->name, app->pid, app->sock);
+		} else {
+			ERR("UST app channel %s enable failed, session handle %d, with ret %d: pid = %d, sock = %d",
+					ua_chan->name, ua_sess->handle, ret,
+					app->pid, app->sock);
 		}
 		goto error;
 	}
 
 	ua_chan->enabled = 1;
 
-	DBG2("UST app channel %s enabled successfully for app (pid: %d)",
+	DBG2("UST app channel %s enabled successfully for app: pid = %d",
 			ua_chan->name, app->pid);
 
 error:
@@ -1591,19 +1635,18 @@ static int enable_ust_event(struct ust_a
 	ret = ustctl_enable(app->sock, ua_event->obj);
 	pthread_mutex_unlock(&app->sock_lock);
 	if (ret < 0) {
-		if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-			ERR("UST app event %s enable failed for app (pid: %d) "
-					"and session handle %d with ret %d",
-					ua_event->attr.name, app->pid, ua_sess->handle, ret);
-		} else {
-			/*
-			 * This is normal behavior, an application can die during the
-			 * creation process. Don't report an error so the execution can
-			 * continue normally.
-			 */
+		if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
 			ret = 0;
-			DBG3("UST app enable event failed. Application is dead.");
-		}
+			DBG3("UST app enable event failed. Application is dead: pid = %d, sock = %d",
+					app->pid, app->sock);
+		} else if (ret == -EAGAIN) {
+ 			ret = 0;
+			WARN("UST app enable event failed. Communication time out: pid = %d, sock = %d",
+					app->pid, app->sock);
+		} else {
+			ERR("UST app enable event failed with ret %d: pid = %d, sock = %d, object = %p",
+					ret, app->pid, app->sock, ua_event->obj);
+ 		}
 		goto error;
 	}
 
@@ -1640,6 +1683,13 @@ static int send_channel_pid_to_ust(struc
 	if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
 		ret = -ENOTCONN;	/* Caused by app exiting. */
 		goto error;
+	} else if (ret == -EAGAIN) {
+		/* Caused by timeout. */
+		WARN("Communication with application %d timed out on send_channel for channel \"%s\" of session \"%" PRIu64 "\".",
+				app->pid, ua_chan->name, ua_sess->tracing_id);
+		/* Treat this the same way as an application that is exiting. */
+		ret = -ENOTCONN;
+		goto error;
 	} else if (ret < 0) {
 		goto error;
 	}
@@ -1650,8 +1700,16 @@ static int send_channel_pid_to_ust(struc
 	cds_list_for_each_entry_safe(stream, stmp, &ua_chan->streams.head, list) {
 		ret = ust_consumer_send_stream_to_ust(app, ua_chan, stream);
 		if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
-			ret = -ENOTCONN;	/* Caused by app exiting. */
+			ret = -ENOTCONN; /* Caused by app exiting. */
 			goto error;
+		} else if (ret == -EAGAIN) {
+			/* Caused by timeout. */
+			WARN("Communication with application %d timed out on send_stream for stream \"%s\" of channel \"%s\" of session \"%" PRIu64 "\".",
+					app->pid, stream->name, ua_chan->name,
+					ua_sess->tracing_id);
+			/* Treat this the same way as an application that is
+			 * exiting. */
+			ret = -ENOTCONN;
 		} else if (ret < 0) {
 			goto error;
 		}
@@ -1686,26 +1744,26 @@ int create_ust_event(struct ust_app *app
 			&ua_event->obj);
 	pthread_mutex_unlock(&app->sock_lock);
 	if (ret < 0) {
-		if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-			abort();
-			ERR("Error ustctl create event %s for app pid: %d with ret %d",
-					ua_event->attr.name, app->pid, ret);
-		} else {
-			/*
-			 * This is normal behavior, an application can die during the
-			 * creation process. Don't report an error so the execution can
-			 * continue normally.
-			 */
+		if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
 			ret = 0;
-			DBG3("UST app create event failed. Application is dead.");
-		}
+			DBG3("UST app create event failed. Application is dead: pid = %d, sock = %d",
+					app->pid, app->sock);
+		} else if (ret == -EAGAIN) {
+ 			ret = 0;
+			WARN("UST app create event failed. Communication time out: pid = %d, sock = %d",
+					app->pid, app->sock);
+		} else {
+			ERR("UST app create event '%s' failed with ret %d: pid = %d, sock = %d",
+					ua_event->attr.name, ret, app->pid,
+					app->sock);
+ 		}
 		goto error;
 	}
 
 	ua_event->handle = ua_event->obj->handle;
 
-	DBG2("UST app event %s created successfully for pid:%d",
-			ua_event->attr.name, app->pid);
+	DBG2("UST app event %s created successfully for pid:%d object = %p",
+			ua_event->attr.name, app->pid, ua_event->obj);
 
 	health_code_update();
 
@@ -2159,18 +2217,17 @@ static int find_or_create_ust_app_sessio
 		ret = ustctl_create_session(app->sock);
 		pthread_mutex_unlock(&app->sock_lock);
 		if (ret < 0) {
-			if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-				ERR("Creating session for app pid %d with ret %d",
-						app->pid, ret);
-			} else {
-				DBG("UST app creating session failed. Application is dead");
-				/*
-				 * This is normal behavior, an application can die during the
-				 * creation process. Don't report an error so the execution can
-				 * continue normally. This will get flagged ENOTCONN and the
-				 * caller will handle it.
-				 */
+			if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+				DBG("UST app creating session failed. Application is dead: pid = %d, sock = %d",
+						app->pid, app->sock);
+				ret = 0;
+			} else if (ret == -EAGAIN) {
+				DBG("UST app creating session failed. Communication time out: pid = %d, sock = %d",
+						app->pid, app->sock);
 				ret = 0;
+			} else {
+				ERR("UST app creating session failed with ret %d: pid = %d, sock =%d",
+						ret, app->pid, app->sock);
 			}
 			delete_ust_app_session(-1, ua_sess, app);
 			if (ret != -ENOMEM) {
@@ -2752,6 +2809,13 @@ static int send_channel_uid_to_ust(struc
 	if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
 		ret = -ENOTCONN;	/* Caused by app exiting. */
 		goto error;
+	} else if (ret == -EAGAIN) {
+		/* Caused by timeout. */
+		WARN("Communication with application %d timed out on send_channel for channel \"%s\" of session \"%" PRIu64 "\".",
+				app->pid, ua_chan->name, ua_sess->tracing_id);
+		/* Treat this the same way as an application that is exiting. */
+		ret = -ENOTCONN;
+		goto error;
 	} else if (ret < 0) {
 		goto error;
 	}
@@ -2770,10 +2834,21 @@ static int send_channel_uid_to_ust(struc
 
 		ret = ust_consumer_send_stream_to_ust(app, ua_chan, &stream);
 		if (ret < 0) {
-			(void) release_ust_app_stream(-1, &stream, app);
 			if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
 				ret = -ENOTCONN; /* Caused by app exiting. */
+			} else if (ret == -EAGAIN) {
+				/*
+				 * Caused by timeout.
+				 * Treat this the same way as an application
+				 * that is exiting.
+				 */
+				WARN("Communication with application %d timed out on send_stream for stream \"%s\" of channel \"%s\" of session \"%" PRIu64 "\".",
+						app->pid, stream.name,
+						ua_chan->name,
+						ua_sess->tracing_id);
+				ret = -ENOTCONN;
 			}
+			(void) release_ust_app_stream(-1, &stream, app);
 			goto error_stream_unlock;
 		}
 
@@ -3163,8 +3238,8 @@ int create_ust_app_event(struct ust_app_
 
 	add_unique_ust_app_event(ua_chan, ua_event);
 
-	DBG2("UST app create event %s for PID %d completed", ua_event->name,
-			app->pid);
+	DBG2("UST app create event completed: app = '%s' pid = %d",
+			app->name, app->pid);
 
 end:
 	return ret;
@@ -3411,8 +3486,8 @@ void ust_app_add(struct ust_app *app)
 	lttng_ht_node_init_ulong(&app->notify_sock_n, app->notify_sock);
 	lttng_ht_add_unique_ulong(ust_app_ht_by_notify_sock, &app->notify_sock_n);
 
-	DBG("App registered with pid:%d ppid:%d uid:%d gid:%d sock:%d name:%s "
-			"notify_sock:%d (version %d.%d)", app->pid, app->ppid, app->uid,
+	DBG("App registered with pid:%d ppid:%d uid:%d gid:%d sock =%d name:%s "
+			"notify_sock =%d (version %d.%d)", app->pid, app->ppid, app->uid,
 			app->gid, app->sock, app->name, app->notify_sock, app->v_major,
 			app->v_minor);
 
@@ -3435,10 +3510,15 @@ int ust_app_version(struct ust_app *app)
 	ret = ustctl_tracer_version(app->sock, &app->version);
 	pthread_mutex_unlock(&app->sock_lock);
 	if (ret < 0) {
-		if (ret != -LTTNG_UST_ERR_EXITING && ret != -EPIPE) {
-			ERR("UST app %d version failed with ret %d", app->sock, ret);
+		if (ret == -LTTNG_UST_ERR_EXITING || ret == -EPIPE) {
+			DBG3("UST app version failed. Application is dead: pid = %d, sock = %d",
+					app->pid, app->sock);
+		} else if (ret == -EAGAIN) {
+			WARN("UST app version failed. Communication time out: pid = %d, sock = %d",
+					app->pid, app->sock);
 		} else {
-			DBG3("UST app %d version failed. Application is dead", app->sock);
+			ERR("UST app version failed with ret %d: pid = %d, sock = %d",
+					ret, app->pid, app->sock);
 		}
 	}
 
@@ -3620,11 +3700,6 @@ int ust_app_list_events(struct lttng_eve
 							app->sock, ret);
 				} else {
 					DBG3("UST app tp list get failed. Application is dead");
-					/*
-					 * This is normal behavior, an application can die during the
-					 * creation process. Don't report an error so the execution can
-					 * continue normally. Continue normal execution.
-					 */
 					break;
 				}
 				free(tmp_event);
@@ -3679,8 +3754,17 @@ int ust_app_list_events(struct lttng_eve
 		}
 		ret = ustctl_release_handle(app->sock, handle);
 		pthread_mutex_unlock(&app->sock_lock);
-		if (ret < 0 && ret != -LTTNG_UST_ERR_EXITING && ret != -EPIPE) {
-			ERR("Error releasing app handle for app %d with ret %d", app->sock, ret);
+		if (ret < 0) {
+			if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+				DBG3("Error releasing app handle. Application died: pid = %d, sock = %d",
+						app->pid, app->sock);
+			} else if (ret == -EAGAIN) {
+				WARN("Error releasing app handle. Communication time out: pid = %d, sock = %d",
+						app->pid, app->sock);
+			} else {
+				ERR("Error releasing app handle with ret %d: pid = %d, sock = %d",
+						ret, app->pid, app->sock);
+			}
 		}
 	}
 
@@ -3751,11 +3835,6 @@ int ust_app_list_event_fields(struct ltt
 							app->sock, ret);
 				} else {
 					DBG3("UST app tp list field failed. Application is dead");
-					/*
-					 * This is normal behavior, an application can die during the
-					 * creation process. Don't report an error so the execution can
-					 * continue normally. Reset list and count for next app.
-					 */
 					break;
 				}
 				free(tmp_event);
@@ -4373,18 +4452,20 @@ skip_setup:
 	ret = ustctl_start_session(app->sock, ua_sess->handle);
 	pthread_mutex_unlock(&app->sock_lock);
 	if (ret < 0) {
-		if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-			ERR("Error starting tracing for app pid: %d (ret: %d)",
-					app->pid, ret);
-		} else {
-			DBG("UST app start session failed. Application is dead.");
-			/*
-			 * This is normal behavior, an application can die during the
-			 * creation process. Don't report an error so the execution can
-			 * continue normally.
-			 */
+		if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+			DBG3("UST app start session failed. Application is dead: pid = %d, sock = %d",
+					app->pid, app->sock);
+			pthread_mutex_unlock(&ua_sess->lock);
+			goto end;
+		} else if (ret == -EAGAIN) {
+			WARN("UST app start session failed. Communication time out: pid = %d, sock = %d",
+					app->pid, app->sock);
 			pthread_mutex_unlock(&ua_sess->lock);
 			goto end;
+
+		} else {
+			ERR("UST app start session failed with ret %d: pid = %d, sock = %d",
+					ret, app->pid, app->sock);
 		}
 		goto error_unlock;
 	}
@@ -4401,9 +4482,17 @@ skip_setup:
 	pthread_mutex_lock(&app->sock_lock);
 	ret = ustctl_wait_quiescent(app->sock);
 	pthread_mutex_unlock(&app->sock_lock);
-	if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-		ERR("UST app wait quiescent failed for app pid %d ret %d",
-				app->pid, ret);
+	if (ret < 0) {
+		if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+			DBG3("UST app wait quiescent failed. Application is dead: pid = %d, sock = %d",
+					app->pid, app->sock);
+		} else if (ret == -EAGAIN) {
+			WARN("UST app wait quiescent failed. Communication time out: pid =  %d, sock = %d",
+					app->pid, app->sock);
+		} else {
+			ERR("UST app wait quiescent failed with ret %d: pid %d, sock = %d",
+					ret, app->pid, app->sock);
+		}
 	}
 
 end:
@@ -4465,17 +4554,18 @@ int ust_app_stop_trace(struct ltt_ust_se
 	ret = ustctl_stop_session(app->sock, ua_sess->handle);
 	pthread_mutex_unlock(&app->sock_lock);
 	if (ret < 0) {
-		if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-			ERR("Error stopping tracing for app pid: %d (ret: %d)",
-					app->pid, ret);
-		} else {
-			DBG("UST app stop session failed. Application is dead.");
-			/*
-			 * This is normal behavior, an application can die during the
-			 * creation process. Don't report an error so the execution can
-			 * continue normally.
-			 */
+		if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+			DBG3("UST app stop session failed. Application is dead: pid = %d, sock = %d",
+					app->pid, app->sock);
+			goto end_unlock;
+		} else if (ret == -EAGAIN) {
+			WARN("UST app stop session failed. Communication time out: pid = %d, sock = %d",
+					app->pid, app->sock);
 			goto end_unlock;
+
+		} else {
+			ERR("UST app stop session failed with ret %d: pid = %d, sock = %d",
+					ret, app->pid, app->sock);
 		}
 		goto error_rcu_unlock;
 	}
@@ -4487,9 +4577,17 @@ int ust_app_stop_trace(struct ltt_ust_se
 	pthread_mutex_lock(&app->sock_lock);
 	ret = ustctl_wait_quiescent(app->sock);
 	pthread_mutex_unlock(&app->sock_lock);
-	if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-		ERR("UST app wait quiescent failed for app pid %d ret %d",
-				app->pid, ret);
+	if (ret < 0) {
+		if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+			DBG3("UST app wait quiescent failed. Application is dead: pid= %d, sock = %d)",
+					app->pid, app->sock);
+		} else if (ret == -EAGAIN) {
+			WARN("UST app wait quiescent failed. Communication time out: pid= %d, sock = %d)",
+					app->pid, app->sock);
+		} else {
+			ERR("UST app wait quiescent failed with ret %d: pid= %d, sock = %d)",
+					ret, app->pid, app->sock);
+		}
 	}
 
 	health_code_update();
@@ -4837,9 +4935,17 @@ static int destroy_trace(struct ltt_ust_
 	pthread_mutex_lock(&app->sock_lock);
 	ret = ustctl_wait_quiescent(app->sock);
 	pthread_mutex_unlock(&app->sock_lock);
-	if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-		ERR("UST app wait quiescent failed for app pid %d ret %d",
-				app->pid, ret);
+	if (ret < 0) {
+		if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+			DBG3("UST app wait quiescent failed. Application is dead: pid= %d, sock = %d)",
+					app->pid, app->sock);
+		} else if (ret == -EAGAIN) {
+			WARN("UST app wait quiescent failed. Communication time out: pid= %d, sock = %d)",
+					app->pid, app->sock);
+		} else {
+			ERR("UST app wait quiescent failed with ret %d: pid= %d, sock = %d)",
+					ret, app->pid, app->sock);
+		}
 	}
 end:
 	rcu_read_unlock();
@@ -5406,16 +5512,21 @@ static int reply_ust_register_channel(in
 
 reply:
 	DBG3("UST app replying to register channel key %" PRIu64
-			" with id %u, type: %d, ret: %d", chan_reg_key, chan_id, type,
+			" with id %u, type = %d, ret = %d", chan_reg_key, chan_id, type,
 			ret_code);
 
 	ret = ustctl_reply_register_channel(sock, chan_id, type, ret_code);
 	if (ret < 0) {
-		if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-			ERR("UST app reply channel failed with ret %d", ret);
-		} else {
-			DBG3("UST app reply channel failed. Application died");
-		}
+		if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+			DBG3("UST app reply channel failed. Application died: pid = %d, sock = %d",
+					app->pid, app->sock);
+		} else if (ret == -EAGAIN) {
+			WARN("UST app reply channel failed. Communication time out: pid = %d, sock = %d",
+					app->pid, app->sock);
+ 		} else {
+			ERR("UST app reply channel failed with ret %d: pid = %d, sock = %d",
+					ret, app->pid, app->sock);
+ 		}
 		goto error;
 	}
 
@@ -5508,10 +5619,15 @@ static int add_event_ust_registry(int so
 	 */
 	ret = ustctl_reply_register_event(sock, event_id, ret_code);
 	if (ret < 0) {
-		if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-			ERR("UST app reply event failed with ret %d", ret);
+		if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+			DBG3("UST app reply event failed. Application died: pid = %d, sock = %d.",
+					app->pid, app->sock);
+		} else if (ret == -EAGAIN) {
+			WARN("UST app reply event failed. Communication time out: pid = %d, sock = %d",
+					app->pid, app->sock);
 		} else {
-			DBG3("UST app reply event failed. Application died");
+			ERR("UST app reply event failed with ret %d: pid = %d, sock = %d",
+					ret, app->pid, app->sock);
 		}
 		/*
 		 * No need to wipe the create event since the application socket will
@@ -5566,14 +5682,14 @@ static int add_enum_ust_registry(int soc
 	ua_sess = find_session_by_objd(app, sobjd);
 	if (!ua_sess) {
 		/* Return an error since this is not an error */
-		DBG("Application session is being torn down (session not found). Aborting enum registration.");
+		DBG("Application session is being torn down (session not found Aborting enum registration.");
 		free(entries);
 		goto error_rcu_unlock;
 	}
 
 	registry = get_session_registry(ua_sess);
 	if (!registry) {
-		DBG("Application session is being torn down (registry not found). Aborting enum registration.");
+		DBG("Application session is being torn down (registry not found Aborting enum registration.");
 		free(entries);
 		goto error_rcu_unlock;
 	}
@@ -5596,10 +5712,15 @@ static int add_enum_ust_registry(int soc
 	 */
 	ret = ustctl_reply_register_enum(sock, enum_id, ret_code);
 	if (ret < 0) {
-		if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-			ERR("UST app reply enum failed with ret %d", ret);
+		if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+			DBG3("UST app reply enum failed. Application died: pid = %d, sock = %d",
+					app->pid, app->sock);
+		} else if (ret == -EAGAIN) {
+			WARN("UST app reply enum failed. Communication time out: pid = %d, sock = %d",
+					app->pid, app->sock);
 		} else {
-			DBG3("UST app reply enum failed. Application died");
+			ERR("UST app reply enum failed with ret %d: pid = %d, sock = %d",
+					ret, app->pid, app->sock);
 		}
 		/*
 		 * No need to wipe the create enum since the application socket will
@@ -5631,10 +5752,15 @@ int ust_app_recv_notify(int sock)
 
 	ret = ustctl_recv_notify(sock, &cmd);
 	if (ret < 0) {
-		if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-			ERR("UST app recv notify failed with ret %d", ret);
+		if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+			DBG3("UST app recv notify failed. Application died: sock = %d",
+					sock);
+		} else if (ret == -EAGAIN) {
+			WARN("UST app recv notify failed. Communication time out: sock = %d",
+					sock);
 		} else {
-			DBG3("UST app recv notify failed. Application died");
+			ERR("UST app recv notify failed with ret %d: sock = %d",
+					ret, sock);
 		}
 		goto error;
 	}
@@ -5653,10 +5779,15 @@ int ust_app_recv_notify(int sock)
 				&loglevel_value, &sig, &nr_fields, &fields,
 				&model_emf_uri);
 		if (ret < 0) {
-			if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-				ERR("UST app recv event failed with ret %d", ret);
+			if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+				DBG3("UST app recv event failed. Application died: sock = %d",
+						sock);
+			} else if (ret == -EAGAIN) {
+				WARN("UST app recv event failed. Communication time out: sock = %d",
+						sock);
 			} else {
-				DBG3("UST app recv event failed. Application died");
+				ERR("UST app recv event failed with ret %d: sock = %d",
+						ret, sock);
 			}
 			goto error;
 		}
@@ -5686,10 +5817,15 @@ int ust_app_recv_notify(int sock)
 		ret = ustctl_recv_register_channel(sock, &sobjd, &cobjd, &nr_fields,
 				&fields);
 		if (ret < 0) {
-			if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-				ERR("UST app recv channel failed with ret %d", ret);
+			if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+				DBG3("UST app recv channel failed. Application died: sock = %d",
+						sock);
+			} else if (ret == -EAGAIN) {
+				WARN("UST app recv channel failed. Communication time out: sock = %d",
+						sock);
 			} else {
-				DBG3("UST app recv channel failed. Application died");
+				ERR("UST app recv channel failed with ret %d: sock = %d)",
+						ret, sock);
 			}
 			goto error;
 		}
@@ -5719,10 +5855,15 @@ int ust_app_recv_notify(int sock)
 		ret = ustctl_recv_register_enum(sock, &sobjd, name,
 				&entries, &nr_entries);
 		if (ret < 0) {
-			if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-				ERR("UST app recv enum failed with ret %d", ret);
+			if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+				DBG3("UST app recv enum failed. Application died: sock = %d",
+						sock);
+			} else if (ret == -EAGAIN) {
+				WARN("UST app recv enum failed. Communication time out: sock = %d",
+						sock);
 			} else {
-				DBG3("UST app recv enum failed. Application died");
+				ERR("UST app recv enum failed with ret %d: sock = %d",
+						ret, sock);
 			}
 			goto error;
 		}
--- a/src/bin/lttng-sessiond/ust-consumer.c
+++ b/src/bin/lttng-sessiond/ust-consumer.c
@@ -374,11 +374,16 @@ int ust_consumer_send_stream_to_ust(stru
 	ret = ustctl_send_stream_to_ust(app->sock, channel->obj, stream->obj);
 	pthread_mutex_unlock(&app->sock_lock);
 	if (ret < 0) {
-		if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-			ERR("ustctl send stream handle %d to app pid: %d with ret %d",
-					stream->obj->handle, app->pid, ret);
+		if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+			DBG3("UST app send stream to ust failed. Application is dead. (pid: %d, sock: %d).",
+					app->pid, app->sock);
+		} else if (ret == -EAGAIN) {
+			WARN("UST app send stream to ust failed. Communication time out (pid: %d, sock: %d).",
+					app->pid, app->sock);
 		} else {
-			DBG3("UST app send stream to ust failed. Application is dead.");
+			ERR("UST app send stream, handle %d, to ust failed with ret %d (pid: %d, sock: %d).",
+					stream->obj->handle, ret, app->pid,
+					app->sock);
 		}
 		goto error;
 	}
@@ -411,11 +416,16 @@ int ust_consumer_send_channel_to_ust(str
 	ret = ustctl_send_channel_to_ust(app->sock, ua_sess->handle, channel->obj);
 	pthread_mutex_unlock(&app->sock_lock);
 	if (ret < 0) {
-		if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-			ERR("Error ustctl send channel %s to app pid: %d with ret %d",
-					channel->name, app->pid, ret);
+		if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+			DBG3("UST app send channel to ust failed. Application is dead (pid: %d, sock: %d).",
+					app->pid, app->sock);
+		} else if (ret == -EAGAIN) {
+			WARN("UST app send channel to ust failed. Communication timeout (pid: %d, sock: %d).",
+					app->pid, app->sock);
 		} else {
-			DBG3("UST app send channel to ust failed. Application is dead.");
+			ERR("UST app send channel %s, to ust failed with ret %d (pid: %d, sock: %d).",
+					channel->name, ret, app->pid,
+					app->sock);
 		}
 		goto error;
 	}
openSUSE Build Service is sponsored by