Fix: ust: app stuck on recv message during UST comm timeout scenario
[lttng-tools.git] / src / bin / lttng-sessiond / ust-app.c
index 4cc21b42cab971bd954447b55023f941db077804..8305f7efee77ef9ce5ac81f0e762a95e7189e205 100644 (file)
@@ -27,7 +27,6 @@
 #include <sys/types.h>
 #include <unistd.h>
 #include <urcu/compiler.h>
-#include <lttng/ust-error.h>
 #include <signal.h>
 
 #include <common/common.h>
 #include "health-sessiond.h"
 #include "ust-app.h"
 #include "ust-consumer.h"
-#include "ust-ctl.h"
+#include "lttng-ust-ctl.h"
+#include "lttng-ust-error.h"
 #include "utils.h"
 #include "session.h"
 #include "lttng-sessiond.h"
 #include "notification-thread-commands.h"
 #include "rotate.h"
 
+struct lttng_ht *ust_app_ht;
+struct lttng_ht *ust_app_ht_by_sock;
+struct lttng_ht *ust_app_ht_by_notify_sock;
+
 static
 int ust_app_flush_app_session(struct ust_app *app, struct ust_app_session *ua_sess);
 
@@ -250,7 +254,8 @@ static struct ust_registry_session *get_session_registry(
        case LTTNG_BUFFER_PER_UID:
        {
                struct buffer_reg_uid *reg_uid = buffer_reg_uid_find(
-                               ua_sess->tracing_id, ua_sess->bits_per_long, ua_sess->uid);
+                               ua_sess->tracing_id, ua_sess->bits_per_long,
+                               ua_sess->real_credentials.uid);
                if (!reg_uid) {
                        goto error;
                }
@@ -281,9 +286,18 @@ void delete_ust_app_ctx(int sock, struct ust_app_ctx *ua_ctx,
                pthread_mutex_lock(&app->sock_lock);
                ret = ustctl_release_object(sock, ua_ctx->obj);
                pthread_mutex_unlock(&app->sock_lock);
-               if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                       ERR("UST app sock %d release ctx obj handle %d failed with ret %d",
-                                       sock, ua_ctx->obj->handle, ret);
+               if (ret < 0) {
+                       if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                               DBG3("UST app release ctx failed. Application is dead: pid = %d, sock = %d",
+                                               app->pid, app->sock);
+                       } else if (ret == -EAGAIN) {
+                               WARN("UST app release ctx failed. Communication time out: pid = %d, sock = %d",
+                                               app->pid, app->sock);
+                       } else {
+                               ERR("UST app release ctx obj handle %d failed with ret %d: pid = %d, sock = %d",
+                                               ua_ctx->obj->handle, ret,
+                                               app->pid, app->sock);
+                       }
                }
                free(ua_ctx->obj);
        }
@@ -309,9 +323,17 @@ void delete_ust_app_event(int sock, struct ust_app_event *ua_event,
                pthread_mutex_lock(&app->sock_lock);
                ret = ustctl_release_object(sock, ua_event->obj);
                pthread_mutex_unlock(&app->sock_lock);
-               if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                       ERR("UST app sock %d release event obj failed with ret %d",
-                                       sock, ret);
+               if (ret < 0) {
+                       if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                               DBG3("UST app release event failed. Application is dead: pid = %d, sock = %d",
+                                               app->pid, app->sock);
+                       } else if (ret == -EAGAIN) {
+                               WARN("UST app release event failed. Communication time out: pid = %d, sock = %d",
+                                               app->pid, app->sock);
+                       } else {
+                               ERR("UST app release event obj failed with ret %d: pid = %d, sock = %d",
+                                               ret, app->pid, app->sock);
+                       }
                }
                free(ua_event->obj);
        }
@@ -334,9 +356,17 @@ static int release_ust_app_stream(int sock, struct ust_app_stream *stream,
                pthread_mutex_lock(&app->sock_lock);
                ret = ustctl_release_object(sock, stream->obj);
                pthread_mutex_unlock(&app->sock_lock);
-               if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                       ERR("UST app sock %d release stream obj failed with ret %d",
-                                       sock, ret);
+               if (ret < 0) {
+                       if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                               DBG3("UST app release stream failed. Application is dead: pid = %d, sock = %d",
+                                               app->pid, app->sock);
+                       } else if (ret == -EAGAIN) {
+                               WARN("UST app release stream failed. Communication time out: pid = %d, sock = %d",
+                                               app->pid, app->sock);
+                       } else {
+                               ERR("UST app release stream obj failed with ret %d: pid = %d, sock = %d",
+                                               ret, app->pid, app->sock);
+                       }
                }
                lttng_fd_put(LTTNG_FD_APPS, 2);
                free(stream->obj);
@@ -510,9 +540,20 @@ void delete_ust_app_channel(int sock, struct ust_app_channel *ua_chan,
                pthread_mutex_lock(&app->sock_lock);
                ret = ustctl_release_object(sock, ua_chan->obj);
                pthread_mutex_unlock(&app->sock_lock);
-               if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                       ERR("UST app sock %d release channel obj failed with ret %d",
-                                       sock, ret);
+               if (ret < 0) {
+                       if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                               DBG3("UST app channel %s release failed. Application is dead: pid = %d, sock = %d",
+                                               ua_chan->name, app->pid,
+                                               app->sock);
+                       } else if (ret == -EAGAIN) {
+                               WARN("UST app channel %s release failed. Communication time out: pid = %d, sock = %d",
+                                               ua_chan->name, app->pid,
+                                               app->sock);
+                       } else {
+                               ERR("UST app channel %s release failed with ret %d: pid = %d, sock = %d",
+                                               ua_chan->name, ret, app->pid,
+                                               app->sock);
+                       }
                }
                lttng_fd_put(LTTNG_FD_APPS, 1);
                free(ua_chan->obj);
@@ -740,6 +781,10 @@ error:
  * nullified. The session lock MUST be held unless the application is
  * in the destroy path.
  *
+ * Do not hold the registry lock while communicating with the consumerd, because
+ * doing so causes inter-process deadlocks between consumerd and sessiond with
+ * the metadata request notification.
+ *
  * Return 0 on success else a negative value.
  */
 static int close_metadata(struct ust_registry_session *registry,
@@ -747,6 +792,8 @@ static int close_metadata(struct ust_registry_session *registry,
 {
        int ret;
        struct consumer_socket *socket;
+       uint64_t metadata_key;
+       bool registry_was_already_closed;
 
        assert(registry);
        assert(consumer);
@@ -754,8 +801,19 @@ static int close_metadata(struct ust_registry_session *registry,
        rcu_read_lock();
 
        pthread_mutex_lock(&registry->lock);
+       metadata_key = registry->metadata_key;
+       registry_was_already_closed = registry->metadata_closed;
+       if (metadata_key != 0) {
+               /*
+                * Metadata closed. Even on error this means that the consumer
+                * is not responding or not found so either way a second close
+                * should NOT be emitted for this registry.
+                */
+               registry->metadata_closed = 1;
+       }
+       pthread_mutex_unlock(&registry->lock);
 
-       if (!registry->metadata_key || registry->metadata_closed) {
+       if (metadata_key == 0 || registry_was_already_closed) {
                ret = 0;
                goto end;
        }
@@ -765,23 +823,15 @@ static int close_metadata(struct ust_registry_session *registry,
                        consumer);
        if (!socket) {
                ret = -1;
-               goto error;
+               goto end;
        }
 
-       ret = consumer_close_metadata(socket, registry->metadata_key);
+       ret = consumer_close_metadata(socket, metadata_key);
        if (ret < 0) {
-               goto error;
+               goto end;
        }
 
-error:
-       /*
-        * Metadata closed. Even on error this means that the consumer is not
-        * responding or not found so either way a second close should NOT be emit
-        * for this registry.
-        */
-       registry->metadata_closed = 1;
 end:
-       pthread_mutex_unlock(&registry->lock);
        rcu_read_unlock();
        return ret;
 }
@@ -866,10 +916,19 @@ void delete_ust_app_session(int sock, struct ust_app_session *ua_sess,
                pthread_mutex_lock(&app->sock_lock);
                ret = ustctl_release_handle(sock, ua_sess->handle);
                pthread_mutex_unlock(&app->sock_lock);
-               if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                       ERR("UST app sock %d release session handle failed with ret %d",
-                                       sock, ret);
+               if (ret < 0) {
+                       if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                               DBG3("UST app release session handle failed. Application is dead: pid = %d, sock = %d",
+                                               app->pid, app->sock);
+                       } else if (ret == -EAGAIN) {
+                               WARN("UST app release session handle failed. Communication time out: pid = %d, sock = %d",
+                                               app->pid, app->sock);
+                       } else {
+                               ERR("UST app release session handle failed with ret %d: pid = %d, sock = %d",
+                                               ret, app->pid, app->sock);
+                       }
                }
+
                /* Remove session from application UST object descriptor. */
                iter.iter.node = &ua_sess->ust_objd_node.node;
                ret = lttng_ht_del(app->ust_sessions_objd, &iter);
@@ -1099,7 +1158,7 @@ struct ust_app_event *alloc_ust_app_event(char *name,
        /* Init most of the default value by allocating and zeroing */
        ua_event = zmalloc(sizeof(struct ust_app_event));
        if (ua_event == NULL) {
-               PERROR("malloc");
+               PERROR("Failed to allocate ust_app_event structure");
                goto error;
        }
 
@@ -1311,17 +1370,17 @@ int create_ust_channel_context(struct ust_app_channel *ua_chan,
                        ua_chan->obj, &ua_ctx->obj);
        pthread_mutex_unlock(&app->sock_lock);
        if (ret < 0) {
-               if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                       ERR("UST app create channel context failed for app (pid: %d) "
-                                       "with ret %d", app->pid, ret);
-               } else {
-                       /*
-                        * This is normal behavior, an application can die during the
-                        * creation process. Don't report an error so the execution can
-                        * continue normally.
-                        */
+               if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
                        ret = 0;
-                       DBG3("UST app add context failed. Application is dead.");
+                       DBG3("UST app create channel context failed. Application is dead: pid = %d, sock = %d",
+                                       app->pid, app->sock);
+               } else if (ret == -EAGAIN) {
+                       ret = 0;
+                       WARN("UST app create channel context failed. Communication time out: pid = %d, sock = %d",
+                                       app->pid, app->sock);
+               } else {
+                       ERR("UST app create channel context failed with ret %d: pid = %d, sock = %d",
+                                       ret, app->pid, app->sock);
                }
                goto error;
        }
@@ -1363,17 +1422,18 @@ int set_ust_event_filter(struct ust_app_event *ua_event,
                        ua_event->obj);
        pthread_mutex_unlock(&app->sock_lock);
        if (ret < 0) {
-               if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                       ERR("UST app event %s filter failed for app (pid: %d) "
-                                       "with ret %d", ua_event->attr.name, app->pid, ret);
-               } else {
-                       /*
-                        * This is normal behavior, an application can die during the
-                        * creation process. Don't report an error so the execution can
-                        * continue normally.
-                        */
+               if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                       ret = 0;
+                       DBG3("UST app set filter failed. Application is dead: pid = %d, sock = %d",
+                                       app->pid, app->sock);
+               } else if (ret == -EAGAIN) {
                        ret = 0;
-                       DBG3("UST app filter event failed. Application is dead.");
+                       DBG3("UST app set filter failed. Communication timeout: pid = %d, sock = %d",
+                                       app->pid, app->sock);
+               } else {
+                       ERR("UST app event set filter failed with ret %d: pid = %d, sock = %d",
+                                       ret, app->pid,
+                                       app->sock);
                }
                goto error;
        }
@@ -1434,17 +1494,17 @@ int set_ust_event_exclusion(struct ust_app_event *ua_event,
        ret = ustctl_set_exclusion(app->sock, ust_exclusion, ua_event->obj);
        pthread_mutex_unlock(&app->sock_lock);
        if (ret < 0) {
-               if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                       ERR("UST app event %s exclusions failed for app (pid: %d) "
-                                       "with ret %d", ua_event->attr.name, app->pid, ret);
-               } else {
-                       /*
-                        * This is normal behavior, an application can die during the
-                        * creation process. Don't report an error so the execution can
-                        * continue normally.
-                        */
+               if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
                        ret = 0;
-                       DBG3("UST app event exclusion failed. Application is dead.");
+                       DBG3("UST app event exclusion failed. Application is dead: pid = %d, sock = %d",
+                                       app->pid, app->sock);
+               } else if (ret == -EAGAIN) {
+                       ret = 0;
+                       WARN("UST app event exclusion failed. Communication time out(pid: %d, sock = %d",
+                                       app->pid, app->sock);
+               } else {
+                       ERR("UST app event exclusions failed with ret %d: pid = %d, sock = %d, event = %s",
+                                       ret, app->pid, app->sock, ua_event->name);
                }
                goto error;
        }
@@ -1471,18 +1531,17 @@ static int disable_ust_event(struct ust_app *app,
        ret = ustctl_disable(app->sock, ua_event->obj);
        pthread_mutex_unlock(&app->sock_lock);
        if (ret < 0) {
-               if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                       ERR("UST app event %s disable failed for app (pid: %d) "
-                                       "and session handle %d with ret %d",
-                                       ua_event->attr.name, app->pid, ua_sess->handle, ret);
-               } else {
-                       /*
-                        * This is normal behavior, an application can die during the
-                        * creation process. Don't report an error so the execution can
-                        * continue normally.
-                        */
+               if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                       ret = 0;
+                       DBG3("UST app disable event failed. Application is dead: pid = %d, sock = %d",
+                                       app->pid, app->sock);
+               } else if (ret == -EAGAIN) {
                        ret = 0;
-                       DBG3("UST app disable event failed. Application is dead.");
+                       WARN("UST app disable event failed. Communication time out: pid = %d, sock = %d",
+                                       app->pid, app->sock);
+               } else {
+                       ERR("UST app disable event failed with ret %d: pid = %d, sock = %d , name = %s",
+                                       ret, app->pid, app->sock, ua_event->attr.name);
                }
                goto error;
        }
@@ -1509,23 +1568,23 @@ static int disable_ust_channel(struct ust_app *app,
        ret = ustctl_disable(app->sock, ua_chan->obj);
        pthread_mutex_unlock(&app->sock_lock);
        if (ret < 0) {
-               if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                       ERR("UST app channel %s disable failed for app (pid: %d) "
-                                       "and session handle %d with ret %d",
-                                       ua_chan->name, app->pid, ua_sess->handle, ret);
-               } else {
-                       /*
-                        * This is normal behavior, an application can die during the
-                        * creation process. Don't report an error so the execution can
-                        * continue normally.
-                        */
+               if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                       ret = 0;
+                       DBG3("UST app disable channel failed. Application is dead: pid = %d, sock = %d",
+                                       app->pid, app->sock);
+               } else if (ret == -EAGAIN) {
                        ret = 0;
-                       DBG3("UST app disable channel failed. Application is dead.");
+                       WARN("UST app disable channel failed. Communication time out: pid = %d, sock = %d",
+                                       app->pid, app->sock);
+               } else {
+                       ERR("UST app channel %s disable failed, session handle %d, with ret %d: pid = %d, sock = %d",
+                                       ua_chan->name, ua_sess->handle, ret,
+                                       app->pid, app->sock);
                }
                goto error;
        }
 
-       DBG2("UST app channel %s disabled successfully for app (pid: %d)",
+       DBG2("UST app channel %s disabled successfully for app: pid = %d",
                        ua_chan->name, app->pid);
 
 error:
@@ -1547,25 +1606,25 @@ static int enable_ust_channel(struct ust_app *app,
        ret = ustctl_enable(app->sock, ua_chan->obj);
        pthread_mutex_unlock(&app->sock_lock);
        if (ret < 0) {
-               if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                       ERR("UST app channel %s enable failed for app (pid: %d) "
-                                       "and session handle %d with ret %d",
-                                       ua_chan->name, app->pid, ua_sess->handle, ret);
-               } else {
-                       /*
-                        * This is normal behavior, an application can die during the
-                        * creation process. Don't report an error so the execution can
-                        * continue normally.
-                        */
+               if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
                        ret = 0;
-                       DBG3("UST app enable channel failed. Application is dead.");
+                       DBG3("UST app channel %s enable failed. Application is dead: pid = %d, sock = %d",
+                                       ua_chan->name, app->pid, app->sock);
+               } else if (ret == -EAGAIN) {
+                       ret = 0;
+                       WARN("UST app channel %s enable failed. Communication time out: pid = %d, sock = %d",
+                                       ua_chan->name, app->pid, app->sock);
+               } else {
+                       ERR("UST app channel %s enable failed, session handle %d, with ret %d: pid = %d, sock = %d",
+                                       ua_chan->name, ua_sess->handle, ret,
+                                       app->pid, app->sock);
                }
                goto error;
        }
 
        ua_chan->enabled = 1;
 
-       DBG2("UST app channel %s enabled successfully for app (pid: %d)",
+       DBG2("UST app channel %s enabled successfully for app: pid = %d",
                        ua_chan->name, app->pid);
 
 error:
@@ -1587,18 +1646,17 @@ static int enable_ust_event(struct ust_app *app,
        ret = ustctl_enable(app->sock, ua_event->obj);
        pthread_mutex_unlock(&app->sock_lock);
        if (ret < 0) {
-               if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                       ERR("UST app event %s enable failed for app (pid: %d) "
-                                       "and session handle %d with ret %d",
-                                       ua_event->attr.name, app->pid, ua_sess->handle, ret);
-               } else {
-                       /*
-                        * This is normal behavior, an application can die during the
-                        * creation process. Don't report an error so the execution can
-                        * continue normally.
-                        */
+               if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                       ret = 0;
+                       DBG3("UST app enable event failed. Application is dead: pid = %d, sock = %d",
+                                       app->pid, app->sock);
+               } else if (ret == -EAGAIN) {
                        ret = 0;
-                       DBG3("UST app enable event failed. Application is dead.");
+                       WARN("UST app enable event failed. Communication time out: pid = %d, sock = %d",
+                                       app->pid, app->sock);
+               } else {
+                       ERR("UST app enable event failed with ret %d: pid = %d, sock = %d, event = %s",
+                                       ret, app->pid, app->sock, ua_event->attr.name);
                }
                goto error;
        }
@@ -1636,6 +1694,13 @@ static int send_channel_pid_to_ust(struct ust_app *app,
        if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
                ret = -ENOTCONN;        /* Caused by app exiting. */
                goto error;
+       } else if (ret == -EAGAIN) {
+               /* Caused by timeout. */
+               WARN("Communication with application %d timed out on send_channel for channel \"%s\" of session \"%" PRIu64 "\".",
+                               app->pid, ua_chan->name, ua_sess->tracing_id);
+               /* Treat this the same way as an application that is exiting. */
+               ret = -ENOTCONN;
+               goto error;
        } else if (ret < 0) {
                goto error;
        }
@@ -1646,8 +1711,18 @@ static int send_channel_pid_to_ust(struct ust_app *app,
        cds_list_for_each_entry_safe(stream, stmp, &ua_chan->streams.head, list) {
                ret = ust_consumer_send_stream_to_ust(app, ua_chan, stream);
                if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
-                       ret = -ENOTCONN;        /* Caused by app exiting. */
+                       ret = -ENOTCONN; /* Caused by app exiting. */
                        goto error;
+               } else if (ret == -EAGAIN) {
+                       /* Caused by timeout. */
+                       WARN("Communication with application %d timed out on send_stream for stream \"%s\" of channel \"%s\" of session \"%" PRIu64 "\".",
+                                       app->pid, stream->name, ua_chan->name,
+                                       ua_sess->tracing_id);
+                       /*
+                        * Treat this the same way as an application that is
+                        * exiting.
+                        */
+                       ret = -ENOTCONN;
                } else if (ret < 0) {
                        goto error;
                }
@@ -1682,18 +1757,18 @@ int create_ust_event(struct ust_app *app, struct ust_app_session *ua_sess,
                        &ua_event->obj);
        pthread_mutex_unlock(&app->sock_lock);
        if (ret < 0) {
-               if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                       abort();
-                       ERR("Error ustctl create event %s for app pid: %d with ret %d",
-                                       ua_event->attr.name, app->pid, ret);
-               } else {
-                       /*
-                        * This is normal behavior, an application can die during the
-                        * creation process. Don't report an error so the execution can
-                        * continue normally.
-                        */
+               if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
                        ret = 0;
-                       DBG3("UST app create event failed. Application is dead.");
+                       DBG3("UST app create event failed. Application is dead: pid = %d, sock = %d",
+                                       app->pid, app->sock);
+               } else if (ret == -EAGAIN) {
+                       ret = 0;
+                       WARN("UST app create event failed. Communication time out: pid = %d, sock = %d",
+                                       app->pid, app->sock);
+               } else {
+                       ERR("UST app create event '%s' failed with ret %d: pid = %d, sock = %d",
+                                       ua_event->attr.name, ret, app->pid,
+                                       app->sock);
                }
                goto error;
        }
@@ -1831,25 +1906,22 @@ static void shadow_copy_channel(struct ust_app_channel *ua_chan,
 static void shadow_copy_session(struct ust_app_session *ua_sess,
                struct ltt_ust_session *usess, struct ust_app *app)
 {
-       time_t rawtime;
        struct tm *timeinfo;
        char datetime[16];
        int ret;
        char tmp_shm_path[PATH_MAX];
 
-       /* Get date and time for unique app path */
-       time(&rawtime);
-       timeinfo = localtime(&rawtime);
+       timeinfo = localtime(&app->registration_time);
        strftime(datetime, sizeof(datetime), "%Y%m%d-%H%M%S", timeinfo);
 
        DBG2("Shadow copy of session handle %d", ua_sess->handle);
 
        ua_sess->tracing_id = usess->id;
        ua_sess->id = get_next_session_id();
-       ua_sess->uid = app->uid;
-       ua_sess->gid = app->gid;
-       ua_sess->euid = usess->uid;
-       ua_sess->egid = usess->gid;
+       ua_sess->real_credentials.uid = app->uid;
+       ua_sess->real_credentials.gid = app->gid;
+       ua_sess->effective_credentials.uid = usess->uid;
+       ua_sess->effective_credentials.gid = usess->gid;
        ua_sess->buffer_type = usess->buffer_type;
        ua_sess->bits_per_long = app->bits_per_long;
 
@@ -1870,7 +1942,9 @@ static void shadow_copy_session(struct ust_app_session *ua_sess,
                break;
        case LTTNG_BUFFER_PER_UID:
                ret = snprintf(ua_sess->path, sizeof(ua_sess->path),
-                               DEFAULT_UST_TRACE_UID_PATH, ua_sess->uid, app->bits_per_long);
+                               DEFAULT_UST_TRACE_UID_PATH,
+                               ua_sess->real_credentials.uid,
+                               app->bits_per_long);
                break;
        default:
                assert(0);
@@ -1990,10 +2064,11 @@ static int setup_buffer_reg_pid(struct ust_app_session *ua_sess,
                        app->bits_per_long, app->uint8_t_alignment,
                        app->uint16_t_alignment, app->uint32_t_alignment,
                        app->uint64_t_alignment, app->long_alignment,
-                       app->byte_order, app->version.major,
-                       app->version.minor, reg_pid->root_shm_path,
-                       reg_pid->shm_path,
-                       ua_sess->euid, ua_sess->egid);
+                       app->byte_order, app->version.major, app->version.minor,
+                       reg_pid->root_shm_path, reg_pid->shm_path,
+                       ua_sess->effective_credentials.uid,
+                       ua_sess->effective_credentials.gid, ua_sess->tracing_id,
+                       app->uid);
        if (ret < 0) {
                /*
                 * reg_pid->registry->reg.ust is NULL upon error, so we need to
@@ -2060,7 +2135,8 @@ static int setup_buffer_reg_uid(struct ltt_ust_session *usess,
                        app->uint64_t_alignment, app->long_alignment,
                        app->byte_order, app->version.major,
                        app->version.minor, reg_uid->root_shm_path,
-                       reg_uid->shm_path, usess->uid, usess->gid);
+                       reg_uid->shm_path, usess->uid, usess->gid,
+                       ua_sess->tracing_id, app->uid);
        if (ret < 0) {
                /*
                 * reg_uid->registry->reg.ust is NULL upon error, so we need to
@@ -2154,18 +2230,17 @@ static int find_or_create_ust_app_session(struct ltt_ust_session *usess,
                ret = ustctl_create_session(app->sock);
                pthread_mutex_unlock(&app->sock_lock);
                if (ret < 0) {
-                       if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                               ERR("Creating session for app pid %d with ret %d",
-                                               app->pid, ret);
-                       } else {
-                               DBG("UST app creating session failed. Application is dead");
-                               /*
-                                * This is normal behavior, an application can die during the
-                                * creation process. Don't report an error so the execution can
-                                * continue normally. This will get flagged ENOTCONN and the
-                                * caller will handle it.
-                                */
+                       if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                               DBG("UST app creating session failed. Application is dead: pid = %d, sock = %d",
+                                               app->pid, app->sock);
+                               ret = 0;
+                       } else if (ret == -EAGAIN) {
+                               DBG("UST app creating session failed. Communication time out: pid = %d, sock = %d",
+                                               app->pid, app->sock);
                                ret = 0;
+                       } else {
+                               ERR("UST app creating session failed with ret %d: pid = %d, sock =%d",
+                                               ret, app->pid, app->sock);
                        }
                        delete_ust_app_session(-1, ua_sess, app);
                        if (ret != -ENOMEM) {
@@ -2462,7 +2537,7 @@ static int do_consumer_create_channel(struct ltt_ust_session *usess,
         * stream we have to expect.
         */
        ret = ust_consumer_ask_channel(ua_sess, ua_chan, usess->consumer, socket,
-                       registry, trace_archive_id);
+                       registry, usess->current_trace_chunk);
        if (ret < 0) {
                goto error_ask;
        }
@@ -2483,7 +2558,7 @@ static int do_consumer_create_channel(struct ltt_ust_session *usess,
        health_code_update();
 
        /*
-        * Now get the channel from the consumer. This call wil populate the stream
+        * Now get the channel from the consumer. This call will populate the stream
         * list of that channel and set the ust objects.
         */
        if (usess->consumer->enabled) {
@@ -2747,6 +2822,13 @@ static int send_channel_uid_to_ust(struct buffer_reg_channel *reg_chan,
        if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
                ret = -ENOTCONN;        /* Caused by app exiting. */
                goto error;
+       } else if (ret == -EAGAIN) {
+               /* Caused by timeout. */
+               WARN("Communication with application %d timed out on send_channel for channel \"%s\" of session \"%" PRIu64 "\".",
+                               app->pid, ua_chan->name, ua_sess->tracing_id);
+               /* Treat this the same way as an application that is exiting. */
+               ret = -ENOTCONN;
+               goto error;
        } else if (ret < 0) {
                goto error;
        }
@@ -2765,10 +2847,21 @@ static int send_channel_uid_to_ust(struct buffer_reg_channel *reg_chan,
 
                ret = ust_consumer_send_stream_to_ust(app, ua_chan, &stream);
                if (ret < 0) {
-                       (void) release_ust_app_stream(-1, &stream, app);
                        if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
                                ret = -ENOTCONN; /* Caused by app exiting. */
+                       } else if (ret == -EAGAIN) {
+                               /*
+                                * Caused by timeout.
+                                * Treat this the same way as an application
+                                * that is exiting.
+                                */
+                               WARN("Communication with application %d timed out on send_stream for stream \"%s\" of channel \"%s\" of session \"%" PRIu64 "\".",
+                                               app->pid, stream.name,
+                                               ua_chan->name,
+                                               ua_sess->tracing_id);
+                               ret = -ENOTCONN;
                        }
+                       (void) release_ust_app_stream(-1, &stream, app);
                        goto error_stream_unlock;
                }
 
@@ -2845,7 +2938,7 @@ static int create_channel_per_uid(struct ust_app *app,
         */
        ret = do_consumer_create_channel(usess, ua_sess, ua_chan,
                        app->bits_per_long, reg_uid->registry->reg.ust,
-                       session->current_archive_id);
+                       session->most_recent_chunk_id.value);
        if (ret < 0) {
                ERR("Error creating UST channel \"%s\" on the consumer daemon",
                                ua_chan->name);
@@ -2882,10 +2975,9 @@ static int create_channel_per_uid(struct ust_app *app,
 
        notification_ret = notification_thread_command_add_channel(
                        notification_thread_handle, session->name,
-                       ua_sess->euid, ua_sess->egid,
-                       ua_chan->name,
-                       ua_chan->key,
-                       LTTNG_DOMAIN_UST,
+                       ua_sess->effective_credentials.uid,
+                       ua_sess->effective_credentials.gid, ua_chan->name,
+                       ua_chan->key, LTTNG_DOMAIN_UST,
                        ua_chan->attr.subbuf_size * ua_chan->attr.num_subbuf);
        if (notification_ret != LTTNG_OK) {
                ret = - (int) notification_ret;
@@ -2959,7 +3051,7 @@ static int create_channel_per_pid(struct ust_app *app,
        /* Create and get channel on the consumer side. */
        ret = do_consumer_create_channel(usess, ua_sess, ua_chan,
                        app->bits_per_long, registry,
-                       session->current_archive_id);
+                       session->most_recent_chunk_id.value);
        if (ret < 0) {
                ERR("Error creating UST channel \"%s\" on the consumer daemon",
                        ua_chan->name);
@@ -2983,10 +3075,9 @@ static int create_channel_per_pid(struct ust_app *app,
 
        cmd_ret = notification_thread_command_add_channel(
                        notification_thread_handle, session->name,
-                       ua_sess->euid, ua_sess->egid,
-                       ua_chan->name,
-                       ua_chan->key,
-                       LTTNG_DOMAIN_UST,
+                       ua_sess->effective_credentials.uid,
+                       ua_sess->effective_credentials.gid, ua_chan->name,
+                       ua_chan->key, LTTNG_DOMAIN_UST,
                        ua_chan->attr.subbuf_size * ua_chan->attr.num_subbuf);
        if (cmd_ret != LTTNG_OK) {
                ret = - (int) cmd_ret;
@@ -3133,7 +3224,7 @@ int create_ust_app_event(struct ust_app_session *ua_sess,
 
        ua_event = alloc_ust_app_event(uevent->attr.name, &uevent->attr);
        if (ua_event == NULL) {
-               /* Only malloc can failed so something is really wrong */
+               /* Only failure mode of alloc_ust_app_event(). */
                ret = -ENOMEM;
                goto end;
        }
@@ -3142,8 +3233,19 @@ int create_ust_app_event(struct ust_app_session *ua_sess,
        /* Create it on the tracer side */
        ret = create_ust_event(app, ua_sess, ua_chan, ua_event);
        if (ret < 0) {
-               /* Not found previously means that it does not exist on the tracer */
-               assert(ret != -LTTNG_UST_ERR_EXIST);
+               /*
+                * Not found previously means that it does not exist on the
+                * tracer. If the application reports that the event existed,
+                * it means there is a bug in the sessiond or lttng-ust
+                * (or corruption, etc.)
+                */
+               if (ret == -LTTNG_UST_ERR_EXIST) {
+                       ERR("Tracer for application reported that an event being created already existed: "
+                                       "event_name = \"%s\", pid = %d, ppid = %d, uid = %d, gid = %d",
+                                       uevent->attr.name,
+                                       app->pid, app->ppid, app->uid,
+                                       app->gid);
+               }
                goto error;
        }
 
@@ -3236,7 +3338,7 @@ static int create_ust_app_metadata(struct ust_app_session *ua_sess,
         * consumer.
         */
        ret = ust_consumer_ask_channel(ua_sess, metadata, consumer, socket,
-                       registry, session->current_archive_id);
+                       registry, session->current_trace_chunk);
        if (ret < 0) {
                /* Nullify the metadata key so we don't try to close it later on. */
                registry->metadata_key = 0;
@@ -3376,6 +3478,8 @@ void ust_app_add(struct ust_app *app)
        assert(app);
        assert(app->notify_sock >= 0);
 
+       app->registration_time = time(NULL);
+
        rcu_read_lock();
 
        /*
@@ -3395,8 +3499,8 @@ void ust_app_add(struct ust_app *app)
        lttng_ht_node_init_ulong(&app->notify_sock_n, app->notify_sock);
        lttng_ht_add_unique_ulong(ust_app_ht_by_notify_sock, &app->notify_sock_n);
 
-       DBG("App registered with pid:%d ppid:%d uid:%d gid:%d sock:%d name:%s "
-                       "notify_sock:%d (version %d.%d)", app->pid, app->ppid, app->uid,
+       DBG("App registered with pid:%d ppid:%d uid:%d gid:%d sock =%d name:%s "
+                       "notify_sock =%d (version %d.%d)", app->pid, app->ppid, app->uid,
                        app->gid, app->sock, app->name, app->notify_sock, app->v_major,
                        app->v_minor);
 
@@ -3419,10 +3523,15 @@ int ust_app_version(struct ust_app *app)
        ret = ustctl_tracer_version(app->sock, &app->version);
        pthread_mutex_unlock(&app->sock_lock);
        if (ret < 0) {
-               if (ret != -LTTNG_UST_ERR_EXITING && ret != -EPIPE) {
-                       ERR("UST app %d version failed with ret %d", app->sock, ret);
+               if (ret == -LTTNG_UST_ERR_EXITING || ret == -EPIPE) {
+                       DBG3("UST app version failed. Application is dead: pid = %d, sock = %d",
+                                       app->pid, app->sock);
+               } else if (ret == -EAGAIN) {
+                       WARN("UST app version failed. Communication time out: pid = %d, sock = %d",
+                                       app->pid, app->sock);
                } else {
-                       DBG3("UST app %d version failed. Application is dead", app->sock);
+                       ERR("UST app version failed with ret %d: pid = %d, sock = %d",
+                                       ret, app->pid, app->sock);
                }
        }
 
@@ -3604,11 +3713,6 @@ int ust_app_list_events(struct lttng_event **events)
                                                        app->sock, ret);
                                } else {
                                        DBG3("UST app tp list get failed. Application is dead");
-                                       /*
-                                        * This is normal behavior, an application can die during the
-                                        * creation process. Don't report an error so the execution can
-                                        * continue normally. Continue normal execution.
-                                        */
                                        break;
                                }
                                free(tmp_event);
@@ -3663,8 +3767,17 @@ int ust_app_list_events(struct lttng_event **events)
                }
                ret = ustctl_release_handle(app->sock, handle);
                pthread_mutex_unlock(&app->sock_lock);
-               if (ret < 0 && ret != -LTTNG_UST_ERR_EXITING && ret != -EPIPE) {
-                       ERR("Error releasing app handle for app %d with ret %d", app->sock, ret);
+               if (ret < 0) {
+                       if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                               DBG3("Error releasing app handle. Application died: pid = %d, sock = %d",
+                                               app->pid, app->sock);
+                       } else if (ret == -EAGAIN) {
+                               WARN("Error releasing app handle. Communication time out: pid = %d, sock = %d",
+                                               app->pid, app->sock);
+                       } else {
+                               ERR("Error releasing app handle with ret %d: pid = %d, sock = %d",
+                                               ret, app->pid, app->sock);
+                       }
                }
        }
 
@@ -3735,11 +3848,6 @@ int ust_app_list_event_fields(struct lttng_event_field **fields)
                                                        app->sock, ret);
                                } else {
                                        DBG3("UST app tp list field failed. Application is dead");
-                                       /*
-                                        * This is normal behavior, an application can die during the
-                                        * creation process. Don't report an error so the execution can
-                                        * continue normally. Reset list and count for next app.
-                                        */
                                        break;
                                }
                                free(tmp_event);
@@ -4096,11 +4204,14 @@ int ust_app_channel_create(struct ltt_ust_session *usess,
                ret = ust_app_channel_allocate(ua_sess, uchan,
                        LTTNG_UST_CHAN_PER_CPU, usess,
                        &ua_chan);
-               if (ret == 0) {
-                       ret = ust_app_channel_send(app, usess,
-                               ua_sess, ua_chan);
-               } else {
-                       goto end;
+               if (ret < 0) {
+                       goto error;
+               }
+
+               ret = ust_app_channel_send(app, usess,
+                       ua_sess, ua_chan);
+               if (ret) {
+                       goto error;
                }
 
                /* Add contexts. */
@@ -4108,10 +4219,12 @@ int ust_app_channel_create(struct ltt_ust_session *usess,
                        ret = create_ust_app_channel_context(ua_chan,
                                &uctx->ctx, app);
                        if (ret) {
-                               goto end;
+                               goto error;
                        }
                }
        }
+
+error:
        if (ret < 0) {
                switch (ret) {
                case -ENOTCONN:
@@ -4127,7 +4240,7 @@ int ust_app_channel_create(struct ltt_ust_session *usess,
                        break;
                }
        }
-end:
+
        if (ret == 0 && _ua_chan) {
                /*
                 * Only return the application's channel on success. Note
@@ -4199,7 +4312,7 @@ int ust_app_create_channel_glb(struct ltt_ust_session *usess,
                        if (session_was_created) {
                                destroy_app_session(app, ua_sess);
                        }
-                       goto error_rcu_unlock;
+                       /* Continue to the next application. */
                }
        }
 
@@ -4399,15 +4512,6 @@ int ust_app_start_trace(struct ltt_ust_session *usess, struct ust_app *app)
                goto skip_setup;
        }
 
-       /*
-        * Create the metadata for the application. This returns gracefully if a
-        * metadata was already set for the session.
-        */
-       ret = create_ust_app_metadata(ua_sess, app, usess->consumer);
-       if (ret < 0) {
-               goto error_unlock;
-       }
-
        health_code_update();
 
 skip_setup:
@@ -4416,18 +4520,20 @@ skip_setup:
        ret = ustctl_start_session(app->sock, ua_sess->handle);
        pthread_mutex_unlock(&app->sock_lock);
        if (ret < 0) {
-               if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                       ERR("Error starting tracing for app pid: %d (ret: %d)",
-                                       app->pid, ret);
-               } else {
-                       DBG("UST app start session failed. Application is dead.");
-                       /*
-                        * This is normal behavior, an application can die during the
-                        * creation process. Don't report an error so the execution can
-                        * continue normally.
-                        */
+               if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                       DBG3("UST app start session failed. Application is dead: pid = %d, sock = %d",
+                                       app->pid, app->sock);
                        pthread_mutex_unlock(&ua_sess->lock);
                        goto end;
+               } else if (ret == -EAGAIN) {
+                       WARN("UST app start session failed. Communication time out: pid = %d, sock = %d",
+                                       app->pid, app->sock);
+                       pthread_mutex_unlock(&ua_sess->lock);
+                       goto end;
+
+               } else {
+                       ERR("UST app start session failed with ret %d: pid = %d, sock = %d",
+                                       ret, app->pid, app->sock);
                }
                goto error_unlock;
        }
@@ -4443,9 +4549,17 @@ skip_setup:
        pthread_mutex_lock(&app->sock_lock);
        ret = ustctl_wait_quiescent(app->sock);
        pthread_mutex_unlock(&app->sock_lock);
-       if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-               ERR("UST app wait quiescent failed for app pid %d ret %d",
-                               app->pid, ret);
+       if (ret < 0) {
+               if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                       DBG3("UST app wait quiescent failed. Application is dead: pid = %d, sock = %d",
+                                       app->pid, app->sock);
+               } else if (ret == -EAGAIN) {
+                       WARN("UST app wait quiescent failed. Communication time out: pid =  %d, sock = %d",
+                                       app->pid, app->sock);
+               } else {
+                       ERR("UST app wait quiescent failed with ret %d: pid %d, sock = %d",
+                                       ret, app->pid, app->sock);
+               }
        }
 
 end:
@@ -4507,17 +4621,18 @@ int ust_app_stop_trace(struct ltt_ust_session *usess, struct ust_app *app)
        ret = ustctl_stop_session(app->sock, ua_sess->handle);
        pthread_mutex_unlock(&app->sock_lock);
        if (ret < 0) {
-               if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                       ERR("Error stopping tracing for app pid: %d (ret: %d)",
-                                       app->pid, ret);
-               } else {
-                       DBG("UST app stop session failed. Application is dead.");
-                       /*
-                        * This is normal behavior, an application can die during the
-                        * creation process. Don't report an error so the execution can
-                        * continue normally.
-                        */
+               if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                       DBG3("UST app stop session failed. Application is dead: pid = %d, sock = %d",
+                                       app->pid, app->sock);
+                       goto end_unlock;
+               } else if (ret == -EAGAIN) {
+                       WARN("UST app stop session failed. Communication time out: pid = %d, sock = %d",
+                                       app->pid, app->sock);
                        goto end_unlock;
+
+               } else {
+                       ERR("UST app stop session failed with ret %d: pid = %d, sock = %d",
+                                       ret, app->pid, app->sock);
                }
                goto error_rcu_unlock;
        }
@@ -4528,9 +4643,17 @@ int ust_app_stop_trace(struct ltt_ust_session *usess, struct ust_app *app)
        pthread_mutex_lock(&app->sock_lock);
        ret = ustctl_wait_quiescent(app->sock);
        pthread_mutex_unlock(&app->sock_lock);
-       if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-               ERR("UST app wait quiescent failed for app pid %d ret %d",
-                               app->pid, ret);
+       if (ret < 0) {
+               if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                       DBG3("UST app wait quiescent failed. Application is dead: pid= %d, sock = %d)",
+                                       app->pid, app->sock);
+               } else if (ret == -EAGAIN) {
+                       WARN("UST app wait quiescent failed. Communication time out: pid= %d, sock = %d)",
+                                       app->pid, app->sock);
+               } else {
+                       ERR("UST app wait quiescent failed with ret %d: pid= %d, sock = %d)",
+                                       ret, app->pid, app->sock);
+               }
        }
 
        health_code_update();
@@ -4878,9 +5001,17 @@ static int destroy_trace(struct ltt_ust_session *usess, struct ust_app *app)
        pthread_mutex_lock(&app->sock_lock);
        ret = ustctl_wait_quiescent(app->sock);
        pthread_mutex_unlock(&app->sock_lock);
-       if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-               ERR("UST app wait quiescent failed for app pid %d ret %d",
-                               app->pid, ret);
+       if (ret < 0) {
+               if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                       DBG3("UST app wait quiescent failed. Application is dead: pid= %d, sock = %d)",
+                                       app->pid, app->sock);
+               } else if (ret == -EAGAIN) {
+                       WARN("UST app wait quiescent failed. Communication time out: pid= %d, sock = %d)",
+                                       app->pid, app->sock);
+               } else {
+                       ERR("UST app wait quiescent failed with ret %d: pid= %d, sock = %d)",
+                                       ret, app->pid, app->sock);
+               }
        }
 end:
        rcu_read_unlock();
@@ -5073,6 +5204,7 @@ void ust_app_synchronize(struct ltt_ust_session *usess,
        }
 
        rcu_read_lock();
+
        cds_lfht_for_each_entry(usess->domain_global.channels->ht, &uchan_iter,
                        uchan, node.node) {
                struct ust_app_channel *ua_chan;
@@ -5116,6 +5248,21 @@ void ust_app_synchronize(struct ltt_ust_session *usess,
                        }
                }
        }
+
+       /*
+        * Create the metadata for the application. This returns gracefully if a
+        * metadata was already set for the session.
+        *
+        * The metadata channel must be created after the data channels as the
+        * consumer daemon assumes this ordering. When interacting with a relay
+        * daemon, the consumer will use this assumption to send the
+        * "STREAMS_SENT" message to the relay daemon.
+        */
+       ret = create_ust_app_metadata(ua_sess, app, usess->consumer);
+       if (ret < 0) {
+               goto error_unlock;
+       }
+
        rcu_read_unlock();
 
 end:
@@ -5376,7 +5523,7 @@ static int reply_ust_register_channel(int sock, int cobjd,
        if (!app) {
                DBG("Application socket %d is being torn down. Abort event notify",
                                sock);
-               ret = 0;
+               ret = -1;
                goto error_rcu_unlock;
        }
 
@@ -5440,15 +5587,20 @@ static int reply_ust_register_channel(int sock, int cobjd,
 
 reply:
        DBG3("UST app replying to register channel key %" PRIu64
-                       " with id %u, type: %d, ret: %d", chan_reg_key, chan_id, type,
+                       " with id %u, type = %d, ret = %d", chan_reg_key, chan_id, type,
                        ret_code);
 
        ret = ustctl_reply_register_channel(sock, chan_id, type, ret_code);
        if (ret < 0) {
-               if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                       ERR("UST app reply channel failed with ret %d", ret);
+               if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                       DBG3("UST app reply channel failed. Application died: pid = %d, sock = %d",
+                                       app->pid, app->sock);
+               } else if (ret == -EAGAIN) {
+                       WARN("UST app reply channel failed. Communication time out: pid = %d, sock = %d",
+                                       app->pid, app->sock);
                } else {
-                       DBG3("UST app reply channel failed. Application died");
+                       ERR("UST app reply channel failed with ret %d: pid = %d, sock = %d",
+                                       ret, app->pid, app->sock);
                }
                goto error;
        }
@@ -5492,7 +5644,7 @@ static int add_event_ust_registry(int sock, int sobjd, int cobjd, char *name,
        if (!app) {
                DBG("Application socket %d is being torn down. Abort event notify",
                                sock);
-               ret = 0;
+               ret = -1;
                goto error_rcu_unlock;
        }
 
@@ -5542,10 +5694,15 @@ static int add_event_ust_registry(int sock, int sobjd, int cobjd, char *name,
         */
        ret = ustctl_reply_register_event(sock, event_id, ret_code);
        if (ret < 0) {
-               if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                       ERR("UST app reply event failed with ret %d", ret);
+               if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                       DBG3("UST app reply event failed. Application died: pid = %d, sock = %d.",
+                                       app->pid, app->sock);
+               } else if (ret == -EAGAIN) {
+                       WARN("UST app reply event failed. Communication time out: pid = %d, sock = %d",
+                                       app->pid, app->sock);
                } else {
-                       DBG3("UST app reply event failed. Application died");
+                       ERR("UST app reply event failed with ret %d: pid = %d, sock = %d",
+                                       ret, app->pid, app->sock);
                }
                /*
                 * No need to wipe the create event since the application socket will
@@ -5593,6 +5750,7 @@ static int add_enum_ust_registry(int sock, int sobjd, char *name,
                DBG("Application socket %d is being torn down. Aborting enum registration",
                                sock);
                free(entries);
+               ret = -1;
                goto error_rcu_unlock;
        }
 
@@ -5600,14 +5758,14 @@ static int add_enum_ust_registry(int sock, int sobjd, char *name,
        ua_sess = find_session_by_objd(app, sobjd);
        if (!ua_sess) {
                /* Return an error since this is not an error */
-               DBG("Application session is being torn down (session not found). Aborting enum registration.");
+               DBG("Application session is being torn down (session not found Aborting enum registration.");
                free(entries);
                goto error_rcu_unlock;
        }
 
        registry = get_session_registry(ua_sess);
        if (!registry) {
-               DBG("Application session is being torn down (registry not found). Aborting enum registration.");
+               DBG("Application session is being torn down (registry not found Aborting enum registration.");
                free(entries);
                goto error_rcu_unlock;
        }
@@ -5630,10 +5788,15 @@ static int add_enum_ust_registry(int sock, int sobjd, char *name,
         */
        ret = ustctl_reply_register_enum(sock, enum_id, ret_code);
        if (ret < 0) {
-               if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                       ERR("UST app reply enum failed with ret %d", ret);
+               if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                       DBG3("UST app reply enum failed. Application died: pid = %d, sock = %d",
+                                       app->pid, app->sock);
+               } else if (ret == -EAGAIN) {
+                       WARN("UST app reply enum failed. Communication time out: pid = %d, sock = %d",
+                                       app->pid, app->sock);
                } else {
-                       DBG3("UST app reply enum failed. Application died");
+                       ERR("UST app reply enum failed with ret %d: pid = %d, sock = %d",
+                                       ret, app->pid, app->sock);
                }
                /*
                 * No need to wipe the create enum since the application socket will
@@ -5665,10 +5828,15 @@ int ust_app_recv_notify(int sock)
 
        ret = ustctl_recv_notify(sock, &cmd);
        if (ret < 0) {
-               if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                       ERR("UST app recv notify failed with ret %d", ret);
+               if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                       DBG3("UST app recv notify failed. Application died: sock = %d",
+                                       sock);
+               } else if (ret == -EAGAIN) {
+                       WARN("UST app recv notify failed. Communication time out: sock = %d",
+                                       sock);
                } else {
-                       DBG3("UST app recv notify failed. Application died");
+                       ERR("UST app recv notify failed with ret %d: sock = %d",
+                                       ret, sock);
                }
                goto error;
        }
@@ -5687,10 +5855,15 @@ int ust_app_recv_notify(int sock)
                                &loglevel_value, &sig, &nr_fields, &fields,
                                &model_emf_uri);
                if (ret < 0) {
-                       if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                               ERR("UST app recv event failed with ret %d", ret);
+                       if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                               DBG3("UST app recv event failed. Application died: sock = %d",
+                                               sock);
+                       } else if (ret == -EAGAIN) {
+                               WARN("UST app recv event failed. Communication time out: sock = %d",
+                                               sock);
                        } else {
-                               DBG3("UST app recv event failed. Application died");
+                               ERR("UST app recv event failed with ret %d: sock = %d",
+                                               ret, sock);
                        }
                        goto error;
                }
@@ -5720,10 +5893,15 @@ int ust_app_recv_notify(int sock)
                ret = ustctl_recv_register_channel(sock, &sobjd, &cobjd, &nr_fields,
                                &fields);
                if (ret < 0) {
-                       if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                               ERR("UST app recv channel failed with ret %d", ret);
+                       if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                               DBG3("UST app recv channel failed. Application died: sock = %d",
+                                               sock);
+                       } else if (ret == -EAGAIN) {
+                               WARN("UST app recv channel failed. Communication time out: sock = %d",
+                                               sock);
                        } else {
-                               DBG3("UST app recv channel failed. Application died");
+                               ERR("UST app recv channel failed with ret %d: sock = %d)",
+                                               ret, sock);
                        }
                        goto error;
                }
@@ -5753,10 +5931,15 @@ int ust_app_recv_notify(int sock)
                ret = ustctl_recv_register_enum(sock, &sobjd, name,
                                &entries, &nr_entries);
                if (ret < 0) {
-                       if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                               ERR("UST app recv enum failed with ret %d", ret);
+                       if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                               DBG3("UST app recv enum failed. Application died: sock = %d",
+                                               sock);
+                       } else if (ret == -EAGAIN) {
+                               WARN("UST app recv enum failed. Communication time out: sock = %d",
+                                               sock);
                        } else {
-                               DBG3("UST app recv enum failed. Application died");
+                               ERR("UST app recv enum failed with ret %d: sock = %d",
+                                               ret, sock);
                        }
                        goto error;
                }
@@ -5876,28 +6059,20 @@ void ust_app_destroy(struct ust_app *app)
  */
 enum lttng_error_code ust_app_snapshot_record(
                const struct ltt_ust_session *usess,
-               const struct snapshot_output *output, int wait,
+               const struct consumer_output *output, int wait,
                uint64_t nb_packets_per_stream)
 {
        int ret = 0;
        enum lttng_error_code status = LTTNG_OK;
        struct lttng_ht_iter iter;
        struct ust_app *app;
-       char pathname[PATH_MAX];
-       struct ltt_session *session = NULL;
-       uint64_t trace_archive_id;
+       char *trace_path = NULL;
 
        assert(usess);
        assert(output);
 
        rcu_read_lock();
 
-       session = session_find_by_id(usess->id);
-       assert(session);
-       assert(pthread_mutex_trylock(&session->lock));
-       assert(session_trylock_list());
-       trace_archive_id = session->current_archive_id;
-
        switch (usess->buffer_type) {
        case LTTNG_BUFFER_PER_UID:
        {
@@ -5906,6 +6081,7 @@ enum lttng_error_code ust_app_snapshot_record(
                cds_list_for_each_entry(reg, &usess->buffer_reg_uid_list, lnode) {
                        struct buffer_reg_channel *reg_chan;
                        struct consumer_socket *socket;
+                       char pathname[PATH_MAX];
 
                        if (!reg->registry->reg.ust->metadata_key) {
                                /* Skip since no metadata is present */
@@ -5921,32 +6097,40 @@ enum lttng_error_code ust_app_snapshot_record(
                        }
 
                        memset(pathname, 0, sizeof(pathname));
+                       /*
+                        * DEFAULT_UST_TRACE_UID_PATH already contains a path
+                        * separator.
+                        */
                        ret = snprintf(pathname, sizeof(pathname),
-                                       DEFAULT_UST_TRACE_DIR "/" DEFAULT_UST_TRACE_UID_PATH,
+                                       DEFAULT_UST_TRACE_DIR DEFAULT_UST_TRACE_UID_PATH,
                                        reg->uid, reg->bits_per_long);
                        if (ret < 0) {
                                PERROR("snprintf snapshot path");
                                status = LTTNG_ERR_INVALID;
                                goto error;
                        }
-
-                       /* Add the UST default trace dir to path. */
+                       /* Free path allocated on previous iteration. */
+                       free(trace_path);
+                       trace_path = setup_channel_trace_path(usess->consumer, pathname);
+                       if (!trace_path) {
+                               status = LTTNG_ERR_INVALID;
+                               goto error;
+                       }
+                        /* Add the UST default trace dir to path. */
                        cds_lfht_for_each_entry(reg->registry->channels->ht, &iter.iter,
                                        reg_chan, node.node) {
                                status = consumer_snapshot_channel(socket,
                                                reg_chan->consumer_key,
                                                output, 0, usess->uid,
-                                               usess->gid, pathname, wait,
-                                               nb_packets_per_stream,
-                                               trace_archive_id);
+                                               usess->gid, trace_path, wait,
+                                               nb_packets_per_stream);
                                if (status != LTTNG_OK) {
                                        goto error;
                                }
                        }
                        status = consumer_snapshot_channel(socket,
                                        reg->registry->reg.ust->metadata_key, output, 1,
-                                       usess->uid, usess->gid, pathname, wait, 0,
-                                       trace_archive_id);
+                                       usess->uid, usess->gid, trace_path, wait, 0);
                        if (status != LTTNG_OK) {
                                goto error;
                        }
@@ -5961,6 +6145,7 @@ enum lttng_error_code ust_app_snapshot_record(
                        struct ust_app_channel *ua_chan;
                        struct ust_app_session *ua_sess;
                        struct ust_registry_session *registry;
+                       char pathname[PATH_MAX];
 
                        ua_sess = lookup_session_by_app(usess, app);
                        if (!ua_sess) {
@@ -5970,7 +6155,7 @@ enum lttng_error_code ust_app_snapshot_record(
 
                        /* Get the right consumer socket for the application. */
                        socket = consumer_find_socket_by_bitness(app->bits_per_long,
-                                       output->consumer);
+                                       output);
                        if (!socket) {
                                status = LTTNG_ERR_INVALID;
                                goto error;
@@ -5978,22 +6163,30 @@ enum lttng_error_code ust_app_snapshot_record(
 
                        /* Add the UST default trace dir to path. */
                        memset(pathname, 0, sizeof(pathname));
-                       ret = snprintf(pathname, sizeof(pathname), DEFAULT_UST_TRACE_DIR "/%s",
+                       ret = snprintf(pathname, sizeof(pathname), DEFAULT_UST_TRACE_DIR "%s",
                                        ua_sess->path);
                        if (ret < 0) {
                                status = LTTNG_ERR_INVALID;
                                PERROR("snprintf snapshot path");
                                goto error;
                        }
-
-                       cds_lfht_for_each_entry(ua_sess->channels->ht, &chan_iter.iter,
+                       /* Free path allocated on previous iteration. */
+                       free(trace_path);
+                       trace_path = setup_channel_trace_path(usess->consumer, pathname);
+                       if (!trace_path) {
+                               status = LTTNG_ERR_INVALID;
+                               goto error;
+                       }
+                        cds_lfht_for_each_entry(ua_sess->channels->ht, &chan_iter.iter,
                                        ua_chan, node.node) {
                                status = consumer_snapshot_channel(socket,
-                                               ua_chan->key, output,
-                                               0, ua_sess->euid, ua_sess->egid,
-                                               pathname, wait,
-                                               nb_packets_per_stream,
-                                               trace_archive_id);
+                                               ua_chan->key, output, 0,
+                                               ua_sess->effective_credentials
+                                                               .uid,
+                                               ua_sess->effective_credentials
+                                                               .gid,
+                                               trace_path, wait,
+                                               nb_packets_per_stream);
                                switch (status) {
                                case LTTNG_OK:
                                        break;
@@ -6010,10 +6203,10 @@ enum lttng_error_code ust_app_snapshot_record(
                                continue;
                        }
                        status = consumer_snapshot_channel(socket,
-                                       registry->metadata_key, output,
-                                       1, ua_sess->euid, ua_sess->egid,
-                                       pathname, wait, 0,
-                                       trace_archive_id);
+                                       registry->metadata_key, output, 1,
+                                       ua_sess->effective_credentials.uid,
+                                       ua_sess->effective_credentials.gid,
+                                       trace_path, wait, 0);
                        switch (status) {
                        case LTTNG_OK:
                                break;
@@ -6031,10 +6224,8 @@ enum lttng_error_code ust_app_snapshot_record(
        }
 
 error:
+       free(trace_path);
        rcu_read_unlock();
-       if (session) {
-               session_put(session);
-       }
        return status;
 }
 
@@ -6281,7 +6472,6 @@ enum lttng_error_code ust_app_rotate_session(struct ltt_session *session)
        struct lttng_ht_iter iter;
        struct ust_app *app;
        struct ltt_ust_session *usess = session->ust_session;
-       char pathname[LTTNG_PATH_MAX];
 
        assert(usess);
 
@@ -6304,38 +6494,40 @@ enum lttng_error_code ust_app_rotate_session(struct ltt_session *session)
                                goto error;
                        }
 
-                       ret = snprintf(pathname, sizeof(pathname),
-                                       DEFAULT_UST_TRACE_DIR "/" DEFAULT_UST_TRACE_UID_PATH,
-                                       reg->uid, reg->bits_per_long);
-                       if (ret < 0 || ret >= sizeof(pathname)) {
-                               PERROR("Failed to format rotation path");
-                               cmd_ret = LTTNG_ERR_INVALID;
-                               goto error;
-                       }
-
                        /* Rotate the data channels. */
                        cds_lfht_for_each_entry(reg->registry->channels->ht, &iter.iter,
                                        reg_chan, node.node) {
                                ret = consumer_rotate_channel(socket,
                                                reg_chan->consumer_key,
                                                usess->uid, usess->gid,
-                                               usess->consumer, pathname,
-                                               /* is_metadata_channel */ false,
-                                               session->current_archive_id);
+                                               usess->consumer,
+                                               /* is_metadata_channel */ false);
                                if (ret < 0) {
                                        cmd_ret = LTTNG_ERR_ROTATION_FAIL_CONSUMER;
                                        goto error;
                                }
                        }
 
+                       /*
+                        * The metadata channel might not be present.
+                        *
+                        * Consumer stream allocation can be done
+                        * asynchronously and can fail on intermediary
+                        * operations (e.g. add context) and lead to data
+                        * channels created with no metadata channel.
+                        */
+                       if (!reg->registry->reg.ust->metadata_key) {
+                               /* Skip since no metadata is present. */
+                               continue;
+                       }
+
                        (void) push_metadata(reg->registry->reg.ust, usess->consumer);
 
                        ret = consumer_rotate_channel(socket,
                                        reg->registry->reg.ust->metadata_key,
                                        usess->uid, usess->gid,
-                                       usess->consumer, pathname,
-                                       /* is_metadata_channel */ true,
-                                       session->current_archive_id);
+                                       usess->consumer,
+                                       /* is_metadata_channel */ true);
                        if (ret < 0) {
                                cmd_ret = LTTNG_ERR_ROTATION_FAIL_CONSUMER;
                                goto error;
@@ -6357,14 +6549,6 @@ enum lttng_error_code ust_app_rotate_session(struct ltt_session *session)
                                /* Session not associated with this app. */
                                continue;
                        }
-                       ret = snprintf(pathname, sizeof(pathname),
-                                       DEFAULT_UST_TRACE_DIR "/%s",
-                                       ua_sess->path);
-                       if (ret < 0 || ret >= sizeof(pathname)) {
-                               PERROR("Failed to format rotation path");
-                               cmd_ret = LTTNG_ERR_INVALID;
-                               goto error;
-                       }
 
                        /* Get the right consumer socket for the application. */
                        socket = consumer_find_socket_by_bitness(app->bits_per_long,
@@ -6380,15 +6564,17 @@ enum lttng_error_code ust_app_rotate_session(struct ltt_session *session)
                                continue;
                        }
 
-
                        /* Rotate the data channels. */
                        cds_lfht_for_each_entry(ua_sess->channels->ht, &chan_iter.iter,
                                        ua_chan, node.node) {
-                               ret = consumer_rotate_channel(socket, ua_chan->key,
-                                               ua_sess->euid, ua_sess->egid,
-                                               ua_sess->consumer, pathname,
-                                               /* is_metadata_channel */ false,
-                                               session->current_archive_id);
+                               ret = consumer_rotate_channel(socket,
+                                               ua_chan->key,
+                                               ua_sess->effective_credentials
+                                                               .uid,
+                                               ua_sess->effective_credentials
+                                                               .gid,
+                                               ua_sess->consumer,
+                                               /* is_metadata_channel */ false);
                                if (ret < 0) {
                                        /* Per-PID buffer and application going away. */
                                        if (ret == -LTTNG_ERR_CHAN_NOT_FOUND)
@@ -6400,11 +6586,12 @@ enum lttng_error_code ust_app_rotate_session(struct ltt_session *session)
 
                        /* Rotate the metadata channel. */
                        (void) push_metadata(registry, usess->consumer);
-                       ret = consumer_rotate_channel(socket, registry->metadata_key,
-                                       ua_sess->euid, ua_sess->egid,
-                                       ua_sess->consumer, pathname,
-                                       /* is_metadata_channel */ true,
-                                       session->current_archive_id);
+                       ret = consumer_rotate_channel(socket,
+                                       registry->metadata_key,
+                                       ua_sess->effective_credentials.uid,
+                                       ua_sess->effective_credentials.gid,
+                                       ua_sess->consumer,
+                                       /* is_metadata_channel */ true);
                        if (ret < 0) {
                                /* Per-PID buffer and application going away. */
                                if (ret == -LTTNG_ERR_CHAN_NOT_FOUND)
@@ -6426,3 +6613,110 @@ error:
        rcu_read_unlock();
        return cmd_ret;
 }
+
+enum lttng_error_code ust_app_create_channel_subdirectories(
+               const struct ltt_ust_session *usess)
+{
+       enum lttng_error_code ret = LTTNG_OK;
+       struct lttng_ht_iter iter;
+       enum lttng_trace_chunk_status chunk_status;
+       char *pathname_index;
+       int fmt_ret;
+
+       assert(usess->current_trace_chunk);
+       rcu_read_lock();
+
+       switch (usess->buffer_type) {
+       case LTTNG_BUFFER_PER_UID:
+       {
+               struct buffer_reg_uid *reg;
+
+               cds_list_for_each_entry(reg, &usess->buffer_reg_uid_list, lnode) {
+                       fmt_ret = asprintf(&pathname_index,
+                                      DEFAULT_UST_TRACE_DIR DEFAULT_UST_TRACE_UID_PATH "/" DEFAULT_INDEX_DIR,
+                                      reg->uid, reg->bits_per_long);
+                       if (fmt_ret < 0) {
+                               ERR("Failed to format channel index directory");
+                               ret = LTTNG_ERR_CREATE_DIR_FAIL;
+                               goto error;
+                       }
+
+                       /*
+                        * Create the index subdirectory which will take care
+                        * of implicitly creating the channel's path.
+                        */
+                       chunk_status = lttng_trace_chunk_create_subdirectory(
+                                       usess->current_trace_chunk,
+                                       pathname_index);
+                       free(pathname_index);
+                       if (chunk_status != LTTNG_TRACE_CHUNK_STATUS_OK) {
+                               ret = LTTNG_ERR_CREATE_DIR_FAIL;
+                               goto error;
+                       }
+               }
+               break;
+       }
+       case LTTNG_BUFFER_PER_PID:
+       {
+               struct ust_app *app;
+
+               /*
+                * Create the toplevel ust/ directory in case no apps are running.
+                */
+               chunk_status = lttng_trace_chunk_create_subdirectory(
+                               usess->current_trace_chunk,
+                               DEFAULT_UST_TRACE_DIR);
+               if (chunk_status != LTTNG_TRACE_CHUNK_STATUS_OK) {
+                       ret = LTTNG_ERR_CREATE_DIR_FAIL;
+                       goto error;
+               }
+
+               cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app,
+                               pid_n.node) {
+                       struct ust_app_session *ua_sess;
+                       struct ust_registry_session *registry;
+
+                       ua_sess = lookup_session_by_app(usess, app);
+                       if (!ua_sess) {
+                               /* Session not associated with this app. */
+                               continue;
+                       }
+
+                       registry = get_session_registry(ua_sess);
+                       if (!registry) {
+                               DBG("Application session is being torn down. Skip application.");
+                               continue;
+                       }
+
+                       fmt_ret = asprintf(&pathname_index,
+                                       DEFAULT_UST_TRACE_DIR "%s/" DEFAULT_INDEX_DIR,
+                                       ua_sess->path);
+                       if (fmt_ret < 0) {
+                               ERR("Failed to format channel index directory");
+                               ret = LTTNG_ERR_CREATE_DIR_FAIL;
+                               goto error;
+                       }
+                       /*
+                        * Create the index subdirectory which will take care
+                        * of implicitly creating the channel's path.
+                        */
+                       chunk_status = lttng_trace_chunk_create_subdirectory(
+                                       usess->current_trace_chunk,
+                                       pathname_index);
+                       free(pathname_index);
+                       if (chunk_status != LTTNG_TRACE_CHUNK_STATUS_OK) {
+                               ret = LTTNG_ERR_CREATE_DIR_FAIL;
+                               goto error;
+                       }
+               }
+               break;
+       }
+       default:
+               abort();
+       }
+
+       ret = LTTNG_OK;
+error:
+       rcu_read_unlock();
+       return ret;
+}
This page took 0.045481 seconds and 4 git commands to generate.