Fix: sessiond: session destroy hang in per-uid when context cannot be added
[lttng-tools.git] / src / bin / lttng-sessiond / ust-app.c
index ea3f860398d3ca35edb16e515f55f14cbebcdcf7..35e4e3bceed8913f9b17b6b745ebd149058c84e5 100644 (file)
@@ -2488,7 +2488,7 @@ static int do_consumer_create_channel(struct ltt_ust_session *usess,
        health_code_update();
 
        /*
-        * Now get the channel from the consumer. This call wil populate the stream
+        * Now get the channel from the consumer. This call will populate the stream
         * list of that channel and set the ust objects.
         */
        if (usess->consumer->enabled) {
@@ -4112,11 +4112,14 @@ int ust_app_channel_create(struct ltt_ust_session *usess,
                ret = ust_app_channel_allocate(ua_sess, uchan,
                        LTTNG_UST_CHAN_PER_CPU, usess,
                        &ua_chan);
-               if (ret == 0) {
-                       ret = ust_app_channel_send(app, usess,
-                               ua_sess, ua_chan);
-               } else {
-                       goto end;
+               if (ret < 0) {
+                       goto error;
+               }
+
+               ret = ust_app_channel_send(app, usess,
+                       ua_sess, ua_chan);
+               if (ret) {
+                       goto error;
                }
 
                /* Add contexts. */
@@ -4124,10 +4127,12 @@ int ust_app_channel_create(struct ltt_ust_session *usess,
                        ret = create_ust_app_channel_context(ua_chan,
                                &uctx->ctx, app);
                        if (ret) {
-                               goto end;
+                               goto error;
                        }
                }
        }
+
+error:
        if (ret < 0) {
                switch (ret) {
                case -ENOTCONN:
@@ -4143,7 +4148,7 @@ int ust_app_channel_create(struct ltt_ust_session *usess,
                        break;
                }
        }
-end:
+
        if (ret == 0 && _ua_chan) {
                /*
                 * Only return the application's channel on success. Note
@@ -4351,15 +4356,6 @@ int ust_app_start_trace(struct ltt_ust_session *usess, struct ust_app *app)
                goto skip_setup;
        }
 
-       /*
-        * Create the metadata for the application. This returns gracefully if a
-        * metadata was already set for the session.
-        */
-       ret = create_ust_app_metadata(ua_sess, app, usess->consumer);
-       if (ret < 0) {
-               goto error_unlock;
-       }
-
        health_code_update();
 
 skip_setup:
@@ -4996,7 +4992,7 @@ end:
 
 /*
  * The caller must ensure that the application is compatible and is tracked
- * by the PID tracker.
+ * by the process attribute trackers.
  */
 static
 void ust_app_synchronize(struct ltt_ust_session *usess,
@@ -5027,6 +5023,7 @@ void ust_app_synchronize(struct ltt_ust_session *usess,
        }
 
        rcu_read_lock();
+
        cds_lfht_for_each_entry(usess->domain_global.channels->ht, &uchan_iter,
                        uchan, node.node) {
                struct ust_app_channel *ua_chan;
@@ -5070,6 +5067,21 @@ void ust_app_synchronize(struct ltt_ust_session *usess,
                        }
                }
        }
+
+       /*
+        * Create the metadata for the application. This returns gracefully if a
+        * metadata was already set for the session.
+        *
+        * The metadata channel must be created after the data channels as the
+        * consumer daemon assumes this ordering. When interacting with a relay
+        * daemon, the consumer will use this assumption to send the
+        * "STREAMS_SENT" message to the relay daemon.
+        */
+       ret = create_ust_app_metadata(ua_sess, app, usess->consumer);
+       if (ret < 0) {
+               goto error_unlock;
+       }
+
        rcu_read_unlock();
 
 end:
@@ -6265,11 +6277,6 @@ enum lttng_error_code ust_app_rotate_session(struct ltt_session *session)
                        struct buffer_reg_channel *reg_chan;
                        struct consumer_socket *socket;
 
-                       if (!reg->registry->reg.ust->metadata_key) {
-                               /* Skip since no metadata is present */
-                               continue;
-                       }
-
                        /* Get consumer socket to use to push the metadata.*/
                        socket = consumer_find_socket_by_bitness(reg->bits_per_long,
                                        usess->consumer);
@@ -6292,6 +6299,19 @@ enum lttng_error_code ust_app_rotate_session(struct ltt_session *session)
                                }
                        }
 
+                       /*
+                        * The metadata channel might not be present.
+                        *
+                        * Consumer stream allocation can be done
+                        * asynchronously and can fail on intermediary
+                        * operations (i.e add context) and lead to data
+                        * channels created with no metadata channel.
+                        */
+                       if (!reg->registry->reg.ust->metadata_key) {
+                               /* Skip since no metadata is present. */
+                               continue;
+                       }
+
                        (void) push_metadata(reg->registry->reg.ust, usess->consumer);
 
                        ret = consumer_rotate_channel(socket,
@@ -6639,3 +6659,126 @@ end:
        rcu_read_unlock();
        return cmd_ret;
 }
+
+/*
+ * This function skips the metadata channel as the begin/end timestamps of a
+ * metadata packet are useless.
+ *
+ * Moreover, opening a packet after a "clear" will cause problems for live
+ * sessions as it will introduce padding that was not part of the first trace
+ * chunk. The relay daemon expects the content of the metadata stream of
+ * successive metadata trace chunks to be strict supersets of one another.
+ *
+ * For example, flushing a packet at the beginning of the metadata stream of
+ * a trace chunk resulting from a "clear" session command will cause the
+ * size of the metadata stream of the new trace chunk to not match the size of
+ * the metadata stream of the original chunk. This will confuse the relay
+ * daemon as the same "offset" in a metadata stream will no longer point
+ * to the same content.
+ */
+enum lttng_error_code ust_app_open_packets(struct ltt_session *session)
+{
+       enum lttng_error_code ret = LTTNG_OK;
+       struct lttng_ht_iter iter;
+       struct ltt_ust_session *usess = session->ust_session;
+
+       assert(usess);
+
+       rcu_read_lock();
+
+       switch (usess->buffer_type) {
+       case LTTNG_BUFFER_PER_UID:
+       {
+               struct buffer_reg_uid *reg;
+
+               cds_list_for_each_entry (
+                               reg, &usess->buffer_reg_uid_list, lnode) {
+                       struct buffer_reg_channel *reg_chan;
+                       struct consumer_socket *socket;
+
+                       socket = consumer_find_socket_by_bitness(
+                                       reg->bits_per_long, usess->consumer);
+                       if (!socket) {
+                               ret = LTTNG_ERR_FATAL;
+                               goto error;
+                       }
+
+                       cds_lfht_for_each_entry(reg->registry->channels->ht,
+                                       &iter.iter, reg_chan, node.node) {
+                               const int open_ret =
+                                               consumer_open_channel_packets(
+                                                       socket,
+                                                       reg_chan->consumer_key);
+
+                               if (open_ret < 0) {
+                                       ret = LTTNG_ERR_UNK;
+                                       goto error;
+                               }
+                       }
+               }
+               break;
+       }
+       case LTTNG_BUFFER_PER_PID:
+       {
+               struct ust_app *app;
+
+               cds_lfht_for_each_entry (
+                               ust_app_ht->ht, &iter.iter, app, pid_n.node) {
+                       struct consumer_socket *socket;
+                       struct lttng_ht_iter chan_iter;
+                       struct ust_app_channel *ua_chan;
+                       struct ust_app_session *ua_sess;
+                       struct ust_registry_session *registry;
+
+                       ua_sess = lookup_session_by_app(usess, app);
+                       if (!ua_sess) {
+                               /* Session not associated with this app. */
+                               continue;
+                       }
+
+                       /* Get the right consumer socket for the application. */
+                       socket = consumer_find_socket_by_bitness(
+                                       app->bits_per_long, usess->consumer);
+                       if (!socket) {
+                               ret = LTTNG_ERR_FATAL;
+                               goto error;
+                       }
+
+                       registry = get_session_registry(ua_sess);
+                       if (!registry) {
+                               DBG("Application session is being torn down. Skip application.");
+                               continue;
+                       }
+
+                       cds_lfht_for_each_entry(ua_sess->channels->ht,
+                                       &chan_iter.iter, ua_chan, node.node) {
+                               const int open_ret =
+                                               consumer_open_channel_packets(
+                                                       socket,
+                                                       ua_chan->key);
+
+                               if (open_ret < 0) {
+                                       /*
+                                        * Per-PID buffer and application going
+                                        * away.
+                                        */
+                                       if (open_ret == -LTTNG_ERR_CHAN_NOT_FOUND) {
+                                               continue;
+                                       }
+
+                                       ret = LTTNG_ERR_UNK;
+                                       goto error;
+                               }
+                       }
+               }
+               break;
+       }
+       default:
+               abort();
+               break;
+       }
+
+error:
+       rcu_read_unlock();
+       return ret;
+}
This page took 0.026917 seconds and 4 git commands to generate.