Fix: metadata channel leak when using the snapshot tracing mode
[lttng-tools.git] / src / common / ust-consumer / ust-consumer.c
index 01d5ecf6505cc0667a41194d5cb1f07d06e3540b..2f2c2ff25d47403f4a34fd7a4d90f2728a155f8a 100644 (file)
@@ -831,6 +831,7 @@ static int close_metadata(uint64_t chan_key)
 {
        int ret = 0;
        struct lttng_consumer_channel *channel;
+       unsigned int channel_monitor;
 
        DBG("UST consumer close metadata key %" PRIu64, chan_key);
 
@@ -849,13 +850,48 @@ static int close_metadata(uint64_t chan_key)
 
        pthread_mutex_lock(&consumer_data.lock);
        pthread_mutex_lock(&channel->lock);
-
+       channel_monitor = channel->monitor;
        if (cds_lfht_is_node_deleted(&channel->node.node)) {
                goto error_unlock;
        }
 
        lttng_ustconsumer_close_metadata(channel);
+       pthread_mutex_unlock(&channel->lock);
+       pthread_mutex_unlock(&consumer_data.lock);
 
+       /*
+        * The ownership of a metadata channel depends on the type of
+        * session to which it belongs. In effect, the monitor flag is checked
+        * to determine if this metadata channel is in "snapshot" mode or not.
+        *
+        * In the non-snapshot case, the metadata channel is created along with
+        * a single stream which will remain present until the metadata channel
+        * is destroyed (on the destruction of its session). In this case, the
+        * metadata stream in "monitored" by the metadata poll thread and holds
+        * the ownership of its channel.
+        *
+        * Closing the metadata will cause the metadata stream's "metadata poll
+        * pipe" to be closed. Closing this pipe will wake-up the metadata poll
+        * thread which will teardown the metadata stream which, in return,
+        * deletes the metadata channel.
+        *
+        * In the snapshot case, the metadata stream is created and destroyed
+        * on every snapshot record. Since the channel doesn't have an owner
+        * other than the session daemon, it is safe to destroy it immediately
+        * on reception of the CLOSE_METADATA command.
+        */
+       if (!channel_monitor) {
+               /*
+                * The channel and consumer_data locks must be
+                * released before this call since consumer_del_channel
+                * re-acquires the channel and consumer_data locks to teardown
+                * the channel and queue its reclamation by the "call_rcu"
+                * worker thread.
+                */
+               consumer_del_channel(channel);
+       }
+
+       return ret;
 error_unlock:
        pthread_mutex_unlock(&channel->lock);
        pthread_mutex_unlock(&consumer_data.lock);
@@ -1076,9 +1112,6 @@ static int snapshot_channel(uint64_t key, char *path, uint64_t relayd_id,
        DBG("UST consumer snapshot channel %" PRIu64, key);
 
        cds_list_for_each_entry(stream, &channel->streams.head, send_node) {
-               /* Are we at a position _before_ the first available packet ? */
-               bool before_first_packet = true;
-
                health_code_update();
 
                /* Lock stream because we are about to change its state. */
@@ -1150,7 +1183,6 @@ static int snapshot_channel(uint64_t key, char *path, uint64_t relayd_id,
                while (consumed_pos < produced_pos) {
                        ssize_t read_len;
                        unsigned long len, padded_len;
-                       int lost_packet = 0;
 
                        health_code_update();
 
@@ -1164,15 +1196,7 @@ static int snapshot_channel(uint64_t key, char *path, uint64_t relayd_id,
                                }
                                DBG("UST consumer get subbuf failed. Skipping it.");
                                consumed_pos += stream->max_sb_size;
-
-                               /*
-                                * Start accounting lost packets only when we
-                                * already have extracted packets (to match the
-                                * content of the final snapshot).
-                                */
-                               if (!before_first_packet) {
-                                       lost_packet = 1;
-                               }
+                               stream->chan->lost_packets++;
                                continue;
                        }
 
@@ -1208,16 +1232,6 @@ static int snapshot_channel(uint64_t key, char *path, uint64_t relayd_id,
                                goto error_close_stream;
                        }
                        consumed_pos += stream->max_sb_size;
-
-                       /*
-                        * Only account lost packets located between
-                        * succesfully extracted packets (do not account before
-                        * and after since they are not visible in the
-                        * resulting snapshot).
-                        */
-                       stream->chan->lost_packets += lost_packet;
-                       lost_packet = 0;
-                       before_first_packet = false;
                }
 
                /* Simply close the stream so we can use it on the next snapshot. */
@@ -2273,10 +2287,10 @@ int lttng_ustconsumer_sync_metadata(struct lttng_consumer_local_data *ctx,
         * because we locked the metadata thread.
         */
        ret = lttng_ustconsumer_request_metadata(ctx, metadata->chan, 0, 0);
+       pthread_mutex_lock(&metadata->lock);
        if (ret < 0) {
                goto end;
        }
-       pthread_mutex_lock(&metadata->lock);
 
        ret = commit_one_metadata_packet(metadata);
        if (ret <= 0) {
@@ -2496,6 +2510,8 @@ retry:
                index.offset = htobe64(stream->out_fd_offset);
                ret = get_index_values(&index, ustream);
                if (ret < 0) {
+                       err = ustctl_put_subbuf(ustream);
+                       assert(err == 0);
                        goto end;
                }
 
@@ -2503,6 +2519,8 @@ retry:
                ret = update_stream_stats(stream);
                if (ret < 0) {
                        PERROR("kernctl_get_events_discarded");
+                       err = ustctl_put_subbuf(ustream);
+                       assert(err == 0);
                        goto end;
                }
        } else {
@@ -2620,14 +2638,17 @@ int lttng_ustconsumer_on_recv_stream(struct lttng_consumer_stream *stream)
                stream->tracefile_size_current = 0;
 
                if (!stream->metadata_flag) {
-                       ret = index_create_file(stream->chan->pathname,
+                       struct lttng_index_file *index_file;
+
+                       index_file = lttng_index_file_create(stream->chan->pathname,
                                        stream->name, stream->uid, stream->gid,
                                        stream->chan->tracefile_size,
-                                       stream->tracefile_count_current);
-                       if (ret < 0) {
+                                       stream->tracefile_count_current,
+                                       CTF_INDEX_MAJOR, CTF_INDEX_MINOR);
+                       if (!index_file) {
                                goto error;
                        }
-                       stream->index_fd = ret;
+                       stream->index_file = index_file;
                }
        }
        ret = 0;
This page took 0.025592 seconds and 4 git commands to generate.