Fix: consumer: snapshot: assertion on subsequent snapshot
[lttng-tools.git] / src / common / kernel-consumer / kernel-consumer.c
index dd5cf1761a4992c9e7384d48675038c5438ece0e..530a518c8e29451cb158f7d7a474b31ea6cbb2af 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2011 Julien Desfossez <julien.desfossez@polymtl.ca>
+ * Copyright (C) 2011 EfficiOS Inc.
  * Copyright (C) 2011 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
  * Copyright (C) 2017 Jérémie Galarneau <jeremie.galarneau@efficios.com>
  *
@@ -136,10 +136,34 @@ error:
        return ret;
 }
 
+/*
+ * Release the per-snapshot output state of a stream.
+ *
+ * When relayd_id is (uint64_t) -1ULL, the stream's output file descriptor is
+ * closed if one is open (a close() failure is only logged) and out_fd is
+ * reset to -1. For any other relayd_id, the relayd stream is closed and
+ * net_seq_idx is reset to (uint64_t) -1ULL. In both cases,
+ * lttng_trace_chunk_put() is called on the stream's trace chunk and the
+ * pointer is cleared.
+ *
+ * The stream lock must be held by the caller.
+ */
+static void finalize_snapshot_stream(
+               struct lttng_consumer_stream *stream, uint64_t relayd_id)
+{
+       const bool use_relayd = relayd_id != (uint64_t) -1ULL;
+
+       ASSERT_LOCKED(stream->lock);
+
+       if (use_relayd) {
+               close_relayd_stream(stream);
+               stream->net_seq_idx = (uint64_t) -1ULL;
+       } else if (stream->out_fd >= 0) {
+               /* Best effort: a failed close() is reported, not propagated. */
+               if (close(stream->out_fd) < 0) {
+                       PERROR("Failed to close stream snapshot output file descriptor");
+               }
+
+               stream->out_fd = -1;
+       }
+
+       lttng_trace_chunk_put(stream->trace_chunk);
+       stream->trace_chunk = NULL;
+}
+
 /*
  * Take a snapshot of all the streams of a channel.
  * RCU read-side lock must be held across this function to ensure existence of
- * channel. The channel lock must be held by the caller.
+ * channel.
  *
  * Returns 0 on success, < 0 on error
  */
@@ -154,6 +178,9 @@ static int lttng_kconsumer_snapshot_channel(
 
        DBG("Kernel consumer snapshot channel %" PRIu64, key);
 
+       /* Prevent channel modifications while we perform the snapshot. */
+       pthread_mutex_lock(&channel->lock);
+
        rcu_read_lock();
 
        /* Splice is not supported yet for channel snapshot. */
@@ -197,13 +224,13 @@ static int lttng_kconsumer_snapshot_channel(
                        ret = consumer_send_relayd_stream(stream, path);
                        if (ret < 0) {
                                ERR("sending stream to relayd");
-                               goto end_unlock;
+                               goto error_finalize_stream;
                        }
                } else {
                        ret = consumer_stream_create_output_files(stream,
                                        false);
                        if (ret < 0) {
-                               goto end_unlock;
+                               goto error_finalize_stream;
                        }
                        DBG("Kernel consumer snapshot stream (%" PRIu64 ")",
                                        stream->key);
@@ -221,7 +248,7 @@ static int lttng_kconsumer_snapshot_channel(
                        ret = kernctl_buffer_flush(stream->wait_fd);
                        if (ret < 0) {
                                ERR("Failed to flush kernel stream");
-                               goto end_unlock;
+                               goto error_finalize_stream;
                        }
                        goto end_unlock;
                }
@@ -229,19 +256,19 @@ static int lttng_kconsumer_snapshot_channel(
                ret = lttng_kconsumer_take_snapshot(stream);
                if (ret < 0) {
                        ERR("Taking kernel snapshot");
-                       goto end_unlock;
+                       goto error_finalize_stream;
                }
 
                ret = lttng_kconsumer_get_produced_snapshot(stream, &produced_pos);
                if (ret < 0) {
                        ERR("Produced kernel snapshot position");
-                       goto end_unlock;
+                       goto error_finalize_stream;
                }
 
                ret = lttng_kconsumer_get_consumed_snapshot(stream, &consumed_pos);
                if (ret < 0) {
                        ERR("Consumerd kernel snapshot position");
-                       goto end_unlock;
+                       goto error_finalize_stream;
                }
 
                consumed_pos = consumer_get_consume_start_pos(consumed_pos,
@@ -261,7 +288,7 @@ static int lttng_kconsumer_snapshot_channel(
                        if (ret < 0) {
                                if (ret != -EAGAIN) {
                                        PERROR("kernctl_get_subbuf snapshot");
-                                       goto end_unlock;
+                                       goto error_finalize_stream;
                                }
                                DBG("Kernel consumer get subbuf failed. Skipping it.");
                                consumed_pos += stream->max_sb_size;
@@ -311,26 +338,12 @@ static int lttng_kconsumer_snapshot_channel(
                        ret = kernctl_put_subbuf(stream->wait_fd);
                        if (ret < 0) {
                                ERR("Snapshot kernctl_put_subbuf");
-                               goto end_unlock;
+                               goto error_finalize_stream;
                        }
                        consumed_pos += stream->max_sb_size;
                }
 
-               if (relayd_id == (uint64_t) -1ULL) {
-                       if (stream->out_fd >= 0) {
-                               ret = close(stream->out_fd);
-                               if (ret < 0) {
-                                       PERROR("Kernel consumer snapshot close out_fd");
-                                       goto end_unlock;
-                               }
-                               stream->out_fd = -1;
-                       }
-               } else {
-                       close_relayd_stream(stream);
-                       stream->net_seq_idx = (uint64_t) -1ULL;
-               }
-               lttng_trace_chunk_put(stream->trace_chunk);
-               stream->trace_chunk = NULL;
+               finalize_snapshot_stream(stream, relayd_id);
                pthread_mutex_unlock(&stream->lock);
        }
 
@@ -343,17 +356,20 @@ error_put_subbuf:
        if (ret < 0) {
                ERR("Snapshot kernctl_put_subbuf error path");
        }
+error_finalize_stream:
+       finalize_snapshot_stream(stream, relayd_id);
 end_unlock:
        pthread_mutex_unlock(&stream->lock);
 end:
        rcu_read_unlock();
+       pthread_mutex_unlock(&channel->lock);
        return ret;
 }
 
 /*
  * Read the whole metadata available for a snapshot.
  * RCU read-side lock must be held across this function to ensure existence of
- * metadata_channel. The channel lock must be held by the caller.
+ * metadata_channel.
  *
  * Returns 0 on success, < 0 on error
  */
@@ -376,7 +392,8 @@ static int lttng_kconsumer_snapshot_metadata(
        metadata_stream = metadata_channel->metadata_stream;
        assert(metadata_stream);
 
-       pthread_mutex_lock(&metadata_stream->lock);
+       /* Take all the appropriate locks. */
+       metadata_stream->read_subbuffer_ops.lock(metadata_stream);
        assert(metadata_channel->trace_chunk);
        assert(metadata_stream->trace_chunk);
 
@@ -403,16 +420,12 @@ static int lttng_kconsumer_snapshot_metadata(
 
                ret_read = lttng_consumer_read_subbuffer(metadata_stream, ctx, true);
                if (ret_read < 0) {
-                       if (ret_read != -EAGAIN) {
-                               ERR("Kernel snapshot reading metadata subbuffer (ret: %zd)",
-                                               ret_read);
-                               ret = ret_read;
-                               goto error_snapshot;
-                       }
-                       /* ret_read is negative at this point so we will exit the loop. */
-                       continue;
+                       ERR("Kernel snapshot reading metadata subbuffer (ret: %zd)",
+                                       ret_read);
+                       ret = ret_read;
+                       goto error_snapshot;
                }
-       } while (ret_read >= 0);
+       } while (ret_read > 0);
 
        if (use_relayd) {
                close_relayd_stream(metadata_stream);
@@ -435,8 +448,7 @@ static int lttng_kconsumer_snapshot_metadata(
 
        ret = 0;
 error_snapshot:
-       pthread_mutex_unlock(&metadata_stream->lock);
-       cds_list_del(&metadata_stream->send_node);
+       metadata_stream->read_subbuffer_ops.unlock(metadata_stream);
        consumer_stream_destroy(metadata_stream, NULL);
        metadata_channel->metadata_stream = NULL;
        rcu_read_unlock();
@@ -479,11 +491,17 @@ int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
        switch (msg.cmd_type) {
        case LTTNG_CONSUMER_ADD_RELAYD_SOCKET:
        {
+               uint32_t major = msg.u.relayd_sock.major;
+               uint32_t minor = msg.u.relayd_sock.minor;
+               enum lttcomm_sock_proto protocol =
+                               msg.u.relayd_sock.relayd_socket_protocol;
+
                /* Session daemon status message are handled in the following call. */
                consumer_add_relayd_socket(msg.u.relayd_sock.net_index,
-                               msg.u.relayd_sock.type, ctx, sock, consumer_sockpoll,
-                               &msg.u.relayd_sock.sock, msg.u.relayd_sock.session_id,
-                               msg.u.relayd_sock.relayd_session_id);
+                               msg.u.relayd_sock.type, ctx, sock,
+                               consumer_sockpoll, msg.u.relayd_sock.session_id,
+                               msg.u.relayd_sock.relayd_session_id, major,
+                               minor, protocol);
                goto end_nosignal;
        }
        case LTTNG_CONSUMER_ADD_CHANNEL:
@@ -939,7 +957,6 @@ error_streams_sent_nosignal:
                        ERR("Channel %" PRIu64 " not found", key);
                        ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
                } else {
-                       pthread_mutex_lock(&channel->lock);
                        if (msg.u.snapshot_channel.metadata == 1) {
                                ret = lttng_kconsumer_snapshot_metadata(channel, key,
                                                msg.u.snapshot_channel.pathname,
@@ -959,7 +976,6 @@ error_streams_sent_nosignal:
                                        ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
                                }
                        }
-                       pthread_mutex_unlock(&channel->lock);
                }
                health_code_update();
 
@@ -1311,6 +1327,24 @@ error_rotate_channel:
                                msg.u.trace_chunk_exists.chunk_id);
                goto end_msg_sessiond;
        }
+       case LTTNG_CONSUMER_OPEN_CHANNEL_PACKETS:
+       {
+               const uint64_t key = msg.u.open_channel_packets.key;
+               struct lttng_consumer_channel *channel =
+                               consumer_find_channel(key);
+
+               if (channel) {
+                       pthread_mutex_lock(&channel->lock);
+                       ret_code = lttng_consumer_open_channel_packets(channel);
+                       pthread_mutex_unlock(&channel->lock);
+               } else {
+                       WARN("Channel %" PRIu64 " not found", key);
+                       ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
+               }
+
+               health_code_update();
+               goto end_msg_sessiond;
+       }
        default:
                goto end_nosignal;
        }
@@ -1348,36 +1382,38 @@ end:
  * metadata thread can consume them.
  *
  * Metadata stream lock MUST be acquired.
- *
- * Return 0 if new metadatda is available, EAGAIN if the metadata stream
- * is empty or a negative value on error.
  */
-int lttng_kconsumer_sync_metadata(struct lttng_consumer_stream *metadata)
+enum sync_metadata_status lttng_kconsumer_sync_metadata(
+               struct lttng_consumer_stream *metadata)
 {
        int ret;
+       enum sync_metadata_status status;
 
        assert(metadata);
 
+       /* A failed flush aborts the sync and is reported as an error status. */
        ret = kernctl_buffer_flush(metadata->wait_fd);
        if (ret < 0) {
                ERR("Failed to flush kernel stream");
+               status = SYNC_METADATA_STATUS_ERROR;
                goto end;
        }
 
+       /*
+        * Map the snapshot result to a status: success means new metadata is
+        * available, a failure with errno set to EAGAIN means there is nothing
+        * new, and any other failure is an error.
+        *
+        * NOTE(review): the EAGAIN check reads errno rather than ret; this
+        * assumes kernctl_snapshot() leaves errno set on failure - confirm.
+        */
        ret = kernctl_snapshot(metadata->wait_fd);
        if (ret < 0) {
-               if (ret != -EAGAIN) {
+               if (errno == EAGAIN) {
+                       /* No new metadata, exit. */
+                       DBG("Sync metadata, no new kernel metadata");
+                       status = SYNC_METADATA_STATUS_NO_DATA;
+               } else {
                        ERR("Sync metadata, taking kernel snapshot failed.");
-                       goto end;
+                       status = SYNC_METADATA_STATUS_ERROR;
                }
-               DBG("Sync metadata, no new kernel metadata");
-               /* No new metadata, exit. */
-               ret = ENODATA;
-               goto end;
+       } else {
+               status = SYNC_METADATA_STATUS_NEW_DATA;
        }
 
 end:
-       return ret;
+       return status;
 }
 
 static
@@ -1511,6 +1547,17 @@ int get_subbuffer_common(struct lttng_consumer_stream *stream,
 
        ret = kernctl_get_next_subbuf(stream->wait_fd);
        if (ret) {
+               /*
+                * The caller only expects -ENODATA when there is no data to
+                * read, but the kernel tracer returns -EAGAIN when there is
+                * currently no data for a non-finalized stream, and -ENODATA
+                * when there is no data for a finalized stream. Those can be
+                * combined into a -ENODATA return value.
+                */
+               if (ret == -EAGAIN) {
+                       ret = -ENODATA;
+               }
+
                goto end;
        }
 
@@ -1592,6 +1639,16 @@ int get_next_subbuffer_metadata_check(struct lttng_consumer_stream *stream,
                        subbuffer->info.metadata.padded_subbuf_size,
                        coherent ? "true" : "false");
 end:
+       /*
+        * The caller only expects -ENODATA when there is no data to read, but
+        * the kernel tracer returns -EAGAIN when there is currently no data
+        * for a non-finalized stream, and -ENODATA when there is no data for a
+        * finalized stream. Those can be combined into a -ENODATA return value.
+        */
+       if (ret == -EAGAIN) {
+               ret = -ENODATA;
+       }
+
        return ret;
 }
 
@@ -1616,8 +1673,23 @@ int put_next_subbuffer(struct lttng_consumer_stream *stream,
+/*
+ * Probe whether the tracer behind tracer_fd supports the
+ * kernctl_get_next_subbuf_metadata_check() operation: any result other than
+ * -ENOTTY is treated as "available".
+ *
+ * When the probe itself succeeds (returns 0), the subbuffer it obtained is
+ * put back before returning.
+ */
 static
 bool is_get_next_check_metadata_available(int tracer_fd)
 {
-       return kernctl_get_next_subbuf_metadata_check(tracer_fd, NULL) !=
-                       -ENOTTY;
+       const int ret = kernctl_get_next_subbuf_metadata_check(tracer_fd, NULL);
+       const bool available = ret != -ENOTTY;
+
+       if (ret == 0) {
+               /* get succeeded, make sure to put the subbuffer. */
+               kernctl_put_subbuf(tracer_fd);
+       }
+
+       return available;
+}
+
+/*
+ * Wake every thread waiting on the stream's metadata_rdv condition variable.
+ *
+ * The stream's metadata_rdv_lock must be held by the caller. ctx is unused.
+ *
+ * Returns 0 on success, or the negated error number reported by
+ * pthread_cond_broadcast() on failure.
+ */
+static
+int signal_metadata(struct lttng_consumer_stream *stream,
+               struct lttng_consumer_local_data *ctx)
+{
+       int ret;
+
+       ASSERT_LOCKED(stream->metadata_rdv_lock);
+
+       /*
+        * pthread functions report failures through their return value; errno
+        * is not set by them and must not be consulted here.
+        */
+       ret = pthread_cond_broadcast(&stream->metadata_rdv);
+       return ret ? -ret : 0;
+}
 
 static
@@ -1650,6 +1722,8 @@ int lttng_kconsumer_set_stream_ops(
                        metadata_bucket_destroy(stream->metadata_bucket);
                        stream->metadata_bucket = NULL;
                }
+
+               stream->read_subbuffer_ops.on_sleep = signal_metadata;
        }
 
        if (!stream->read_subbuffer_ops.get_next_subbuffer) {
This page took 0.032568 seconds and 4 git commands to generate.