Fix: perform local data pending check before checking data pending with relayd

[lttng-tools.git] / src / common / consumer / consumer.c
diff --git a/src/common/consumer/consumer.c b/src/common/consumer/consumer.c

index 2897fb855dde0844be2984df3278818be910eb56..53a50efbb922602069a6a7ab2418310f6b979ff7 100644 (file)
--- a/src/common/consumer/consumer.c
+++ b/src/common/consumer/consumer.c
@@ -47,6 +47,7 @@
  #include <common/consumer/consumer-stream.h>
  #include <common/consumer/consumer-testpoint.h>
  #include <common/align.h>
+#include <common/consumer/consumer-metadata-cache.h>
  
  struct lttng_consumer_global_data consumer_data = {
         .stream_count = 0,
@@ -319,6 +320,7 @@ static void free_relayd_rcu(struct rcu_head *head)
         (void) relayd_close(&relayd->control_sock);
         (void) relayd_close(&relayd->data_sock);
  
+       pthread_mutex_destroy(&relayd->ctrl_sock_mutex);
         free(relayd);
  }
  
@@ -457,14 +459,13 @@ static void update_endpoint_status_by_netidx(uint64_t net_seq_idx,
   * If a local data context is available, notify the threads that the streams'
   * state have changed.
   */
-static void cleanup_relayd(struct consumer_relayd_sock_pair *relayd,
-               struct lttng_consumer_local_data *ctx)
+void lttng_consumer_cleanup_relayd(struct consumer_relayd_sock_pair *relayd)
  {
         uint64_t netidx;
  
         assert(relayd);
  
-       DBG("Cleaning up relayd sockets");
+       DBG("Cleaning up relayd object ID %"PRIu64, relayd->net_seq_idx);
  
         /* Save the net sequence index before destroying the object */
         netidx = relayd->net_seq_idx;
@@ -484,10 +485,8 @@ static void cleanup_relayd(struct consumer_relayd_sock_pair *relayd,
          * memory barrier ordering the updates of the end point status from the
          * read of this status which happens AFTER receiving this notify.
          */
-       if (ctx) {
-               notify_thread_lttng_pipe(ctx->consumer_data_pipe);
-               notify_thread_lttng_pipe(ctx->consumer_metadata_pipe);
-       }
+       notify_thread_lttng_pipe(relayd->ctx->consumer_data_pipe);
+       notify_thread_lttng_pipe(relayd->ctx->consumer_metadata_pipe);
  }
  
  /*
@@ -570,7 +569,8 @@ struct lttng_consumer_stream *consumer_allocate_stream(uint64_t channel_key,
         stream->session_id = session_id;
         stream->monitor = monitor;
         stream->endpoint_status = CONSUMER_ENDPOINT_ACTIVE;
-       stream->index_fd = -1;
+       stream->index_file = NULL;
+       stream->last_sequence_number = -1ULL;
         pthread_mutex_init(&stream->lock, NULL);
         pthread_mutex_init(&stream->metadata_timer_lock, NULL);
  
@@ -794,6 +794,8 @@ int consumer_send_relayd_stream(struct lttng_consumer_stream *stream,
                                 stream->chan->tracefile_size, stream->chan->tracefile_count);
                 pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
                 if (ret < 0) {
+                       ERR("Relayd add stream failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx);
+                       lttng_consumer_cleanup_relayd(relayd);
                         goto end;
                 }
  
@@ -835,6 +837,8 @@ int consumer_send_relayd_streams_sent(uint64_t net_seq_idx)
                 ret = relayd_streams_sent(&relayd->control_sock);
                 pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
                 if (ret < 0) {
+                       ERR("Relayd streams sent failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx);
+                       lttng_consumer_cleanup_relayd(relayd);
                         goto end;
                 }
         } else {
@@ -1020,7 +1024,7 @@ struct lttng_consumer_channel *consumer_allocate_channel(uint64_t key,
  
         CDS_INIT_LIST_HEAD(&channel->streams.head);
  
-       DBG("Allocated channel (key %" PRIu64 ")", channel->key)
+       DBG("Allocated channel (key %" PRIu64 ")", channel->key);
  
  end:
         return channel;
@@ -1069,7 +1073,7 @@ int consumer_add_channel(struct lttng_consumer_channel *channel,
   */
  static int update_poll_array(struct lttng_consumer_local_data *ctx,
                 struct pollfd **pollfd, struct lttng_consumer_stream **local_stream,
-               struct lttng_ht *ht)
+               struct lttng_ht *ht, int *nb_inactive_fd)
  {
         int i = 0;
         struct lttng_ht_iter iter;
@@ -1081,6 +1085,7 @@ static int update_poll_array(struct lttng_consumer_local_data *ctx,
         assert(local_stream);
  
         DBG("Updating poll fd array");
+       *nb_inactive_fd = 0;
         rcu_read_lock();
         cds_lfht_for_each_entry(ht->ht, &iter.iter, stream, node.node) {
                 /*
@@ -1091,9 +1096,14 @@ static int update_poll_array(struct lttng_consumer_local_data *ctx,
                  * just after the check. However, this is OK since the stream(s) will
                  * be deleted once the thread is notified that the end point state has
                  * changed where this function will be called back again.
+                *
+                * We track the number of inactive FDs because they still need to be
+                * closed by the polling thread after a wakeup on the data_pipe or
+                * metadata_pipe.
                  */
                 if (stream->state != LTTNG_CONSUMER_ACTIVE_STREAM ||
                                 stream->endpoint_status == CONSUMER_ENDPOINT_INACTIVE) {
+                       (*nb_inactive_fd)++;
                         continue;
                 }
                 /*
@@ -1228,9 +1238,15 @@ void lttng_consumer_should_exit(struct lttng_consumer_local_data *ctx)
         DBG("Consumer flag that it should quit");
  }
  
+
+/*
+ * Flush pending writes to trace output disk file.
+ */
+static
  void lttng_consumer_sync_trace_file(struct lttng_consumer_stream *stream,
                 off_t orig_offset)
  {
+       int ret;
         int outfd = stream->out_fd;
  
         /*
@@ -1261,8 +1277,12 @@ void lttng_consumer_sync_trace_file(struct lttng_consumer_stream *stream,
          * defined. So it can be expected to lead to lower throughput in
          * streaming.
          */
-       posix_fadvise(outfd, orig_offset - stream->max_sb_size,
+       ret = posix_fadvise(outfd, orig_offset - stream->max_sb_size,
                         stream->max_sb_size, POSIX_FADV_DONTNEED);
+       if (ret && ret != -ENOSYS) {
+               errno = ret;
+               PERROR("posix_fadvise on fd %i", outfd);
+       }
  }
  
  /*
@@ -1519,7 +1539,6 @@ ssize_t lttng_consumer_on_read_subbuffer_mmap(
                 mmap_base = stream->mmap_base;
                 ret = kernctl_get_mmap_read_offset(stream->wait_fd, &mmap_offset);
                 if (ret < 0) {
-                       ret = -errno;
                         PERROR("tracer ctl get_mmap_read_offset");
                         goto end;
                 }
@@ -1555,6 +1574,16 @@ ssize_t lttng_consumer_on_read_subbuffer_mmap(
                 if (stream->metadata_flag) {
                         /* Metadata requires the control socket. */
                         pthread_mutex_lock(&relayd->ctrl_sock_mutex);
+                       if (stream->reset_metadata_flag) {
+                               ret = relayd_reset_metadata(&relayd->control_sock,
+                                               stream->relayd_stream_id,
+                                               stream->metadata_version);
+                               if (ret < 0) {
+                                       relayd_hang_up = 1;
+                                       goto write_error;
+                               }
+                               stream->reset_metadata_flag = 0;
+                       }
                         netlen += sizeof(struct lttcomm_relayd_metadata_payload);
                 }
  
@@ -1578,6 +1607,15 @@ ssize_t lttng_consumer_on_read_subbuffer_mmap(
                 /* No streaming, we have to set the len with the full padding */
                 len += padding;
  
+               if (stream->metadata_flag && stream->reset_metadata_flag) {
+                       ret = utils_truncate_stream_file(stream->out_fd, 0);
+                       if (ret < 0) {
+                               ERR("Reset metadata file");
+                               goto end;
+                       }
+                       stream->reset_metadata_flag = 0;
+               }
+
                 /*
                  * Check if we need to change the tracefile before writing the packet.
                  */
@@ -1595,15 +1633,16 @@ ssize_t lttng_consumer_on_read_subbuffer_mmap(
                         }
                         outfd = stream->out_fd;
  
-                       if (stream->index_fd >= 0) {
-                               ret = index_create_file(stream->chan->pathname,
+                       if (stream->index_file) {
+                               lttng_index_file_put(stream->index_file);
+                               stream->index_file = lttng_index_file_create(stream->chan->pathname,
                                                 stream->name, stream->uid, stream->gid,
                                                 stream->chan->tracefile_size,
-                                               stream->tracefile_count_current);
-                               if (ret < 0) {
+                                               stream->tracefile_count_current,
+                                               CTF_INDEX_MAJOR, CTF_INDEX_MINOR);
+                               if (!stream->index_file) {
                                         goto end;
                                 }
-                               stream->index_fd = ret;
                         }
  
                         /* Reset current size because we just perform a rotation. */
@@ -1656,8 +1695,8 @@ ssize_t lttng_consumer_on_read_subbuffer_mmap(
                 lttng_sync_file_range(outfd, stream->out_fd_offset, len,
                                 SYNC_FILE_RANGE_WRITE);
                 stream->out_fd_offset += len;
+               lttng_consumer_sync_trace_file(stream, orig_offset);
         }
-       lttng_consumer_sync_trace_file(stream, orig_offset);
  
  write_error:
         /*
@@ -1665,7 +1704,8 @@ write_error:
          * cleanup the relayd object and all associated streams.
          */
         if (relayd && relayd_hang_up) {
-               cleanup_relayd(relayd, ctx);
+               ERR("Relayd hangup. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx);
+               lttng_consumer_cleanup_relayd(relayd);
         }
  
  end:
@@ -1737,6 +1777,16 @@ ssize_t lttng_consumer_on_read_subbuffer_splice(
                          */
                         pthread_mutex_lock(&relayd->ctrl_sock_mutex);
  
+                       if (stream->reset_metadata_flag) {
+                               ret = relayd_reset_metadata(&relayd->control_sock,
+                                               stream->relayd_stream_id,
+                                               stream->metadata_version);
+                               if (ret < 0) {
+                                       relayd_hang_up = 1;
+                                       goto write_error;
+                               }
+                               stream->reset_metadata_flag = 0;
+                       }
                         ret = write_relayd_metadata_id(splice_pipe[1], stream, relayd,
                                         padding);
                         if (ret < 0) {
@@ -1760,6 +1810,14 @@ ssize_t lttng_consumer_on_read_subbuffer_splice(
                 /* No streaming, we have to set the len with the full padding */
                 len += padding;
  
+               if (stream->metadata_flag && stream->reset_metadata_flag) {
+                       ret = utils_truncate_stream_file(stream->out_fd, 0);
+                       if (ret < 0) {
+                               ERR("Reset metadata file");
+                               goto end;
+                       }
+                       stream->reset_metadata_flag = 0;
+               }
                 /*
                  * Check if we need to change the tracefile before writing the packet.
                  */
@@ -1778,16 +1836,16 @@ ssize_t lttng_consumer_on_read_subbuffer_splice(
                         }
                         outfd = stream->out_fd;
  
-                       if (stream->index_fd >= 0) {
-                               ret = index_create_file(stream->chan->pathname,
+                       if (stream->index_file) {
+                               lttng_index_file_put(stream->index_file);
+                               stream->index_file = lttng_index_file_create(stream->chan->pathname,
                                                 stream->name, stream->uid, stream->gid,
                                                 stream->chan->tracefile_size,
-                                               stream->tracefile_count_current);
-                               if (ret < 0) {
-                                       written = ret;
+                                               stream->tracefile_count_current,
+                                               CTF_INDEX_MAJOR, CTF_INDEX_MINOR);
+                               if (!stream->index_file) {
                                         goto end;
                                 }
-                               stream->index_fd = ret;
                         }
  
                         /* Reset current size because we just perform a rotation. */
@@ -1862,7 +1920,9 @@ ssize_t lttng_consumer_on_read_subbuffer_splice(
                 stream->output_written += ret_splice;
                 written += ret_splice;
         }
-       lttng_consumer_sync_trace_file(stream, orig_offset);
+       if (!relayd) {
+               lttng_consumer_sync_trace_file(stream, orig_offset);
+       }
         goto end;
  
  write_error:
@@ -1871,7 +1931,8 @@ write_error:
          * cleanup the relayd object and all associated streams.
          */
         if (relayd && relayd_hang_up) {
-               cleanup_relayd(relayd, ctx);
+               ERR("Relayd hangup. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx);
+               lttng_consumer_cleanup_relayd(relayd);
                 /* Skip splice error so the consumer does not fail */
                 goto end;
         }
@@ -2002,6 +2063,10 @@ void consumer_del_metadata_stream(struct lttng_consumer_stream *stream,
         pthread_mutex_lock(&consumer_data.lock);
         pthread_mutex_lock(&stream->chan->lock);
         pthread_mutex_lock(&stream->lock);
+       if (stream->chan->metadata_cache) {
+               /* Only applicable to userspace consumers. */
+               pthread_mutex_lock(&stream->chan->metadata_cache->lock);
+       }
  
         /* Remove any reference to that stream. */
         consumer_stream_delete(stream, ht);
@@ -2025,6 +2090,9 @@ void consumer_del_metadata_stream(struct lttng_consumer_stream *stream,
          */
         stream->chan->metadata_stream = NULL;
  
+       if (stream->chan->metadata_cache) {
+               pthread_mutex_unlock(&stream->chan->metadata_cache->lock);
+       }
         pthread_mutex_unlock(&stream->lock);
         pthread_mutex_unlock(&stream->chan->lock);
         pthread_mutex_unlock(&consumer_data.lock);
@@ -2087,7 +2155,7 @@ int consumer_add_metadata_stream(struct lttng_consumer_stream *stream)
  
         lttng_ht_add_unique_u64(ht, &stream->node);
  
-       lttng_ht_add_unique_u64(consumer_data.stream_per_chan_id_ht,
+       lttng_ht_add_u64(consumer_data.stream_per_chan_id_ht,
                 &stream->node_channel_id);
  
         /*
@@ -2211,10 +2279,10 @@ restart:
                 DBG("Metadata poll return from wait with %d fd(s)",
                                 LTTNG_POLL_GETNB(&events));
                 health_poll_exit();
-               DBG("Metadata event catched in thread");
+               DBG("Metadata event caught in thread");
                 if (ret < 0) {
                         if (errno == EINTR) {
-                               ERR("Poll EINTR catched");
+                               ERR("Poll EINTR caught");
                                 goto restart;
                         }
                         if (LTTNG_POLL_GETNB(&events) == 0) {
@@ -2312,7 +2380,7 @@ restart:
                                         len = ctx->on_buffer_ready(stream, ctx);
                                         /*
                                          * We don't check the return value here since if we get
-                                        * a negative len, it means an error occured thus we
+                                        * a negative len, it means an error occurred thus we
                                          * simply remove it from the poll set and free the
                                          * stream.
                                          */
@@ -2339,7 +2407,7 @@ restart:
                                                 len = ctx->on_buffer_ready(stream, ctx);
                                                 /*
                                                  * We don't check the return value here since if we get
-                                                * a negative len, it means an error occured thus we
+                                                * a negative len, it means an error occurred thus we
                                                  * simply remove it from the poll set and free the
                                                  * stream.
                                                  */
@@ -2391,6 +2459,8 @@ void *consumer_thread_data_poll(void *data)
         struct lttng_consumer_stream **local_stream = NULL, *new_stream = NULL;
         /* local view of consumer_data.fds_count */
         int nb_fd = 0;
+       /* Number of FDs with CONSUMER_ENDPOINT_INACTIVE but still open. */
+       int nb_inactive_fd = 0;
         struct lttng_consumer_local_data *ctx = data;
         ssize_t len;
  
@@ -2447,7 +2517,7 @@ void *consumer_thread_data_poll(void *data)
                                 goto end;
                         }
                         ret = update_poll_array(ctx, &pollfd, local_stream,
-                                       data_ht);
+                                       data_ht, &nb_inactive_fd);
                         if (ret < 0) {
                                 ERR("Error in allocating pollfd or local_outfds");
                                 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_POLL_ERROR);
@@ -2460,7 +2530,7 @@ void *consumer_thread_data_poll(void *data)
                 pthread_mutex_unlock(&consumer_data.lock);
  
                 /* No FDs and consumer_quit, consumer_cleanup the thread */
-               if (nb_fd == 0 && consumer_quit == 1) {
+               if (nb_fd == 0 && consumer_quit == 1 && nb_inactive_fd == 0) {
                         err = 0;        /* All is OK */
                         goto end;
                 }
@@ -2794,10 +2864,10 @@ restart:
                 DBG("Channel poll return from wait with %d fd(s)",
                                 LTTNG_POLL_GETNB(&events));
                 health_poll_exit();
-               DBG("Channel event catched in thread");
+               DBG("Channel event caught in thread");
                 if (ret < 0) {
                         if (errno == EINTR) {
-                               ERR("Poll EINTR catched");
+                               ERR("Poll EINTR caught");
                                 goto restart;
                         }
                         if (LTTNG_POLL_GETNB(&events) == 0) {
@@ -3442,6 +3512,7 @@ int consumer_add_relayd_socket(uint64_t net_seq_idx, int sock_type,
          * Add relayd socket pair to consumer data hashtable. If object already
          * exists or on error, the function gracefully returns.
          */
+       relayd->ctx = ctx;
         add_relayd(relayd);
  
         /* All good! */
@@ -3467,34 +3538,6 @@ error_nosignal:
         return ret;
  }
  
-/*
- * Try to lock the stream mutex.
- *
- * On success, 1 is returned else 0 indicating that the mutex is NOT lock.
- */
-static int stream_try_lock(struct lttng_consumer_stream *stream)
-{
-       int ret;
-
-       assert(stream);
-
-       /*
-        * Try to lock the stream mutex. On failure, we know that the stream is
-        * being used else where hence there is data still being extracted.
-        */
-       ret = pthread_mutex_trylock(&stream->lock);
-       if (ret) {
-               /* For both EBUSY and EINVAL error, the mutex is NOT locked. */
-               ret = 0;
-               goto end;
-       }
-
-       ret = 1;
-
-end:
-       return ret;
-}
-
  /*
   * Search for a relayd associated to the session id and return the reference.
   *
@@ -3561,28 +3604,11 @@ int consumer_data_pending(uint64_t id)
         /* Ease our life a bit */
         ht = consumer_data.stream_list_ht;
  
-       relayd = find_relayd_by_session_id(id);
-       if (relayd) {
-               /* Send init command for data pending. */
-               pthread_mutex_lock(&relayd->ctrl_sock_mutex);
-               ret = relayd_begin_data_pending(&relayd->control_sock,
-                               relayd->relayd_session_id);
-               pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
-               if (ret < 0) {
-                       /* Communication error thus the relayd so no data pending. */
-                       goto data_not_pending;
-               }
-       }
-
         cds_lfht_for_each_entry_duplicate(ht->ht,
                         ht->hash_fct(&id, lttng_ht_seed),
                         ht->match_fct, &id,
                         &iter.iter, stream, node_session_id.node) {
-               /* If this call fails, the stream is being used hence data pending. */
-               ret = stream_try_lock(stream);
-               if (!ret) {
-                       goto data_pending;
-               }
+               pthread_mutex_lock(&stream->lock);
  
                 /*
                  * A removed node from the hash table indicates that the stream has
@@ -3600,9 +3626,27 @@ int consumer_data_pending(uint64_t id)
                         }
                 }
  
-               /* Relayd check */
-               if (relayd) {
-                       pthread_mutex_lock(&relayd->ctrl_sock_mutex);
+               pthread_mutex_unlock(&stream->lock);
+       }
+
+       relayd = find_relayd_by_session_id(id);
+       if (relayd) {
+               unsigned int is_data_inflight = 0;
+
+               /* Send init command for data pending. */
+               pthread_mutex_lock(&relayd->ctrl_sock_mutex);
+               ret = relayd_begin_data_pending(&relayd->control_sock,
+                               relayd->relayd_session_id);
+               if (ret < 0) {
+                       pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
+                       /* Communication error with the relayd, so report no data pending. */
+                       goto data_not_pending;
+               }
+
+               cds_lfht_for_each_entry_duplicate(ht->ht,
+                               ht->hash_fct(&id, lttng_ht_seed),
+                               ht->match_fct, &id,
+                               &iter.iter, stream, node_session_id.node) {
                         if (stream->metadata_flag) {
                                 ret = relayd_quiescent_control(&relayd->control_sock,
                                                 stream->relayd_stream_id);
@@ -3611,24 +3655,25 @@ int consumer_data_pending(uint64_t id)
                                                 stream->relayd_stream_id,
                                                 stream->next_net_seq_num - 1);
                         }
-                       pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
+
                         if (ret == 1) {
-                               pthread_mutex_unlock(&stream->lock);
+                               pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
                                 goto data_pending;
+                       } else if (ret < 0) {
+                               ERR("Relayd data pending failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx);
+                               lttng_consumer_cleanup_relayd(relayd);
+                               pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
+                               goto data_not_pending;
                         }
                 }
-               pthread_mutex_unlock(&stream->lock);
-       }
-
-       if (relayd) {
-               unsigned int is_data_inflight = 0;
  
-               /* Send init command for data pending. */
-               pthread_mutex_lock(&relayd->ctrl_sock_mutex);
+               /* Send end command for data pending. */
                 ret = relayd_end_data_pending(&relayd->control_sock,
                                 relayd->relayd_session_id, &is_data_inflight);
                 pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
                 if (ret < 0) {
+                       ERR("Relayd end data pending failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx);
+                       lttng_consumer_cleanup_relayd(relayd);
                         goto data_not_pending;
                 }
                 if (is_data_inflight) {