X-Git-Url: https://git.liburcu.org/?a=blobdiff_plain;f=src%2Fcommon%2Fconsumer%2Fconsumer.c;h=782ace930c0f7c6cb4952c703202a8fcea4b5e1f;hb=56c86e0a3f16a2f7fd3e473dd8c9d753a13e86ee;hp=cbd99e7ec3ab08146142706383085b2933e314ba;hpb=fa2f196b44f1b8fa07671658b8c74f37477c18cc;p=lttng-tools.git diff --git a/src/common/consumer/consumer.c b/src/common/consumer/consumer.c index cbd99e7ec..782ace930 100644 --- a/src/common/consumer/consumer.c +++ b/src/common/consumer/consumer.c @@ -63,6 +63,12 @@ struct consumer_channel_msg { uint64_t key; /* del */ }; +enum open_packet_status { + OPEN_PACKET_STATUS_OPENED, + OPEN_PACKET_STATUS_NO_SPACE, + OPEN_PACKET_STATUS_ERROR, +}; + /* Flag used to temporarily pause data consumption from testpoints. */ int data_consumption_paused; @@ -3310,11 +3316,145 @@ error_testpoint: return NULL; } +static +int consumer_flush_buffer(struct lttng_consumer_stream *stream, + int producer_active) +{ + int ret = 0; + + switch (consumer_data.type) { + case LTTNG_CONSUMER_KERNEL: + if (producer_active) { + ret = kernctl_buffer_flush(stream->wait_fd); + if (ret < 0) { + ERR("Failed to flush kernel stream"); + goto end; + } + } else { + ret = kernctl_buffer_flush_empty(stream->wait_fd); + if (ret < 0) { + /* + * Doing a buffer flush which does not take into + * account empty packets. This is not perfect, + * but required as a fall-back when + * "flush_empty" is not implemented by + * lttng-modules. + */ + ret = kernctl_buffer_flush(stream->wait_fd); + if (ret < 0) { + ERR("Failed to flush kernel stream"); + goto end; + } + } + } + break; + case LTTNG_CONSUMER32_UST: + case LTTNG_CONSUMER64_UST: + lttng_ustconsumer_flush_buffer(stream, producer_active); + break; + default: + ERR("Unknown consumer_data type"); + abort(); + } + +end: + return ret; +} + +static enum open_packet_status open_packet(struct lttng_consumer_stream *stream) +{ + int ret; + enum open_packet_status status; + unsigned long produced_pos_before, produced_pos_after; + + ret = lttng_consumer_sample_snapshot_positions(stream); + if (ret < 0) { + ERR("Failed to snapshot positions before post-rotation empty packet flush: stream id = %" PRIu64 + ", channel name = %s, session id = %" PRIu64, + stream->key, stream->chan->name, + stream->chan->session_id); + status = OPEN_PACKET_STATUS_ERROR; + goto end; + } + + ret = lttng_consumer_get_produced_snapshot( + stream, &produced_pos_before); + if (ret < 0) { + ERR("Failed to read produced position before post-rotation empty packet flush: stream id = %" PRIu64 + ", channel name = %s, session id = %" PRIu64, + stream->key, stream->chan->name, + stream->chan->session_id); + status = OPEN_PACKET_STATUS_ERROR; + goto end; + } + + ret = consumer_flush_buffer(stream, 0); + if (ret) { + ERR("Failed to flush an empty packet at rotation point: stream id = %" PRIu64 + ", channel name = %s, session id = %" PRIu64, + stream->key, stream->chan->name, + stream->chan->session_id); + status = OPEN_PACKET_STATUS_ERROR; + goto end; + } + + ret = lttng_consumer_sample_snapshot_positions(stream); + if (ret < 0) { + ERR("Failed to snapshot positions after post-rotation empty packet flush: stream id = %" PRIu64 + ", channel name = %s, session id = %" PRIu64, + stream->key, stream->chan->name, + stream->chan->session_id); + status = OPEN_PACKET_STATUS_ERROR; + goto end; + } + + ret = lttng_consumer_get_produced_snapshot(stream, &produced_pos_after); + if (ret < 0) { + ERR("Failed to read produced position after post-rotation empty packet flush: stream id = %" PRIu64 + ", channel name = %s, session id = %" PRIu64, + stream->key, stream->chan->name, + stream->chan->session_id); + status = OPEN_PACKET_STATUS_ERROR; + goto end; + } + + /* + * Determine if the flush had an effect by comparing the produced + * positons before and after the flush. + */ + status = produced_pos_before != produced_pos_after ? + OPEN_PACKET_STATUS_OPENED : + OPEN_PACKET_STATUS_NO_SPACE; + if (status == OPEN_PACKET_STATUS_OPENED) { + stream->opened_packet_in_current_trace_chunk = true; + } +end: + return status; +} + +static bool stream_is_rotating_to_null_chunk( + const struct lttng_consumer_stream *stream) +{ + bool rotating_to_null_chunk = false; + + if (stream->rotate_position == -1ULL) { + /* No rotation ongoing. */ + goto end; + } + + if (stream->trace_chunk == stream->chan->trace_chunk || + !stream->chan->trace_chunk) { + rotating_to_null_chunk = true; + } +end: + return rotating_to_null_chunk; +} + ssize_t lttng_consumer_read_subbuffer(struct lttng_consumer_stream *stream, struct lttng_consumer_local_data *ctx, bool locked_by_caller) { - ssize_t ret, written_bytes; + ssize_t ret, written_bytes = 0; int rotation_ret; struct stream_subbuffer subbuffer = {}; @@ -3360,24 +3500,10 @@ ssize_t lttng_consumer_read_subbuffer(struct lttng_consumer_stream *stream, written_bytes = stream->read_subbuffer_ops.consume_subbuffer( ctx, stream, &subbuffer); - /* - * Should write subbuf_size amount of data when network streaming or - * the full padded size when we are not streaming. - */ - if ((written_bytes != subbuffer.info.data.subbuf_size && - stream->net_seq_idx != (uint64_t) -1ULL) || - (written_bytes != subbuffer.info.data.padded_subbuf_size && - stream->net_seq_idx == - (uint64_t) -1ULL)) { - /* - * Display the error but continue processing to try to - * release the subbuffer. This is a DBG statement - * since this can happen without being a critical - * error. - */ - DBG("Failed to write to tracefile (written_bytes: %zd != padded subbuffer size: %lu, subbuffer size: %lu)", - written_bytes, subbuffer.info.data.subbuf_size, - subbuffer.info.data.padded_subbuf_size); + if (written_bytes <= 0) { + ERR("Error consuming subbuffer: (%zd)", written_bytes); + ret = (int) written_bytes; + goto error_put_subbuf; } ret = stream->read_subbuffer_ops.put_next_subbuffer(stream, &subbuffer); @@ -3413,7 +3539,45 @@ ssize_t lttng_consumer_read_subbuffer(struct lttng_consumer_stream *stream, goto end; } -stream_sleep: + /* + * TODO roll into a post_consume op as this doesn't apply to metadata + * streams. + */ + if (!stream->opened_packet_in_current_trace_chunk && + stream->trace_chunk && !stream->metadata_flag && + !stream_is_rotating_to_null_chunk(stream)) { + const enum open_packet_status status = open_packet(stream); + + switch (status) { + case OPEN_PACKET_STATUS_OPENED: + DBG("Opened a packet after consuming a packet rotation: stream id = %" PRIu64 + ", channel name = %s, session id = %" PRIu64, + stream->key, stream->chan->name, + stream->chan->session_id); + break; + case OPEN_PACKET_STATUS_NO_SPACE: + /* + * Can't open a packet as there is no space left. + * This means that new events were produced, resulting + * in a packet being opened, which is what we wanted + * anyhow. + */ + DBG("No space left to open a packet after consuming a packet: stream id = %" PRIu64 + ", channel name = %s, session id = %" PRIu64, + stream->key, stream->chan->name, + stream->chan->session_id); + stream->opened_packet_in_current_trace_chunk = true; + break; + case OPEN_PACKET_STATUS_ERROR: + /* Logged by callee. */ + ret = -1; + goto end; + default: + abort(); + } + } + +sleep_stream: if (stream->read_subbuffer_ops.on_sleep) { stream->read_subbuffer_ops.on_sleep(stream, ctx); } @@ -3901,50 +4065,6 @@ unsigned long consumer_get_consume_start_pos(unsigned long consumed_pos, return start_pos; } -static -int consumer_flush_buffer(struct lttng_consumer_stream *stream, int producer_active) -{ - int ret = 0; - - switch (consumer_data.type) { - case LTTNG_CONSUMER_KERNEL: - if (producer_active) { - ret = kernctl_buffer_flush(stream->wait_fd); - if (ret < 0) { - ERR("Failed to flush kernel stream"); - goto end; - } - } else { - ret = kernctl_buffer_flush_empty(stream->wait_fd); - if (ret < 0) { - /* - * Doing a buffer flush which does not take into - * account empty packets. This is not perfect, - * but required as a fall-back when - * "flush_empty" is not implemented by - * lttng-modules. - */ - ret = kernctl_buffer_flush(stream->wait_fd); - if (ret < 0) { - ERR("Failed to flush kernel stream"); - goto end; - } - } - } - break; - case LTTNG_CONSUMER32_UST: - case LTTNG_CONSUMER64_UST: - lttng_ustconsumer_flush_buffer(stream, producer_active); - break; - default: - ERR("Unknown consumer_data type"); - abort(); - } - -end: - return ret; -} - /* * Sample the rotate position for all the streams of a channel. If a stream * is already at the rotate position (produced == consumed), we flag it as @@ -3968,11 +4088,15 @@ int lttng_consumer_rotate_channel(struct lttng_consumer_channel *channel, const bool is_local_trace = relayd_id == -1ULL; struct consumer_relayd_sock_pair *relayd = NULL; bool rotating_to_new_chunk = true; + /* Array of `struct lttng_consumer_stream *` */ + struct lttng_dynamic_pointer_array streams_packet_to_open; + size_t stream_idx; DBG("Consumer sample rotate position for channel %" PRIu64, key); lttng_dynamic_array_init(&stream_rotation_positions, sizeof(struct relayd_stream_rotation_position), NULL); + lttng_dynamic_pointer_array_init(&streams_packet_to_open, NULL); rcu_read_lock(); @@ -4102,36 +4226,135 @@ int lttng_consumer_rotate_channel(struct lttng_consumer_channel *channel, } stream_count++; } + + stream->opened_packet_in_current_trace_chunk = false; + + if (rotating_to_new_chunk && !stream->metadata_flag) { + /* + * Attempt to flush an empty packet as close to the + * rotation point as possible. In the event where a + * stream remains inactive after the rotation point, + * this ensures that the new trace chunk has a + * beginning timestamp set at the begining of the + * trace chunk instead of only creating an empty + * packet when the trace chunk is stopped. + * + * This indicates to the viewers that the stream + * was being recorded, but more importantly it + * allows viewers to determine a useable trace + * intersection. + * + * This presents a problem in the case where the + * ring-buffer is completely full. + * + * Consider the following scenario: + * - The consumption of data is slow (slow network, + * for instance), + * - The ring buffer is full, + * - A rotation is initiated, + * - The flush below does nothing (no space left to + * open a new packet), + * - The other streams rotate very soon, and new + * data is produced in the new chunk, + * - This stream completes its rotation long after the + * rotation was initiated + * - The session is stopped before any event can be + * produced in this stream's buffers. + * + * The resulting trace chunk will have a single packet + * temporaly at the end of the trace chunk for this + * stream making the stream intersection more narrow + * than it should be. + * + * To work-around this, an empty flush is performed + * after the first consumption of a packet during a + * rotation if open_packet fails. The idea is that + * consuming a packet frees enough space to switch + * packets in this scenario and allows the tracer to + * "stamp" the beginning of the new trace chunk at the + * earliest possible point. + * + * The packet open is performed after the channel + * rotation to ensure that no attempt to open a packet + * is performed in a stream that has no active trace + * chunk. + */ + ret = lttng_dynamic_pointer_array_add_pointer( + &streams_packet_to_open, stream); + if (ret) { + PERROR("Failed to add a stream pointer to array of streams in which to open a packet"); + ret = -1; + goto end_unlock_stream; + } + } + pthread_mutex_unlock(&stream->lock); } stream = NULL; - pthread_mutex_unlock(&channel->lock); - if (is_local_trace) { - ret = 0; - goto end; - } + if (!is_local_trace) { + relayd = consumer_find_relayd(relayd_id); + if (!relayd) { + ERR("Failed to find relayd %" PRIu64, relayd_id); + ret = -1; + goto end_unlock_channel; + } - relayd = consumer_find_relayd(relayd_id); - if (!relayd) { - ERR("Failed to find relayd %" PRIu64, relayd_id); - ret = -1; - goto end; + pthread_mutex_lock(&relayd->ctrl_sock_mutex); + ret = relayd_rotate_streams(&relayd->control_sock, stream_count, + rotating_to_new_chunk ? &next_chunk_id : NULL, + (const struct relayd_stream_rotation_position *) + stream_rotation_positions.buffer + .data); + pthread_mutex_unlock(&relayd->ctrl_sock_mutex); + if (ret < 0) { + ERR("Relayd rotate stream failed. Cleaning up relayd %" PRIu64, + relayd->net_seq_idx); + lttng_consumer_cleanup_relayd(relayd); + goto end_unlock_channel; + } } - pthread_mutex_lock(&relayd->ctrl_sock_mutex); - ret = relayd_rotate_streams(&relayd->control_sock, stream_count, - rotating_to_new_chunk ? &next_chunk_id : NULL, - (const struct relayd_stream_rotation_position *) - stream_rotation_positions.buffer.data); - pthread_mutex_unlock(&relayd->ctrl_sock_mutex); - if (ret < 0) { - ERR("Relayd rotate stream failed. Cleaning up relayd %" PRIu64, - relayd->net_seq_idx); - lttng_consumer_cleanup_relayd(relayd); - goto end; + for (stream_idx = 0; + stream_idx < lttng_dynamic_pointer_array_get_count( + &streams_packet_to_open); + stream_idx++) { + enum open_packet_status status; + + stream = lttng_dynamic_pointer_array_get_pointer( + &streams_packet_to_open, stream_idx); + + pthread_mutex_lock(&stream->lock); + status = open_packet(stream); + pthread_mutex_unlock(&stream->lock); + switch (status) { + case OPEN_PACKET_STATUS_OPENED: + DBG("Opened a packet after a rotation: stream id = %" PRIu64 + ", channel name = %s, session id = %" PRIu64, + stream->key, stream->chan->name, + stream->chan->session_id); + break; + case OPEN_PACKET_STATUS_NO_SPACE: + /* + * Can't open a packet as there is no space left + * in the buffer. A new packet will be opened + * once one has been consumed. + */ + DBG("No space left to open a packet after a rotation: stream id = %" PRIu64 + ", channel name = %s, session id = %" PRIu64, + stream->key, stream->chan->name, + stream->chan->session_id); + break; + case OPEN_PACKET_STATUS_ERROR: + /* Logged by callee. */ + ret = -1; + goto end_unlock_channel; + default: + abort(); + } } + pthread_mutex_unlock(&channel->lock); ret = 0; goto end; @@ -4142,6 +4365,7 @@ end_unlock_channel: end: rcu_read_unlock(); lttng_dynamic_array_reset(&stream_rotation_positions); + lttng_dynamic_pointer_array_reset(&streams_packet_to_open); return ret; } @@ -4957,3 +5181,70 @@ int lttng_consumer_clear_channel(struct lttng_consumer_channel *channel) end: return ret; } + +enum lttcomm_return_code lttng_consumer_open_channel_packets( + struct lttng_consumer_channel *channel) +{ + struct lttng_consumer_stream *stream; + enum lttcomm_return_code ret = LTTCOMM_CONSUMERD_SUCCESS; + + if (channel->metadata_stream) { + ERR("Open channel packets command attempted on a metadata channel"); + ret = LTTCOMM_CONSUMERD_INVALID_PARAMETERS; + goto end; + } + + rcu_read_lock(); + cds_list_for_each_entry(stream, &channel->streams.head, send_node) { + enum open_packet_status status; + + pthread_mutex_lock(&stream->lock); + if (cds_lfht_is_node_deleted(&stream->node.node)) { + goto next; + } + + status = open_packet(stream); + switch (status) { + case OPEN_PACKET_STATUS_OPENED: + DBG("Opened a packet in \"open channel packets\" command: stream id = %" PRIu64 + ", channel name = %s, session id = %" PRIu64, + stream->key, stream->chan->name, + stream->chan->session_id); + stream->opened_packet_in_current_trace_chunk = true; + break; + case OPEN_PACKET_STATUS_NO_SPACE: + DBG("No space left to open a packet in \"open channel packets\" command: stream id = %" PRIu64 + ", channel name = %s, session id = %" PRIu64, + stream->key, stream->chan->name, + stream->chan->session_id); + break; + case OPEN_PACKET_STATUS_ERROR: + /* + * Only unexpected internal errors can lead to this + * failing. Report an unknown error. + */ + ERR("Failed to flush empty buffer in \"open channel packets\" command: stream id = %" PRIu64 + ", channel id = %" PRIu64 + ", channel name = %s" + ", session id = %" PRIu64, + stream->key, channel->key, + channel->name, channel->session_id); + ret = LTTCOMM_CONSUMERD_UNKNOWN_ERROR; + goto error_unlock; + default: + abort(); + } + + next: + pthread_mutex_unlock(&stream->lock); + } + +end_rcu_unlock: + rcu_read_unlock(); +end: + return ret; + +error_unlock: + pthread_mutex_unlock(&stream->lock); + goto end_rcu_unlock; +}