Fix: ust-consumer: segfault on snapshot after regenerate metadata
[lttng-tools.git] / src / common / kernel-consumer / kernel-consumer.c
1 /*
2 * Copyright (C) 2011 Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Copyright (C) 2011 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * Copyright (C) 2017 Jérémie Galarneau <jeremie.galarneau@efficios.com>
5 *
6 * SPDX-License-Identifier: GPL-2.0-only
7 *
8 */
9
10 #define _LGPL_SOURCE
11 #include <assert.h>
12 #include <poll.h>
13 #include <pthread.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <sys/mman.h>
17 #include <sys/socket.h>
18 #include <sys/types.h>
19 #include <inttypes.h>
20 #include <unistd.h>
21 #include <sys/stat.h>
22 #include <stdint.h>
23
24 #include <bin/lttng-consumerd/health-consumerd.h>
25 #include <common/common.h>
26 #include <common/kernel-ctl/kernel-ctl.h>
27 #include <common/sessiond-comm/sessiond-comm.h>
28 #include <common/sessiond-comm/relayd.h>
29 #include <common/compat/fcntl.h>
30 #include <common/compat/endian.h>
31 #include <common/pipe.h>
32 #include <common/relayd/relayd.h>
33 #include <common/utils.h>
34 #include <common/consumer/consumer-stream.h>
35 #include <common/index/index.h>
36 #include <common/consumer/consumer-timer.h>
37 #include <common/optional.h>
38 #include <common/buffer-view.h>
39 #include <common/consumer/consumer.h>
40 #include <common/consumer/metadata-bucket.h>
41
42 #include "kernel-consumer.h"
43
44 extern struct lttng_consumer_global_data the_consumer_data;
45 extern int consumer_poll_timeout;
46
47 /*
48 * Take a snapshot for a specific fd
49 *
50 * Returns 0 on success, < 0 on error
51 */
52 int lttng_kconsumer_take_snapshot(struct lttng_consumer_stream *stream)
53 {
54 int ret = 0;
55 int infd = stream->wait_fd;
56
57 ret = kernctl_snapshot(infd);
58 /*
59 * -EAGAIN is not an error, it just means that there is no data to
60 * be read.
61 */
62 if (ret != 0 && ret != -EAGAIN) {
63 PERROR("Getting sub-buffer snapshot.");
64 }
65
66 return ret;
67 }
68
69 /*
70 * Sample consumed and produced positions for a specific fd.
71 *
72 * Returns 0 on success, < 0 on error.
73 */
74 int lttng_kconsumer_sample_snapshot_positions(
75 struct lttng_consumer_stream *stream)
76 {
77 assert(stream);
78
79 return kernctl_snapshot_sample_positions(stream->wait_fd);
80 }
81
82 /*
83 * Get the produced position
84 *
85 * Returns 0 on success, < 0 on error
86 */
87 int lttng_kconsumer_get_produced_snapshot(struct lttng_consumer_stream *stream,
88 unsigned long *pos)
89 {
90 int ret;
91 int infd = stream->wait_fd;
92
93 ret = kernctl_snapshot_get_produced(infd, pos);
94 if (ret != 0) {
95 PERROR("kernctl_snapshot_get_produced");
96 }
97
98 return ret;
99 }
100
101 /*
102 * Get the consumerd position
103 *
104 * Returns 0 on success, < 0 on error
105 */
106 int lttng_kconsumer_get_consumed_snapshot(struct lttng_consumer_stream *stream,
107 unsigned long *pos)
108 {
109 int ret;
110 int infd = stream->wait_fd;
111
112 ret = kernctl_snapshot_get_consumed(infd, pos);
113 if (ret != 0) {
114 PERROR("kernctl_snapshot_get_consumed");
115 }
116
117 return ret;
118 }
119
120 static
121 int get_current_subbuf_addr(struct lttng_consumer_stream *stream,
122 const char **addr)
123 {
124 int ret;
125 unsigned long mmap_offset;
126 const char *mmap_base = stream->mmap_base;
127
128 ret = kernctl_get_mmap_read_offset(stream->wait_fd, &mmap_offset);
129 if (ret < 0) {
130 PERROR("Failed to get mmap read offset");
131 goto error;
132 }
133
134 *addr = mmap_base + mmap_offset;
135 error:
136 return ret;
137 }
138
139 /*
140 * Take a snapshot of all the stream of a channel
141 * RCU read-side lock must be held across this function to ensure existence of
142 * channel.
143 *
144 * Returns 0 on success, < 0 on error
145 */
146 static int lttng_kconsumer_snapshot_channel(
147 struct lttng_consumer_channel *channel,
148 uint64_t key, char *path, uint64_t relayd_id,
149 uint64_t nb_packets_per_stream,
150 struct lttng_consumer_local_data *ctx)
151 {
152 int ret;
153 struct lttng_consumer_stream *stream;
154
155 DBG("Kernel consumer snapshot channel %" PRIu64, key);
156
157 /* Prevent channel modifications while we perform the snapshot.*/
158 pthread_mutex_lock(&channel->lock);
159
160 rcu_read_lock();
161
162 /* Splice is not supported yet for channel snapshot. */
163 if (channel->output != CONSUMER_CHANNEL_MMAP) {
164 ERR("Unsupported output type for channel \"%s\": mmap output is required to record a snapshot",
165 channel->name);
166 ret = -1;
167 goto end;
168 }
169
170 cds_list_for_each_entry(stream, &channel->streams.head, send_node) {
171 unsigned long consumed_pos, produced_pos;
172
173 health_code_update();
174
175 /*
176 * Lock stream because we are about to change its state.
177 */
178 pthread_mutex_lock(&stream->lock);
179
180 assert(channel->trace_chunk);
181 if (!lttng_trace_chunk_get(channel->trace_chunk)) {
182 /*
183 * Can't happen barring an internal error as the channel
184 * holds a reference to the trace chunk.
185 */
186 ERR("Failed to acquire reference to channel's trace chunk");
187 ret = -1;
188 goto end_unlock;
189 }
190 assert(!stream->trace_chunk);
191 stream->trace_chunk = channel->trace_chunk;
192
193 /*
194 * Assign the received relayd ID so we can use it for streaming. The streams
195 * are not visible to anyone so this is OK to change it.
196 */
197 stream->net_seq_idx = relayd_id;
198 channel->relayd_id = relayd_id;
199 if (relayd_id != (uint64_t) -1ULL) {
200 ret = consumer_send_relayd_stream(stream, path);
201 if (ret < 0) {
202 ERR("sending stream to relayd");
203 goto end_unlock;
204 }
205 } else {
206 ret = consumer_stream_create_output_files(stream,
207 false);
208 if (ret < 0) {
209 goto end_unlock;
210 }
211 DBG("Kernel consumer snapshot stream (%" PRIu64 ")",
212 stream->key);
213 }
214
215 ret = kernctl_buffer_flush_empty(stream->wait_fd);
216 if (ret < 0) {
217 /*
218 * Doing a buffer flush which does not take into
219 * account empty packets. This is not perfect
220 * for stream intersection, but required as a
221 * fall-back when "flush_empty" is not
222 * implemented by lttng-modules.
223 */
224 ret = kernctl_buffer_flush(stream->wait_fd);
225 if (ret < 0) {
226 ERR("Failed to flush kernel stream");
227 goto end_unlock;
228 }
229 goto end_unlock;
230 }
231
232 ret = lttng_kconsumer_take_snapshot(stream);
233 if (ret < 0) {
234 ERR("Taking kernel snapshot");
235 goto end_unlock;
236 }
237
238 ret = lttng_kconsumer_get_produced_snapshot(stream, &produced_pos);
239 if (ret < 0) {
240 ERR("Produced kernel snapshot position");
241 goto end_unlock;
242 }
243
244 ret = lttng_kconsumer_get_consumed_snapshot(stream, &consumed_pos);
245 if (ret < 0) {
246 ERR("Consumerd kernel snapshot position");
247 goto end_unlock;
248 }
249
250 consumed_pos = consumer_get_consume_start_pos(consumed_pos,
251 produced_pos, nb_packets_per_stream,
252 stream->max_sb_size);
253
254 while ((long) (consumed_pos - produced_pos) < 0) {
255 ssize_t read_len;
256 unsigned long len, padded_len;
257 const char *subbuf_addr;
258 struct lttng_buffer_view subbuf_view;
259
260 health_code_update();
261 DBG("Kernel consumer taking snapshot at pos %lu", consumed_pos);
262
263 ret = kernctl_get_subbuf(stream->wait_fd, &consumed_pos);
264 if (ret < 0) {
265 if (ret != -EAGAIN) {
266 PERROR("kernctl_get_subbuf snapshot");
267 goto end_unlock;
268 }
269 DBG("Kernel consumer get subbuf failed. Skipping it.");
270 consumed_pos += stream->max_sb_size;
271 stream->chan->lost_packets++;
272 continue;
273 }
274
275 ret = kernctl_get_subbuf_size(stream->wait_fd, &len);
276 if (ret < 0) {
277 ERR("Snapshot kernctl_get_subbuf_size");
278 goto error_put_subbuf;
279 }
280
281 ret = kernctl_get_padded_subbuf_size(stream->wait_fd, &padded_len);
282 if (ret < 0) {
283 ERR("Snapshot kernctl_get_padded_subbuf_size");
284 goto error_put_subbuf;
285 }
286
287 ret = get_current_subbuf_addr(stream, &subbuf_addr);
288 if (ret) {
289 goto error_put_subbuf;
290 }
291
292 subbuf_view = lttng_buffer_view_init(
293 subbuf_addr, 0, padded_len);
294 read_len = lttng_consumer_on_read_subbuffer_mmap(
295 stream, &subbuf_view,
296 padded_len - len);
297 /*
298 * We write the padded len in local tracefiles but the data len
299 * when using a relay. Display the error but continue processing
300 * to try to release the subbuffer.
301 */
302 if (relayd_id != (uint64_t) -1ULL) {
303 if (read_len != len) {
304 ERR("Error sending to the relay (ret: %zd != len: %lu)",
305 read_len, len);
306 }
307 } else {
308 if (read_len != padded_len) {
309 ERR("Error writing to tracefile (ret: %zd != len: %lu)",
310 read_len, padded_len);
311 }
312 }
313
314 ret = kernctl_put_subbuf(stream->wait_fd);
315 if (ret < 0) {
316 ERR("Snapshot kernctl_put_subbuf");
317 goto end_unlock;
318 }
319 consumed_pos += stream->max_sb_size;
320 }
321
322 if (relayd_id == (uint64_t) -1ULL) {
323 if (stream->out_fd >= 0) {
324 ret = close(stream->out_fd);
325 if (ret < 0) {
326 PERROR("Kernel consumer snapshot close out_fd");
327 goto end_unlock;
328 }
329 stream->out_fd = -1;
330 }
331 } else {
332 close_relayd_stream(stream);
333 stream->net_seq_idx = (uint64_t) -1ULL;
334 }
335 lttng_trace_chunk_put(stream->trace_chunk);
336 stream->trace_chunk = NULL;
337 pthread_mutex_unlock(&stream->lock);
338 }
339
340 /* All good! */
341 ret = 0;
342 goto end;
343
344 error_put_subbuf:
345 ret = kernctl_put_subbuf(stream->wait_fd);
346 if (ret < 0) {
347 ERR("Snapshot kernctl_put_subbuf error path");
348 }
349 end_unlock:
350 pthread_mutex_unlock(&stream->lock);
351 end:
352 rcu_read_unlock();
353 pthread_mutex_unlock(&channel->lock);
354 return ret;
355 }
356
/*
 * Read the whole metadata available for a snapshot.
 * RCU read-side lock must be held across this function to ensure existence of
 * metadata_channel.
 *
 * On return (success or error), the channel's metadata stream is destroyed
 * and metadata_channel->metadata_stream is set to NULL: the metadata stream
 * is consumed by this function and is not reusable.
 *
 * path is the snapshot destination (relayd path or local directory);
 * relayd_id is (uint64_t) -1ULL for local output.
 *
 * Returns 0 on success, < 0 on error
 */
static int lttng_kconsumer_snapshot_metadata(
		struct lttng_consumer_channel *metadata_channel,
		uint64_t key, char *path, uint64_t relayd_id,
		struct lttng_consumer_local_data *ctx)
{
	int ret, use_relayd = 0;
	ssize_t ret_read;
	struct lttng_consumer_stream *metadata_stream;

	assert(ctx);

	DBG("Kernel consumer snapshot metadata with key %" PRIu64 " at path %s",
			key, path);

	rcu_read_lock();

	metadata_stream = metadata_channel->metadata_stream;
	assert(metadata_stream);

	/*
	 * Hold the stream's read-subbuffer lock for the whole dump; released
	 * at error_snapshot below.
	 */
	metadata_stream->read_subbuffer_ops.lock(metadata_stream);
	assert(metadata_channel->trace_chunk);
	assert(metadata_stream->trace_chunk);

	/* Flag once that we have a valid relayd for the stream. */
	if (relayd_id != (uint64_t) -1ULL) {
		use_relayd = 1;
	}

	/* Set up the snapshot output: relayd stream or local files. */
	if (use_relayd) {
		ret = consumer_send_relayd_stream(metadata_stream, path);
		if (ret < 0) {
			goto error_snapshot;
		}
	} else {
		ret = consumer_stream_create_output_files(metadata_stream,
				false);
		if (ret < 0) {
			goto error_snapshot;
		}
	}

	/*
	 * Drain the metadata until lttng_consumer_read_subbuffer() reports
	 * either no more data (0) or an error (< 0).
	 */
	do {
		health_code_update();

		ret_read = lttng_consumer_read_subbuffer(metadata_stream, ctx, true);
		if (ret_read < 0) {
			ERR("Kernel snapshot reading metadata subbuffer (ret: %zd)",
					ret_read);
			ret = ret_read;
			goto error_snapshot;
		}
	} while (ret_read > 0);

	if (use_relayd) {
		close_relayd_stream(metadata_stream);
		metadata_stream->net_seq_idx = (uint64_t) -1ULL;
	} else {
		if (metadata_stream->out_fd >= 0) {
			ret = close(metadata_stream->out_fd);
			if (ret < 0) {
				PERROR("Kernel consumer snapshot metadata close out_fd");
				/*
				 * Don't go on error here since the snapshot was successful at this
				 * point but somehow the close failed.
				 */
			}
			metadata_stream->out_fd = -1;
			/* Drop the output file's reference on the trace chunk. */
			lttng_trace_chunk_put(metadata_stream->trace_chunk);
			metadata_stream->trace_chunk = NULL;
		}
	}

	ret = 0;
error_snapshot:
	/*
	 * Unconditional teardown (runs on success too, despite the label
	 * name): release the read lock, unlink the stream from the channel's
	 * stream list and destroy it, then clear the channel's reference.
	 */
	metadata_stream->read_subbuffer_ops.unlock(metadata_stream);
	cds_list_del(&metadata_stream->send_node);
	consumer_stream_destroy(metadata_stream, NULL);
	metadata_channel->metadata_stream = NULL;
	rcu_read_unlock();
	return ret;
}
445
446 /*
447 * Receive command from session daemon and process it.
448 *
449 * Return 1 on success else a negative value or 0.
450 */
451 int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
452 int sock, struct pollfd *consumer_sockpoll)
453 {
454 int ret_func;
455 enum lttcomm_return_code ret_code = LTTCOMM_CONSUMERD_SUCCESS;
456 struct lttcomm_consumer_msg msg;
457
458 health_code_update();
459
460 {
461 ssize_t ret_recv;
462
463 ret_recv = lttcomm_recv_unix_sock(sock, &msg, sizeof(msg));
464 if (ret_recv != sizeof(msg)) {
465 if (ret_recv > 0) {
466 lttng_consumer_send_error(ctx,
467 LTTCOMM_CONSUMERD_ERROR_RECV_CMD);
468 ret_recv = -1;
469 }
470 return ret_recv;
471 }
472 }
473
474 health_code_update();
475
476 /* Deprecated command */
477 assert(msg.cmd_type != LTTNG_CONSUMER_STOP);
478
479 health_code_update();
480
481 /* relayd needs RCU read-side protection */
482 rcu_read_lock();
483
484 switch (msg.cmd_type) {
485 case LTTNG_CONSUMER_ADD_RELAYD_SOCKET:
486 {
487 /* Session daemon status message are handled in the following call. */
488 consumer_add_relayd_socket(msg.u.relayd_sock.net_index,
489 msg.u.relayd_sock.type, ctx, sock, consumer_sockpoll,
490 &msg.u.relayd_sock.sock, msg.u.relayd_sock.session_id,
491 msg.u.relayd_sock.relayd_session_id);
492 goto end_nosignal;
493 }
494 case LTTNG_CONSUMER_ADD_CHANNEL:
495 {
496 struct lttng_consumer_channel *new_channel;
497 int ret_send_status, ret_add_channel = 0;
498 const uint64_t chunk_id = msg.u.channel.chunk_id.value;
499
500 health_code_update();
501
502 /* First send a status message before receiving the fds. */
503 ret_send_status = consumer_send_status_msg(sock, ret_code);
504 if (ret_send_status < 0) {
505 /* Somehow, the session daemon is not responding anymore. */
506 goto error_fatal;
507 }
508
509 health_code_update();
510
511 DBG("consumer_add_channel %" PRIu64, msg.u.channel.channel_key);
512 new_channel = consumer_allocate_channel(msg.u.channel.channel_key,
513 msg.u.channel.session_id,
514 msg.u.channel.chunk_id.is_set ?
515 &chunk_id : NULL,
516 msg.u.channel.pathname,
517 msg.u.channel.name,
518 msg.u.channel.relayd_id, msg.u.channel.output,
519 msg.u.channel.tracefile_size,
520 msg.u.channel.tracefile_count, 0,
521 msg.u.channel.monitor,
522 msg.u.channel.live_timer_interval,
523 msg.u.channel.is_live,
524 NULL, NULL);
525 if (new_channel == NULL) {
526 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
527 goto end_nosignal;
528 }
529 new_channel->nb_init_stream_left = msg.u.channel.nb_init_streams;
530 switch (msg.u.channel.output) {
531 case LTTNG_EVENT_SPLICE:
532 new_channel->output = CONSUMER_CHANNEL_SPLICE;
533 break;
534 case LTTNG_EVENT_MMAP:
535 new_channel->output = CONSUMER_CHANNEL_MMAP;
536 break;
537 default:
538 ERR("Channel output unknown %d", msg.u.channel.output);
539 goto end_nosignal;
540 }
541
542 /* Translate and save channel type. */
543 switch (msg.u.channel.type) {
544 case CONSUMER_CHANNEL_TYPE_DATA:
545 case CONSUMER_CHANNEL_TYPE_METADATA:
546 new_channel->type = msg.u.channel.type;
547 break;
548 default:
549 assert(0);
550 goto end_nosignal;
551 };
552
553 health_code_update();
554
555 if (ctx->on_recv_channel != NULL) {
556 int ret_recv_channel =
557 ctx->on_recv_channel(new_channel);
558 if (ret_recv_channel == 0) {
559 ret_add_channel = consumer_add_channel(
560 new_channel, ctx);
561 } else if (ret_recv_channel < 0) {
562 goto end_nosignal;
563 }
564 } else {
565 ret_add_channel =
566 consumer_add_channel(new_channel, ctx);
567 }
568 if (msg.u.channel.type == CONSUMER_CHANNEL_TYPE_DATA &&
569 !ret_add_channel) {
570 int monitor_start_ret;
571
572 DBG("Consumer starting monitor timer");
573 consumer_timer_live_start(new_channel,
574 msg.u.channel.live_timer_interval);
575 monitor_start_ret = consumer_timer_monitor_start(
576 new_channel,
577 msg.u.channel.monitor_timer_interval);
578 if (monitor_start_ret < 0) {
579 ERR("Starting channel monitoring timer failed");
580 goto end_nosignal;
581 }
582 }
583
584 health_code_update();
585
586 /* If we received an error in add_channel, we need to report it. */
587 if (ret_add_channel < 0) {
588 ret_send_status = consumer_send_status_msg(
589 sock, ret_add_channel);
590 if (ret_send_status < 0) {
591 goto error_fatal;
592 }
593 goto end_nosignal;
594 }
595
596 goto end_nosignal;
597 }
598 case LTTNG_CONSUMER_ADD_STREAM:
599 {
600 int fd;
601 struct lttng_pipe *stream_pipe;
602 struct lttng_consumer_stream *new_stream;
603 struct lttng_consumer_channel *channel;
604 int alloc_ret = 0;
605 int ret_send_status, ret_poll, ret_get_max_subbuf_size;
606 ssize_t ret_pipe_write, ret_recv;
607
608 /*
609 * Get stream's channel reference. Needed when adding the stream to the
610 * global hash table.
611 */
612 channel = consumer_find_channel(msg.u.stream.channel_key);
613 if (!channel) {
614 /*
615 * We could not find the channel. Can happen if cpu hotplug
616 * happens while tearing down.
617 */
618 ERR("Unable to find channel key %" PRIu64, msg.u.stream.channel_key);
619 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
620 }
621
622 health_code_update();
623
624 /* First send a status message before receiving the fds. */
625 ret_send_status = consumer_send_status_msg(sock, ret_code);
626 if (ret_send_status < 0) {
627 /* Somehow, the session daemon is not responding anymore. */
628 goto error_add_stream_fatal;
629 }
630
631 health_code_update();
632
633 if (ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
634 /* Channel was not found. */
635 goto error_add_stream_nosignal;
636 }
637
638 /* Blocking call */
639 health_poll_entry();
640 ret_poll = lttng_consumer_poll_socket(consumer_sockpoll);
641 health_poll_exit();
642 if (ret_poll) {
643 goto error_add_stream_fatal;
644 }
645
646 health_code_update();
647
648 /* Get stream file descriptor from socket */
649 ret_recv = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
650 if (ret_recv != sizeof(fd)) {
651 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_FD);
652 ret_func = ret_recv;
653 goto end;
654 }
655
656 health_code_update();
657
658 /*
659 * Send status code to session daemon only if the recv works. If the
660 * above recv() failed, the session daemon is notified through the
661 * error socket and the teardown is eventually done.
662 */
663 ret_send_status = consumer_send_status_msg(sock, ret_code);
664 if (ret_send_status < 0) {
665 /* Somehow, the session daemon is not responding anymore. */
666 goto error_add_stream_nosignal;
667 }
668
669 health_code_update();
670
671 pthread_mutex_lock(&channel->lock);
672 new_stream = consumer_stream_create(
673 channel,
674 channel->key,
675 fd,
676 channel->name,
677 channel->relayd_id,
678 channel->session_id,
679 channel->trace_chunk,
680 msg.u.stream.cpu,
681 &alloc_ret,
682 channel->type,
683 channel->monitor);
684 if (new_stream == NULL) {
685 switch (alloc_ret) {
686 case -ENOMEM:
687 case -EINVAL:
688 default:
689 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
690 break;
691 }
692 pthread_mutex_unlock(&channel->lock);
693 goto error_add_stream_nosignal;
694 }
695
696 new_stream->wait_fd = fd;
697 ret_get_max_subbuf_size = kernctl_get_max_subbuf_size(
698 new_stream->wait_fd, &new_stream->max_sb_size);
699 if (ret_get_max_subbuf_size < 0) {
700 pthread_mutex_unlock(&channel->lock);
701 ERR("Failed to get kernel maximal subbuffer size");
702 goto error_add_stream_nosignal;
703 }
704
705 consumer_stream_update_channel_attributes(new_stream,
706 channel);
707
708 /*
709 * We've just assigned the channel to the stream so increment the
710 * refcount right now. We don't need to increment the refcount for
711 * streams in no monitor because we handle manually the cleanup of
712 * those. It is very important to make sure there is NO prior
713 * consumer_del_stream() calls or else the refcount will be unbalanced.
714 */
715 if (channel->monitor) {
716 uatomic_inc(&new_stream->chan->refcount);
717 }
718
719 /*
720 * The buffer flush is done on the session daemon side for the kernel
721 * so no need for the stream "hangup_flush_done" variable to be
722 * tracked. This is important for a kernel stream since we don't rely
723 * on the flush state of the stream to read data. It's not the case for
724 * user space tracing.
725 */
726 new_stream->hangup_flush_done = 0;
727
728 health_code_update();
729
730 pthread_mutex_lock(&new_stream->lock);
731 if (ctx->on_recv_stream) {
732 int ret_recv_stream = ctx->on_recv_stream(new_stream);
733 if (ret_recv_stream < 0) {
734 pthread_mutex_unlock(&new_stream->lock);
735 pthread_mutex_unlock(&channel->lock);
736 consumer_stream_free(new_stream);
737 goto error_add_stream_nosignal;
738 }
739 }
740 health_code_update();
741
742 if (new_stream->metadata_flag) {
743 channel->metadata_stream = new_stream;
744 }
745
746 /* Do not monitor this stream. */
747 if (!channel->monitor) {
748 DBG("Kernel consumer add stream %s in no monitor mode with "
749 "relayd id %" PRIu64, new_stream->name,
750 new_stream->net_seq_idx);
751 cds_list_add(&new_stream->send_node, &channel->streams.head);
752 pthread_mutex_unlock(&new_stream->lock);
753 pthread_mutex_unlock(&channel->lock);
754 goto end_add_stream;
755 }
756
757 /* Send stream to relayd if the stream has an ID. */
758 if (new_stream->net_seq_idx != (uint64_t) -1ULL) {
759 int ret_send_relayd_stream;
760
761 ret_send_relayd_stream = consumer_send_relayd_stream(
762 new_stream, new_stream->chan->pathname);
763 if (ret_send_relayd_stream < 0) {
764 pthread_mutex_unlock(&new_stream->lock);
765 pthread_mutex_unlock(&channel->lock);
766 consumer_stream_free(new_stream);
767 goto error_add_stream_nosignal;
768 }
769
770 /*
771 * If adding an extra stream to an already
772 * existing channel (e.g. cpu hotplug), we need
773 * to send the "streams_sent" command to relayd.
774 */
775 if (channel->streams_sent_to_relayd) {
776 int ret_send_relayd_streams_sent;
777
778 ret_send_relayd_streams_sent =
779 consumer_send_relayd_streams_sent(
780 new_stream->net_seq_idx);
781 if (ret_send_relayd_streams_sent < 0) {
782 pthread_mutex_unlock(&new_stream->lock);
783 pthread_mutex_unlock(&channel->lock);
784 goto error_add_stream_nosignal;
785 }
786 }
787 }
788 pthread_mutex_unlock(&new_stream->lock);
789 pthread_mutex_unlock(&channel->lock);
790
791 /* Get the right pipe where the stream will be sent. */
792 if (new_stream->metadata_flag) {
793 consumer_add_metadata_stream(new_stream);
794 stream_pipe = ctx->consumer_metadata_pipe;
795 } else {
796 consumer_add_data_stream(new_stream);
797 stream_pipe = ctx->consumer_data_pipe;
798 }
799
800 /* Visible to other threads */
801 new_stream->globally_visible = 1;
802
803 health_code_update();
804
805 ret_pipe_write = lttng_pipe_write(
806 stream_pipe, &new_stream, sizeof(new_stream));
807 if (ret_pipe_write < 0) {
808 ERR("Consumer write %s stream to pipe %d",
809 new_stream->metadata_flag ? "metadata" : "data",
810 lttng_pipe_get_writefd(stream_pipe));
811 if (new_stream->metadata_flag) {
812 consumer_del_stream_for_metadata(new_stream);
813 } else {
814 consumer_del_stream_for_data(new_stream);
815 }
816 goto error_add_stream_nosignal;
817 }
818
819 DBG("Kernel consumer ADD_STREAM %s (fd: %d) %s with relayd id %" PRIu64,
820 new_stream->name, fd, new_stream->chan->pathname, new_stream->relayd_stream_id);
821 end_add_stream:
822 break;
823 error_add_stream_nosignal:
824 goto end_nosignal;
825 error_add_stream_fatal:
826 goto error_fatal;
827 }
828 case LTTNG_CONSUMER_STREAMS_SENT:
829 {
830 struct lttng_consumer_channel *channel;
831 int ret_send_status;
832
833 /*
834 * Get stream's channel reference. Needed when adding the stream to the
835 * global hash table.
836 */
837 channel = consumer_find_channel(msg.u.sent_streams.channel_key);
838 if (!channel) {
839 /*
840 * We could not find the channel. Can happen if cpu hotplug
841 * happens while tearing down.
842 */
843 ERR("Unable to find channel key %" PRIu64,
844 msg.u.sent_streams.channel_key);
845 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
846 }
847
848 health_code_update();
849
850 /*
851 * Send status code to session daemon.
852 */
853 ret_send_status = consumer_send_status_msg(sock, ret_code);
854 if (ret_send_status < 0 ||
855 ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
856 /* Somehow, the session daemon is not responding anymore. */
857 goto error_streams_sent_nosignal;
858 }
859
860 health_code_update();
861
862 /*
863 * We should not send this message if we don't monitor the
864 * streams in this channel.
865 */
866 if (!channel->monitor) {
867 goto end_error_streams_sent;
868 }
869
870 health_code_update();
871 /* Send stream to relayd if the stream has an ID. */
872 if (msg.u.sent_streams.net_seq_idx != (uint64_t) -1ULL) {
873 int ret_send_relay_streams;
874
875 ret_send_relay_streams = consumer_send_relayd_streams_sent(
876 msg.u.sent_streams.net_seq_idx);
877 if (ret_send_relay_streams < 0) {
878 goto error_streams_sent_nosignal;
879 }
880 channel->streams_sent_to_relayd = true;
881 }
882 end_error_streams_sent:
883 break;
884 error_streams_sent_nosignal:
885 goto end_nosignal;
886 }
887 case LTTNG_CONSUMER_UPDATE_STREAM:
888 {
889 rcu_read_unlock();
890 return -ENOSYS;
891 }
892 case LTTNG_CONSUMER_DESTROY_RELAYD:
893 {
894 uint64_t index = msg.u.destroy_relayd.net_seq_idx;
895 struct consumer_relayd_sock_pair *relayd;
896 int ret_send_status;
897
898 DBG("Kernel consumer destroying relayd %" PRIu64, index);
899
900 /* Get relayd reference if exists. */
901 relayd = consumer_find_relayd(index);
902 if (relayd == NULL) {
903 DBG("Unable to find relayd %" PRIu64, index);
904 ret_code = LTTCOMM_CONSUMERD_RELAYD_FAIL;
905 }
906
907 /*
908 * Each relayd socket pair has a refcount of stream attached to it
909 * which tells if the relayd is still active or not depending on the
910 * refcount value.
911 *
912 * This will set the destroy flag of the relayd object and destroy it
913 * if the refcount reaches zero when called.
914 *
915 * The destroy can happen either here or when a stream fd hangs up.
916 */
917 if (relayd) {
918 consumer_flag_relayd_for_destroy(relayd);
919 }
920
921 health_code_update();
922
923 ret_send_status = consumer_send_status_msg(sock, ret_code);
924 if (ret_send_status < 0) {
925 /* Somehow, the session daemon is not responding anymore. */
926 goto error_fatal;
927 }
928
929 goto end_nosignal;
930 }
931 case LTTNG_CONSUMER_DATA_PENDING:
932 {
933 int32_t ret_data_pending;
934 uint64_t id = msg.u.data_pending.session_id;
935 ssize_t ret_send;
936
937 DBG("Kernel consumer data pending command for id %" PRIu64, id);
938
939 ret_data_pending = consumer_data_pending(id);
940
941 health_code_update();
942
943 /* Send back returned value to session daemon */
944 ret_send = lttcomm_send_unix_sock(sock, &ret_data_pending,
945 sizeof(ret_data_pending));
946 if (ret_send < 0) {
947 PERROR("send data pending ret code");
948 goto error_fatal;
949 }
950
951 /*
952 * No need to send back a status message since the data pending
953 * returned value is the response.
954 */
955 break;
956 }
957 case LTTNG_CONSUMER_SNAPSHOT_CHANNEL:
958 {
959 struct lttng_consumer_channel *channel;
960 uint64_t key = msg.u.snapshot_channel.key;
961 int ret_send_status;
962
963 channel = consumer_find_channel(key);
964 if (!channel) {
965 ERR("Channel %" PRIu64 " not found", key);
966 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
967 } else {
968 if (msg.u.snapshot_channel.metadata == 1) {
969 int ret_snapshot;
970
971 ret_snapshot = lttng_kconsumer_snapshot_metadata(
972 channel, key,
973 msg.u.snapshot_channel.pathname,
974 msg.u.snapshot_channel.relayd_id,
975 ctx);
976 if (ret_snapshot < 0) {
977 ERR("Snapshot metadata failed");
978 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
979 }
980 } else {
981 int ret_snapshot;
982
983 ret_snapshot = lttng_kconsumer_snapshot_channel(
984 channel, key,
985 msg.u.snapshot_channel.pathname,
986 msg.u.snapshot_channel.relayd_id,
987 msg.u.snapshot_channel
988 .nb_packets_per_stream,
989 ctx);
990 if (ret_snapshot < 0) {
991 ERR("Snapshot channel failed");
992 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
993 }
994 }
995 }
996 health_code_update();
997
998 ret_send_status = consumer_send_status_msg(sock, ret_code);
999 if (ret_send_status < 0) {
1000 /* Somehow, the session daemon is not responding anymore. */
1001 goto end_nosignal;
1002 }
1003 break;
1004 }
1005 case LTTNG_CONSUMER_DESTROY_CHANNEL:
1006 {
1007 uint64_t key = msg.u.destroy_channel.key;
1008 struct lttng_consumer_channel *channel;
1009 int ret_send_status;
1010
1011 channel = consumer_find_channel(key);
1012 if (!channel) {
1013 ERR("Kernel consumer destroy channel %" PRIu64 " not found", key);
1014 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1015 }
1016
1017 health_code_update();
1018
1019 ret_send_status = consumer_send_status_msg(sock, ret_code);
1020 if (ret_send_status < 0) {
1021 /* Somehow, the session daemon is not responding anymore. */
1022 goto end_destroy_channel;
1023 }
1024
1025 health_code_update();
1026
1027 /* Stop right now if no channel was found. */
1028 if (!channel) {
1029 goto end_destroy_channel;
1030 }
1031
1032 /*
1033 * This command should ONLY be issued for channel with streams set in
1034 * no monitor mode.
1035 */
1036 assert(!channel->monitor);
1037
1038 /*
1039 * The refcount should ALWAYS be 0 in the case of a channel in no
1040 * monitor mode.
1041 */
1042 assert(!uatomic_sub_return(&channel->refcount, 1));
1043
1044 consumer_del_channel(channel);
1045 end_destroy_channel:
1046 goto end_nosignal;
1047 }
1048 case LTTNG_CONSUMER_DISCARDED_EVENTS:
1049 {
1050 ssize_t ret;
1051 uint64_t count;
1052 struct lttng_consumer_channel *channel;
1053 uint64_t id = msg.u.discarded_events.session_id;
1054 uint64_t key = msg.u.discarded_events.channel_key;
1055
1056 DBG("Kernel consumer discarded events command for session id %"
1057 PRIu64 ", channel key %" PRIu64, id, key);
1058
1059 channel = consumer_find_channel(key);
1060 if (!channel) {
1061 ERR("Kernel consumer discarded events channel %"
1062 PRIu64 " not found", key);
1063 count = 0;
1064 } else {
1065 count = channel->discarded_events;
1066 }
1067
1068 health_code_update();
1069
1070 /* Send back returned value to session daemon */
1071 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1072 if (ret < 0) {
1073 PERROR("send discarded events");
1074 goto error_fatal;
1075 }
1076
1077 break;
1078 }
1079 case LTTNG_CONSUMER_LOST_PACKETS:
1080 {
1081 ssize_t ret;
1082 uint64_t count;
1083 struct lttng_consumer_channel *channel;
1084 uint64_t id = msg.u.lost_packets.session_id;
1085 uint64_t key = msg.u.lost_packets.channel_key;
1086
1087 DBG("Kernel consumer lost packets command for session id %"
1088 PRIu64 ", channel key %" PRIu64, id, key);
1089
1090 channel = consumer_find_channel(key);
1091 if (!channel) {
1092 ERR("Kernel consumer lost packets channel %"
1093 PRIu64 " not found", key);
1094 count = 0;
1095 } else {
1096 count = channel->lost_packets;
1097 }
1098
1099 health_code_update();
1100
1101 /* Send back returned value to session daemon */
1102 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1103 if (ret < 0) {
1104 PERROR("send lost packets");
1105 goto error_fatal;
1106 }
1107
1108 break;
1109 }
1110 case LTTNG_CONSUMER_SET_CHANNEL_MONITOR_PIPE:
1111 {
1112 int channel_monitor_pipe;
1113 int ret_send_status, ret_set_channel_monitor_pipe;
1114 ssize_t ret_recv;
1115
1116 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1117 /* Successfully received the command's type. */
1118 ret_send_status = consumer_send_status_msg(sock, ret_code);
1119 if (ret_send_status < 0) {
1120 goto error_fatal;
1121 }
1122
1123 ret_recv = lttcomm_recv_fds_unix_sock(
1124 sock, &channel_monitor_pipe, 1);
1125 if (ret_recv != sizeof(channel_monitor_pipe)) {
1126 ERR("Failed to receive channel monitor pipe");
1127 goto error_fatal;
1128 }
1129
1130 DBG("Received channel monitor pipe (%d)", channel_monitor_pipe);
1131 ret_set_channel_monitor_pipe =
1132 consumer_timer_thread_set_channel_monitor_pipe(
1133 channel_monitor_pipe);
1134 if (!ret_set_channel_monitor_pipe) {
1135 int flags;
1136 int ret_fcntl;
1137
1138 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1139 /* Set the pipe as non-blocking. */
1140 ret_fcntl = fcntl(channel_monitor_pipe, F_GETFL, 0);
1141 if (ret_fcntl == -1) {
1142 PERROR("fcntl get flags of the channel monitoring pipe");
1143 goto error_fatal;
1144 }
1145 flags = ret_fcntl;
1146
1147 ret_fcntl = fcntl(channel_monitor_pipe, F_SETFL,
1148 flags | O_NONBLOCK);
1149 if (ret_fcntl == -1) {
1150 PERROR("fcntl set O_NONBLOCK flag of the channel monitoring pipe");
1151 goto error_fatal;
1152 }
1153 DBG("Channel monitor pipe set as non-blocking");
1154 } else {
1155 ret_code = LTTCOMM_CONSUMERD_ALREADY_SET;
1156 }
1157 ret_send_status = consumer_send_status_msg(sock, ret_code);
1158 if (ret_send_status < 0) {
1159 goto error_fatal;
1160 }
1161 break;
1162 }
1163 case LTTNG_CONSUMER_ROTATE_CHANNEL:
1164 {
1165 struct lttng_consumer_channel *channel;
1166 uint64_t key = msg.u.rotate_channel.key;
1167 int ret_send_status;
1168
1169 DBG("Consumer rotate channel %" PRIu64, key);
1170
1171 channel = consumer_find_channel(key);
1172 if (!channel) {
1173 ERR("Channel %" PRIu64 " not found", key);
1174 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1175 } else {
1176 /*
1177 * Sample the rotate position of all the streams in this channel.
1178 */
1179 int ret_rotate_channel;
1180
1181 ret_rotate_channel = lttng_consumer_rotate_channel(
1182 channel, key,
1183 msg.u.rotate_channel.relayd_id,
1184 msg.u.rotate_channel.metadata, ctx);
1185 if (ret_rotate_channel < 0) {
1186 ERR("Rotate channel failed");
1187 ret_code = LTTCOMM_CONSUMERD_ROTATION_FAIL;
1188 }
1189
1190 health_code_update();
1191 }
1192
1193 ret_send_status = consumer_send_status_msg(sock, ret_code);
1194 if (ret_send_status < 0) {
1195 /* Somehow, the session daemon is not responding anymore. */
1196 goto error_rotate_channel;
1197 }
1198 if (channel) {
1199 /* Rotate the streams that are ready right now. */
1200 int ret_rotate;
1201
1202 ret_rotate = lttng_consumer_rotate_ready_streams(
1203 channel, key, ctx);
1204 if (ret_rotate < 0) {
1205 ERR("Rotate ready streams failed");
1206 }
1207 }
1208 break;
1209 error_rotate_channel:
1210 goto end_nosignal;
1211 }
1212 case LTTNG_CONSUMER_CLEAR_CHANNEL:
1213 {
1214 struct lttng_consumer_channel *channel;
1215 uint64_t key = msg.u.clear_channel.key;
1216 int ret_send_status;
1217
1218 channel = consumer_find_channel(key);
1219 if (!channel) {
1220 DBG("Channel %" PRIu64 " not found", key);
1221 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1222 } else {
1223 int ret_clear_channel;
1224
1225 ret_clear_channel =
1226 lttng_consumer_clear_channel(channel);
1227 if (ret_clear_channel) {
1228 ERR("Clear channel failed");
1229 ret_code = ret_clear_channel;
1230 }
1231
1232 health_code_update();
1233 }
1234
1235 ret_send_status = consumer_send_status_msg(sock, ret_code);
1236 if (ret_send_status < 0) {
1237 /* Somehow, the session daemon is not responding anymore. */
1238 goto end_nosignal;
1239 }
1240
1241 break;
1242 }
1243 case LTTNG_CONSUMER_INIT:
1244 {
1245 int ret_send_status;
1246
1247 ret_code = lttng_consumer_init_command(ctx,
1248 msg.u.init.sessiond_uuid);
1249 health_code_update();
1250 ret_send_status = consumer_send_status_msg(sock, ret_code);
1251 if (ret_send_status < 0) {
1252 /* Somehow, the session daemon is not responding anymore. */
1253 goto end_nosignal;
1254 }
1255 break;
1256 }
1257 case LTTNG_CONSUMER_CREATE_TRACE_CHUNK:
1258 {
1259 const struct lttng_credentials credentials = {
1260 .uid = LTTNG_OPTIONAL_INIT_VALUE(msg.u.create_trace_chunk.credentials.value.uid),
1261 .gid = LTTNG_OPTIONAL_INIT_VALUE(msg.u.create_trace_chunk.credentials.value.gid),
1262 };
1263 const bool is_local_trace =
1264 !msg.u.create_trace_chunk.relayd_id.is_set;
1265 const uint64_t relayd_id =
1266 msg.u.create_trace_chunk.relayd_id.value;
1267 const char *chunk_override_name =
1268 *msg.u.create_trace_chunk.override_name ?
1269 msg.u.create_trace_chunk.override_name :
1270 NULL;
1271 struct lttng_directory_handle *chunk_directory_handle = NULL;
1272
1273 /*
1274 * The session daemon will only provide a chunk directory file
1275 * descriptor for local traces.
1276 */
1277 if (is_local_trace) {
1278 int chunk_dirfd;
1279 int ret_send_status;
1280 ssize_t ret_recv;
1281
1282 /* Acnowledge the reception of the command. */
1283 ret_send_status = consumer_send_status_msg(
1284 sock, LTTCOMM_CONSUMERD_SUCCESS);
1285 if (ret_send_status < 0) {
1286 /* Somehow, the session daemon is not responding anymore. */
1287 goto end_nosignal;
1288 }
1289
1290 ret_recv = lttcomm_recv_fds_unix_sock(
1291 sock, &chunk_dirfd, 1);
1292 if (ret_recv != sizeof(chunk_dirfd)) {
1293 ERR("Failed to receive trace chunk directory file descriptor");
1294 goto error_fatal;
1295 }
1296
1297 DBG("Received trace chunk directory fd (%d)",
1298 chunk_dirfd);
1299 chunk_directory_handle = lttng_directory_handle_create_from_dirfd(
1300 chunk_dirfd);
1301 if (!chunk_directory_handle) {
1302 ERR("Failed to initialize chunk directory handle from directory file descriptor");
1303 if (close(chunk_dirfd)) {
1304 PERROR("Failed to close chunk directory file descriptor");
1305 }
1306 goto error_fatal;
1307 }
1308 }
1309
1310 ret_code = lttng_consumer_create_trace_chunk(
1311 !is_local_trace ? &relayd_id : NULL,
1312 msg.u.create_trace_chunk.session_id,
1313 msg.u.create_trace_chunk.chunk_id,
1314 (time_t) msg.u.create_trace_chunk
1315 .creation_timestamp,
1316 chunk_override_name,
1317 msg.u.create_trace_chunk.credentials.is_set ?
1318 &credentials :
1319 NULL,
1320 chunk_directory_handle);
1321 lttng_directory_handle_put(chunk_directory_handle);
1322 goto end_msg_sessiond;
1323 }
1324 case LTTNG_CONSUMER_CLOSE_TRACE_CHUNK:
1325 {
1326 enum lttng_trace_chunk_command_type close_command =
1327 msg.u.close_trace_chunk.close_command.value;
1328 const uint64_t relayd_id =
1329 msg.u.close_trace_chunk.relayd_id.value;
1330 struct lttcomm_consumer_close_trace_chunk_reply reply;
1331 char path[LTTNG_PATH_MAX];
1332 ssize_t ret_send;
1333
1334 ret_code = lttng_consumer_close_trace_chunk(
1335 msg.u.close_trace_chunk.relayd_id.is_set ?
1336 &relayd_id :
1337 NULL,
1338 msg.u.close_trace_chunk.session_id,
1339 msg.u.close_trace_chunk.chunk_id,
1340 (time_t) msg.u.close_trace_chunk.close_timestamp,
1341 msg.u.close_trace_chunk.close_command.is_set ?
1342 &close_command :
1343 NULL, path);
1344 reply.ret_code = ret_code;
1345 reply.path_length = strlen(path) + 1;
1346 ret_send = lttcomm_send_unix_sock(sock, &reply, sizeof(reply));
1347 if (ret_send != sizeof(reply)) {
1348 goto error_fatal;
1349 }
1350 ret_send = lttcomm_send_unix_sock(
1351 sock, path, reply.path_length);
1352 if (ret_send != reply.path_length) {
1353 goto error_fatal;
1354 }
1355 goto end_nosignal;
1356 }
1357 case LTTNG_CONSUMER_TRACE_CHUNK_EXISTS:
1358 {
1359 const uint64_t relayd_id =
1360 msg.u.trace_chunk_exists.relayd_id.value;
1361
1362 ret_code = lttng_consumer_trace_chunk_exists(
1363 msg.u.trace_chunk_exists.relayd_id.is_set ?
1364 &relayd_id : NULL,
1365 msg.u.trace_chunk_exists.session_id,
1366 msg.u.trace_chunk_exists.chunk_id);
1367 goto end_msg_sessiond;
1368 }
1369 case LTTNG_CONSUMER_OPEN_CHANNEL_PACKETS:
1370 {
1371 const uint64_t key = msg.u.open_channel_packets.key;
1372 struct lttng_consumer_channel *channel =
1373 consumer_find_channel(key);
1374
1375 if (channel) {
1376 pthread_mutex_lock(&channel->lock);
1377 ret_code = lttng_consumer_open_channel_packets(channel);
1378 pthread_mutex_unlock(&channel->lock);
1379 } else {
1380 WARN("Channel %" PRIu64 " not found", key);
1381 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1382 }
1383
1384 health_code_update();
1385 goto end_msg_sessiond;
1386 }
1387 default:
1388 goto end_nosignal;
1389 }
1390
1391 end_nosignal:
1392 /*
1393 * Return 1 to indicate success since the 0 value can be a socket
1394 * shutdown during the recv() or send() call.
1395 */
1396 ret_func = 1;
1397 goto end;
1398 error_fatal:
1399 /* This will issue a consumer stop. */
1400 ret_func = -1;
1401 goto end;
1402 end_msg_sessiond:
1403 /*
1404 * The returned value here is not useful since either way we'll return 1 to
1405 * the caller because the session daemon socket management is done
1406 * elsewhere. Returning a negative code or 0 will shutdown the consumer.
1407 */
1408 {
1409 int ret_send_status;
1410
1411 ret_send_status = consumer_send_status_msg(sock, ret_code);
1412 if (ret_send_status < 0) {
1413 goto error_fatal;
1414 }
1415 }
1416
1417 ret_func = 1;
1418
1419 end:
1420 health_code_update();
1421 rcu_read_unlock();
1422 return ret_func;
1423 }
1424
/*
 * Sync metadata: request the metadata from the session daemon and take a
 * buffer snapshot so that the metadata thread can consume it.
 *
 * Metadata stream lock MUST be acquired.
 */
1431 enum sync_metadata_status lttng_kconsumer_sync_metadata(
1432 struct lttng_consumer_stream *metadata)
1433 {
1434 int ret;
1435 enum sync_metadata_status status;
1436
1437 assert(metadata);
1438
1439 ret = kernctl_buffer_flush(metadata->wait_fd);
1440 if (ret < 0) {
1441 ERR("Failed to flush kernel stream");
1442 status = SYNC_METADATA_STATUS_ERROR;
1443 goto end;
1444 }
1445
1446 ret = kernctl_snapshot(metadata->wait_fd);
1447 if (ret < 0) {
1448 if (errno == EAGAIN) {
1449 /* No new metadata, exit. */
1450 DBG("Sync metadata, no new kernel metadata");
1451 status = SYNC_METADATA_STATUS_NO_DATA;
1452 } else {
1453 ERR("Sync metadata, taking kernel snapshot failed.");
1454 status = SYNC_METADATA_STATUS_ERROR;
1455 }
1456 } else {
1457 status = SYNC_METADATA_STATUS_NEW_DATA;
1458 }
1459
1460 end:
1461 return status;
1462 }
1463
1464 static
1465 int extract_common_subbuffer_info(struct lttng_consumer_stream *stream,
1466 struct stream_subbuffer *subbuf)
1467 {
1468 int ret;
1469
1470 ret = kernctl_get_subbuf_size(
1471 stream->wait_fd, &subbuf->info.data.subbuf_size);
1472 if (ret) {
1473 goto end;
1474 }
1475
1476 ret = kernctl_get_padded_subbuf_size(
1477 stream->wait_fd, &subbuf->info.data.padded_subbuf_size);
1478 if (ret) {
1479 goto end;
1480 }
1481
1482 end:
1483 return ret;
1484 }
1485
1486 static
1487 int extract_metadata_subbuffer_info(struct lttng_consumer_stream *stream,
1488 struct stream_subbuffer *subbuf)
1489 {
1490 int ret;
1491
1492 ret = extract_common_subbuffer_info(stream, subbuf);
1493 if (ret) {
1494 goto end;
1495 }
1496
1497 ret = kernctl_get_metadata_version(
1498 stream->wait_fd, &subbuf->info.metadata.version);
1499 if (ret) {
1500 goto end;
1501 }
1502
1503 end:
1504 return ret;
1505 }
1506
1507 static
1508 int extract_data_subbuffer_info(struct lttng_consumer_stream *stream,
1509 struct stream_subbuffer *subbuf)
1510 {
1511 int ret;
1512
1513 ret = extract_common_subbuffer_info(stream, subbuf);
1514 if (ret) {
1515 goto end;
1516 }
1517
1518 ret = kernctl_get_packet_size(
1519 stream->wait_fd, &subbuf->info.data.packet_size);
1520 if (ret < 0) {
1521 PERROR("Failed to get sub-buffer packet size");
1522 goto end;
1523 }
1524
1525 ret = kernctl_get_content_size(
1526 stream->wait_fd, &subbuf->info.data.content_size);
1527 if (ret < 0) {
1528 PERROR("Failed to get sub-buffer content size");
1529 goto end;
1530 }
1531
1532 ret = kernctl_get_timestamp_begin(
1533 stream->wait_fd, &subbuf->info.data.timestamp_begin);
1534 if (ret < 0) {
1535 PERROR("Failed to get sub-buffer begin timestamp");
1536 goto end;
1537 }
1538
1539 ret = kernctl_get_timestamp_end(
1540 stream->wait_fd, &subbuf->info.data.timestamp_end);
1541 if (ret < 0) {
1542 PERROR("Failed to get sub-buffer end timestamp");
1543 goto end;
1544 }
1545
1546 ret = kernctl_get_events_discarded(
1547 stream->wait_fd, &subbuf->info.data.events_discarded);
1548 if (ret) {
1549 PERROR("Failed to get sub-buffer events discarded count");
1550 goto end;
1551 }
1552
1553 ret = kernctl_get_sequence_number(stream->wait_fd,
1554 &subbuf->info.data.sequence_number.value);
1555 if (ret) {
1556 /* May not be supported by older LTTng-modules. */
1557 if (ret != -ENOTTY) {
1558 PERROR("Failed to get sub-buffer sequence number");
1559 goto end;
1560 }
1561 } else {
1562 subbuf->info.data.sequence_number.is_set = true;
1563 }
1564
1565 ret = kernctl_get_stream_id(
1566 stream->wait_fd, &subbuf->info.data.stream_id);
1567 if (ret < 0) {
1568 PERROR("Failed to get stream id");
1569 goto end;
1570 }
1571
1572 ret = kernctl_get_instance_id(stream->wait_fd,
1573 &subbuf->info.data.stream_instance_id.value);
1574 if (ret) {
1575 /* May not be supported by older LTTng-modules. */
1576 if (ret != -ENOTTY) {
1577 PERROR("Failed to get stream instance id");
1578 goto end;
1579 }
1580 } else {
1581 subbuf->info.data.stream_instance_id.is_set = true;
1582 }
1583 end:
1584 return ret;
1585 }
1586
1587 static
1588 enum get_next_subbuffer_status get_subbuffer_common(
1589 struct lttng_consumer_stream *stream,
1590 struct stream_subbuffer *subbuffer)
1591 {
1592 int ret;
1593 enum get_next_subbuffer_status status;
1594
1595 ret = kernctl_get_next_subbuf(stream->wait_fd);
1596 switch (ret) {
1597 case 0:
1598 status = GET_NEXT_SUBBUFFER_STATUS_OK;
1599 break;
1600 case -ENODATA:
1601 case -EAGAIN:
1602 /*
1603 * The caller only expects -ENODATA when there is no data to
1604 * read, but the kernel tracer returns -EAGAIN when there is
1605 * currently no data for a non-finalized stream, and -ENODATA
1606 * when there is no data for a finalized stream. Those can be
1607 * combined into a -ENODATA return value.
1608 */
1609 status = GET_NEXT_SUBBUFFER_STATUS_NO_DATA;
1610 goto end;
1611 default:
1612 status = GET_NEXT_SUBBUFFER_STATUS_ERROR;
1613 goto end;
1614 }
1615
1616 ret = stream->read_subbuffer_ops.extract_subbuffer_info(
1617 stream, subbuffer);
1618 if (ret) {
1619 status = GET_NEXT_SUBBUFFER_STATUS_ERROR;
1620 }
1621 end:
1622 return status;
1623 }
1624
1625 static
1626 enum get_next_subbuffer_status get_next_subbuffer_splice(
1627 struct lttng_consumer_stream *stream,
1628 struct stream_subbuffer *subbuffer)
1629 {
1630 const enum get_next_subbuffer_status status =
1631 get_subbuffer_common(stream, subbuffer);
1632
1633 if (status != GET_NEXT_SUBBUFFER_STATUS_OK) {
1634 goto end;
1635 }
1636
1637 subbuffer->buffer.fd = stream->wait_fd;
1638 end:
1639 return status;
1640 }
1641
1642 static
1643 enum get_next_subbuffer_status get_next_subbuffer_mmap(
1644 struct lttng_consumer_stream *stream,
1645 struct stream_subbuffer *subbuffer)
1646 {
1647 int ret;
1648 enum get_next_subbuffer_status status;
1649 const char *addr;
1650
1651 status = get_subbuffer_common(stream, subbuffer);
1652 if (status != GET_NEXT_SUBBUFFER_STATUS_OK) {
1653 goto end;
1654 }
1655
1656 ret = get_current_subbuf_addr(stream, &addr);
1657 if (ret) {
1658 status = GET_NEXT_SUBBUFFER_STATUS_ERROR;
1659 goto end;
1660 }
1661
1662 subbuffer->buffer.buffer = lttng_buffer_view_init(
1663 addr, 0, subbuffer->info.data.padded_subbuf_size);
1664 end:
1665 return status;
1666 }
1667
/*
 * get_next_subbuffer flavour used for metadata streams when the tracer
 * supports the "metadata check" variant: along with the sub-buffer, the
 * tracer reports whether the metadata has reached a coherent (parseable)
 * state, which is recorded in the sub-buffer info for the metadata
 * bucketization logic.
 */
static
enum get_next_subbuffer_status get_next_subbuffer_metadata_check(struct lttng_consumer_stream *stream,
		struct stream_subbuffer *subbuffer)
{
	int ret;
	const char *addr;
	bool coherent;
	enum get_next_subbuffer_status status;

	ret = kernctl_get_next_subbuf_metadata_check(stream->wait_fd,
			&coherent);
	if (ret) {
		goto end;
	}

	ret = stream->read_subbuffer_ops.extract_subbuffer_info(
			stream, subbuffer);
	if (ret) {
		goto end;
	}

	LTTNG_OPTIONAL_SET(&subbuffer->info.metadata.coherent, coherent);

	ret = get_current_subbuf_addr(stream, &addr);
	if (ret) {
		goto end;
	}

	subbuffer->buffer.buffer = lttng_buffer_view_init(
			addr, 0, subbuffer->info.data.padded_subbuf_size);
	DBG("Got metadata packet with padded_subbuf_size = %lu, coherent = %s",
			subbuffer->info.metadata.padded_subbuf_size,
			coherent ? "true" : "false");
end:
	/* Map the raw kernctl return value onto a status code. */
	switch (ret) {
	case 0:
		status = GET_NEXT_SUBBUFFER_STATUS_OK;
		break;
	case -ENODATA:
	case -EAGAIN:
		/*
		 * The caller only expects -ENODATA when there is no data to
		 * read, but the kernel tracer returns -EAGAIN when there is
		 * currently no data for a non-finalized stream, and -ENODATA
		 * when there is no data for a finalized stream. Those can be
		 * combined into a -ENODATA return value.
		 */
		status = GET_NEXT_SUBBUFFER_STATUS_NO_DATA;
		break;
	default:
		status = GET_NEXT_SUBBUFFER_STATUS_ERROR;
		break;
	}

	return status;
}
1730
1731 static
1732 int put_next_subbuffer(struct lttng_consumer_stream *stream,
1733 struct stream_subbuffer *subbuffer)
1734 {
1735 const int ret = kernctl_put_next_subbuf(stream->wait_fd);
1736
1737 if (ret) {
1738 if (ret == -EFAULT) {
1739 PERROR("Error in unreserving sub buffer");
1740 } else if (ret == -EIO) {
1741 /* Should never happen with newer LTTng versions */
1742 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted");
1743 }
1744 }
1745
1746 return ret;
1747 }
1748
/*
 * Probe whether the kernel tracer supports the
 * get_next_subbuf_metadata_check ioctl on the given stream fd.
 */
static
bool is_get_next_check_metadata_available(int tracer_fd)
{
	const int ret = kernctl_get_next_subbuf_metadata_check(tracer_fd, NULL);

	if (ret == 0) {
		/* get succeeded, make sure to put the subbuffer. */
		kernctl_put_subbuf(tracer_fd);
	}

	/* -ENOTTY means the ioctl is unknown to this tracer version. */
	return ret != -ENOTTY;
}
1762
1763 static
1764 int signal_metadata(struct lttng_consumer_stream *stream,
1765 struct lttng_consumer_local_data *ctx)
1766 {
1767 ASSERT_LOCKED(stream->metadata_rdv_lock);
1768 return pthread_cond_broadcast(&stream->metadata_rdv) ? -errno : 0;
1769 }
1770
/*
 * Wire up the read_subbuffer_ops callbacks of a newly-received stream
 * according to its type (data vs metadata), channel output mode
 * (mmap vs splice) and, for live sessions, the tracer's capabilities.
 *
 * Returns 0 on success, a negative value on error.
 */
static
int lttng_kconsumer_set_stream_ops(
		struct lttng_consumer_stream *stream)
{
	int ret = 0;

	if (stream->metadata_flag && stream->chan->is_live) {
		/*
		 * Live metadata: prefer the coherency-aware getter so that
		 * metadata can be bucketized until it is parseable by a
		 * live viewer. Fall back to the plain getter on older
		 * tracers.
		 */
		DBG("Attempting to enable metadata bucketization for live consumers");
		if (is_get_next_check_metadata_available(stream->wait_fd)) {
			DBG("Kernel tracer supports get_next_subbuffer_metadata_check, metadata will be accumulated until a coherent state is reached");
			stream->read_subbuffer_ops.get_next_subbuffer =
					get_next_subbuffer_metadata_check;
			ret = consumer_stream_enable_metadata_bucketization(
					stream);
			if (ret) {
				goto end;
			}
		} else {
			/*
			 * The kernel tracer version is too old to indicate
			 * when the metadata stream has reached a "coherent"
			 * (parseable) point.
			 *
			 * This means that a live viewer may see an incoherent
			 * sequence of metadata and fail to parse it.
			 */
			WARN("Kernel tracer does not support get_next_subbuffer_metadata_check which may cause live clients to fail to parse the metadata stream");
			metadata_bucket_destroy(stream->metadata_bucket);
			stream->metadata_bucket = NULL;
		}

		/* Wake metadata waiters whenever the stream goes idle. */
		stream->read_subbuffer_ops.on_sleep = signal_metadata;
	}

	/* Default getter when bucketization did not install one above. */
	if (!stream->read_subbuffer_ops.get_next_subbuffer) {
		if (stream->chan->output == CONSUMER_CHANNEL_MMAP) {
			stream->read_subbuffer_ops.get_next_subbuffer =
					get_next_subbuffer_mmap;
		} else {
			stream->read_subbuffer_ops.get_next_subbuffer =
					get_next_subbuffer_splice;
		}
	}

	if (stream->metadata_flag) {
		stream->read_subbuffer_ops.extract_subbuffer_info =
				extract_metadata_subbuffer_info;
	} else {
		stream->read_subbuffer_ops.extract_subbuffer_info =
				extract_data_subbuffer_info;
		/* Live data streams periodically emit index beacons. */
		if (stream->chan->is_live) {
			stream->read_subbuffer_ops.send_live_beacon =
					consumer_flush_kernel_index;
		}
	}

	stream->read_subbuffer_ops.put_next_subbuffer = put_next_subbuffer;
end:
	return ret;
}
1831
/*
 * Prepare a stream received from the session daemon for consumption:
 * create its output files (local traces only), map the ring buffer when
 * the channel uses mmap output, and install the read callbacks.
 *
 * Returns 0 on success (the fd is then managed by the consumer library),
 * a negative value on error.
 */
int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream *stream)
{
	int ret;

	assert(stream);

	/*
	 * Don't create anything if this is set for streaming or if there is
	 * no current trace chunk on the parent channel.
	 */
	if (stream->net_seq_idx == (uint64_t) -1ULL && stream->chan->monitor &&
			stream->chan->trace_chunk) {
		ret = consumer_stream_create_output_files(stream, true);
		if (ret) {
			goto error;
		}
	}

	if (stream->output == LTTNG_EVENT_MMAP) {
		/* get the len of the mmap region */
		unsigned long mmap_len;

		ret = kernctl_get_mmap_len(stream->wait_fd, &mmap_len);
		if (ret != 0) {
			PERROR("kernctl_get_mmap_len");
			goto error_close_fd;
		}
		stream->mmap_len = (size_t) mmap_len;

		/* Read-only, private mapping of the kernel ring buffer. */
		stream->mmap_base = mmap(NULL, stream->mmap_len, PROT_READ,
				MAP_PRIVATE, stream->wait_fd, 0);
		if (stream->mmap_base == MAP_FAILED) {
			PERROR("Error mmaping");
			ret = -1;
			goto error_close_fd;
		}
	}

	ret = lttng_kconsumer_set_stream_ops(stream);
	if (ret) {
		goto error_close_fd;
	}

	/* we return 0 to let the library handle the FD internally */
	return 0;

error_close_fd:
	/* Undo the output-file creation performed above, if any. */
	if (stream->out_fd >= 0) {
		int err;

		err = close(stream->out_fd);
		assert(!err);
		stream->out_fd = -1;
	}
error:
	return ret;
}
1889
/*
 * Check if data is still being extracted from the buffers for a specific
 * stream. The consumer data lock AND the stream lock MUST be acquired
 * before calling this function.
 *
 * Return 1 if the traced data are still getting read, else 0 meaning that
 * the data is available for trace-viewer reading.
 */
1898 int lttng_kconsumer_data_pending(struct lttng_consumer_stream *stream)
1899 {
1900 int ret;
1901
1902 assert(stream);
1903
1904 if (stream->endpoint_status != CONSUMER_ENDPOINT_ACTIVE) {
1905 ret = 0;
1906 goto end;
1907 }
1908
1909 ret = kernctl_get_next_subbuf(stream->wait_fd);
1910 if (ret == 0) {
1911 /* There is still data so let's put back this subbuffer. */
1912 ret = kernctl_put_subbuf(stream->wait_fd);
1913 assert(ret == 0);
1914 ret = 1; /* Data is pending */
1915 goto end;
1916 }
1917
1918 /* Data is NOT pending and ready to be read. */
1919 ret = 0;
1920
1921 end:
1922 return ret;
1923 }