consumerd: tag metadata channel as being part of a live session
[lttng-tools.git] / src / common / kernel-consumer / kernel-consumer.c
1 /*
2 * Copyright (C) 2011 - Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * Copyright (C) 2017 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include "common/buffer-view.h"
21 #include <stdint.h>
22 #define _LGPL_SOURCE
23 #include <assert.h>
24 #include <poll.h>
25 #include <pthread.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <sys/mman.h>
29 #include <sys/socket.h>
30 #include <sys/types.h>
31 #include <inttypes.h>
32 #include <unistd.h>
33 #include <sys/stat.h>
34
35 #include <bin/lttng-consumerd/health-consumerd.h>
36 #include <common/common.h>
37 #include <common/kernel-ctl/kernel-ctl.h>
38 #include <common/sessiond-comm/sessiond-comm.h>
39 #include <common/sessiond-comm/relayd.h>
40 #include <common/compat/fcntl.h>
41 #include <common/compat/endian.h>
42 #include <common/pipe.h>
43 #include <common/relayd/relayd.h>
44 #include <common/utils.h>
45 #include <common/consumer/consumer-stream.h>
46 #include <common/index/index.h>
47 #include <common/consumer/consumer-timer.h>
48 #include <common/optional.h>
49
50 #include "kernel-consumer.h"
51
52 extern struct lttng_consumer_global_data consumer_data;
53 extern int consumer_poll_timeout;
54
55 /*
56 * Take a snapshot for a specific fd
57 *
58 * Returns 0 on success, < 0 on error
59 */
60 int lttng_kconsumer_take_snapshot(struct lttng_consumer_stream *stream)
61 {
62 int ret = 0;
63 int infd = stream->wait_fd;
64
65 ret = kernctl_snapshot(infd);
66 /*
67 * -EAGAIN is not an error, it just means that there is no data to
68 * be read.
69 */
70 if (ret != 0 && ret != -EAGAIN) {
71 PERROR("Getting sub-buffer snapshot.");
72 }
73
74 return ret;
75 }
76
77 /*
78 * Sample consumed and produced positions for a specific fd.
79 *
80 * Returns 0 on success, < 0 on error.
81 */
82 int lttng_kconsumer_sample_snapshot_positions(
83 struct lttng_consumer_stream *stream)
84 {
85 assert(stream);
86
87 return kernctl_snapshot_sample_positions(stream->wait_fd);
88 }
89
90 /*
91 * Get the produced position
92 *
93 * Returns 0 on success, < 0 on error
94 */
95 int lttng_kconsumer_get_produced_snapshot(struct lttng_consumer_stream *stream,
96 unsigned long *pos)
97 {
98 int ret;
99 int infd = stream->wait_fd;
100
101 ret = kernctl_snapshot_get_produced(infd, pos);
102 if (ret != 0) {
103 PERROR("kernctl_snapshot_get_produced");
104 }
105
106 return ret;
107 }
108
109 /*
110 * Get the consumerd position
111 *
112 * Returns 0 on success, < 0 on error
113 */
114 int lttng_kconsumer_get_consumed_snapshot(struct lttng_consumer_stream *stream,
115 unsigned long *pos)
116 {
117 int ret;
118 int infd = stream->wait_fd;
119
120 ret = kernctl_snapshot_get_consumed(infd, pos);
121 if (ret != 0) {
122 PERROR("kernctl_snapshot_get_consumed");
123 }
124
125 return ret;
126 }
127
128 static
129 int get_current_subbuf_addr(struct lttng_consumer_stream *stream,
130 const char **addr)
131 {
132 int ret;
133 unsigned long mmap_offset;
134 const char *mmap_base = stream->mmap_base;
135
136 ret = kernctl_get_mmap_read_offset(stream->wait_fd, &mmap_offset);
137 if (ret < 0) {
138 PERROR("Failed to get mmap read offset");
139 goto error;
140 }
141
142 *addr = mmap_base + mmap_offset;
143 error:
144 return ret;
145 }
146
147 /*
148 * Take a snapshot of all the stream of a channel
149 * RCU read-side lock must be held across this function to ensure existence of
150 * channel. The channel lock must be held by the caller.
151 *
152 * Returns 0 on success, < 0 on error
153 */
154 static int lttng_kconsumer_snapshot_channel(
155 struct lttng_consumer_channel *channel,
156 uint64_t key, char *path, uint64_t relayd_id,
157 uint64_t nb_packets_per_stream,
158 struct lttng_consumer_local_data *ctx)
159 {
160 int ret;
161 struct lttng_consumer_stream *stream;
162
163 DBG("Kernel consumer snapshot channel %" PRIu64, key);
164
165 rcu_read_lock();
166
167 /* Splice is not supported yet for channel snapshot. */
168 if (channel->output != CONSUMER_CHANNEL_MMAP) {
169 ERR("Unsupported output type for channel \"%s\": mmap output is required to record a snapshot",
170 channel->name);
171 ret = -1;
172 goto end;
173 }
174
175 cds_list_for_each_entry(stream, &channel->streams.head, send_node) {
176 unsigned long consumed_pos, produced_pos;
177
178 health_code_update();
179
180 /*
181 * Lock stream because we are about to change its state.
182 */
183 pthread_mutex_lock(&stream->lock);
184
185 assert(channel->trace_chunk);
186 if (!lttng_trace_chunk_get(channel->trace_chunk)) {
187 /*
188 * Can't happen barring an internal error as the channel
189 * holds a reference to the trace chunk.
190 */
191 ERR("Failed to acquire reference to channel's trace chunk");
192 ret = -1;
193 goto end_unlock;
194 }
195 assert(!stream->trace_chunk);
196 stream->trace_chunk = channel->trace_chunk;
197
198 /*
199 * Assign the received relayd ID so we can use it for streaming. The streams
200 * are not visible to anyone so this is OK to change it.
201 */
202 stream->net_seq_idx = relayd_id;
203 channel->relayd_id = relayd_id;
204 if (relayd_id != (uint64_t) -1ULL) {
205 ret = consumer_send_relayd_stream(stream, path);
206 if (ret < 0) {
207 ERR("sending stream to relayd");
208 goto end_unlock;
209 }
210 } else {
211 ret = consumer_stream_create_output_files(stream,
212 false);
213 if (ret < 0) {
214 goto end_unlock;
215 }
216 DBG("Kernel consumer snapshot stream (%" PRIu64 ")",
217 stream->key);
218 }
219
220 ret = kernctl_buffer_flush_empty(stream->wait_fd);
221 if (ret < 0) {
222 /*
223 * Doing a buffer flush which does not take into
224 * account empty packets. This is not perfect
225 * for stream intersection, but required as a
226 * fall-back when "flush_empty" is not
227 * implemented by lttng-modules.
228 */
229 ret = kernctl_buffer_flush(stream->wait_fd);
230 if (ret < 0) {
231 ERR("Failed to flush kernel stream");
232 goto end_unlock;
233 }
234 goto end_unlock;
235 }
236
237 ret = lttng_kconsumer_take_snapshot(stream);
238 if (ret < 0) {
239 ERR("Taking kernel snapshot");
240 goto end_unlock;
241 }
242
243 ret = lttng_kconsumer_get_produced_snapshot(stream, &produced_pos);
244 if (ret < 0) {
245 ERR("Produced kernel snapshot position");
246 goto end_unlock;
247 }
248
249 ret = lttng_kconsumer_get_consumed_snapshot(stream, &consumed_pos);
250 if (ret < 0) {
251 ERR("Consumerd kernel snapshot position");
252 goto end_unlock;
253 }
254
255 consumed_pos = consumer_get_consume_start_pos(consumed_pos,
256 produced_pos, nb_packets_per_stream,
257 stream->max_sb_size);
258
259 while ((long) (consumed_pos - produced_pos) < 0) {
260 ssize_t read_len;
261 unsigned long len, padded_len;
262 const char *subbuf_addr;
263 struct lttng_buffer_view subbuf_view;
264
265 health_code_update();
266 DBG("Kernel consumer taking snapshot at pos %lu", consumed_pos);
267
268 ret = kernctl_get_subbuf(stream->wait_fd, &consumed_pos);
269 if (ret < 0) {
270 if (ret != -EAGAIN) {
271 PERROR("kernctl_get_subbuf snapshot");
272 goto end_unlock;
273 }
274 DBG("Kernel consumer get subbuf failed. Skipping it.");
275 consumed_pos += stream->max_sb_size;
276 stream->chan->lost_packets++;
277 continue;
278 }
279
280 ret = kernctl_get_subbuf_size(stream->wait_fd, &len);
281 if (ret < 0) {
282 ERR("Snapshot kernctl_get_subbuf_size");
283 goto error_put_subbuf;
284 }
285
286 ret = kernctl_get_padded_subbuf_size(stream->wait_fd, &padded_len);
287 if (ret < 0) {
288 ERR("Snapshot kernctl_get_padded_subbuf_size");
289 goto error_put_subbuf;
290 }
291
292 ret = get_current_subbuf_addr(stream, &subbuf_addr);
293 if (ret) {
294 goto error_put_subbuf;
295 }
296
297 subbuf_view = lttng_buffer_view_init(
298 subbuf_addr, 0, padded_len);
299 read_len = lttng_consumer_on_read_subbuffer_mmap(ctx,
300 stream, &subbuf_view,
301 padded_len - len, NULL);
302 /*
303 * We write the padded len in local tracefiles but the data len
304 * when using a relay. Display the error but continue processing
305 * to try to release the subbuffer.
306 */
307 if (relayd_id != (uint64_t) -1ULL) {
308 if (read_len != len) {
309 ERR("Error sending to the relay (ret: %zd != len: %lu)",
310 read_len, len);
311 }
312 } else {
313 if (read_len != padded_len) {
314 ERR("Error writing to tracefile (ret: %zd != len: %lu)",
315 read_len, padded_len);
316 }
317 }
318
319 ret = kernctl_put_subbuf(stream->wait_fd);
320 if (ret < 0) {
321 ERR("Snapshot kernctl_put_subbuf");
322 goto end_unlock;
323 }
324 consumed_pos += stream->max_sb_size;
325 }
326
327 if (relayd_id == (uint64_t) -1ULL) {
328 if (stream->out_fd >= 0) {
329 ret = close(stream->out_fd);
330 if (ret < 0) {
331 PERROR("Kernel consumer snapshot close out_fd");
332 goto end_unlock;
333 }
334 stream->out_fd = -1;
335 }
336 } else {
337 close_relayd_stream(stream);
338 stream->net_seq_idx = (uint64_t) -1ULL;
339 }
340 lttng_trace_chunk_put(stream->trace_chunk);
341 stream->trace_chunk = NULL;
342 pthread_mutex_unlock(&stream->lock);
343 }
344
345 /* All good! */
346 ret = 0;
347 goto end;
348
349 error_put_subbuf:
350 ret = kernctl_put_subbuf(stream->wait_fd);
351 if (ret < 0) {
352 ERR("Snapshot kernctl_put_subbuf error path");
353 }
354 end_unlock:
355 pthread_mutex_unlock(&stream->lock);
356 end:
357 rcu_read_unlock();
358 return ret;
359 }
360
361 /*
362 * Read the whole metadata available for a snapshot.
363 * RCU read-side lock must be held across this function to ensure existence of
364 * metadata_channel. The channel lock must be held by the caller.
365 *
366 * Returns 0 on success, < 0 on error
367 */
368 static int lttng_kconsumer_snapshot_metadata(
369 struct lttng_consumer_channel *metadata_channel,
370 uint64_t key, char *path, uint64_t relayd_id,
371 struct lttng_consumer_local_data *ctx)
372 {
373 int ret, use_relayd = 0;
374 ssize_t ret_read;
375 struct lttng_consumer_stream *metadata_stream;
376
377 assert(ctx);
378
379 DBG("Kernel consumer snapshot metadata with key %" PRIu64 " at path %s",
380 key, path);
381
382 rcu_read_lock();
383
384 metadata_stream = metadata_channel->metadata_stream;
385 assert(metadata_stream);
386
387 pthread_mutex_lock(&metadata_stream->lock);
388 assert(metadata_channel->trace_chunk);
389 assert(metadata_stream->trace_chunk);
390
391 /* Flag once that we have a valid relayd for the stream. */
392 if (relayd_id != (uint64_t) -1ULL) {
393 use_relayd = 1;
394 }
395
396 if (use_relayd) {
397 ret = consumer_send_relayd_stream(metadata_stream, path);
398 if (ret < 0) {
399 goto error_snapshot;
400 }
401 } else {
402 ret = consumer_stream_create_output_files(metadata_stream,
403 false);
404 if (ret < 0) {
405 goto error_snapshot;
406 }
407 }
408
409 do {
410 health_code_update();
411
412 ret_read = lttng_kconsumer_read_subbuffer(metadata_stream, ctx);
413 if (ret_read < 0) {
414 if (ret_read != -EAGAIN) {
415 ERR("Kernel snapshot reading metadata subbuffer (ret: %zd)",
416 ret_read);
417 ret = ret_read;
418 goto error_snapshot;
419 }
420 /* ret_read is negative at this point so we will exit the loop. */
421 continue;
422 }
423 } while (ret_read >= 0);
424
425 if (use_relayd) {
426 close_relayd_stream(metadata_stream);
427 metadata_stream->net_seq_idx = (uint64_t) -1ULL;
428 } else {
429 if (metadata_stream->out_fd >= 0) {
430 ret = close(metadata_stream->out_fd);
431 if (ret < 0) {
432 PERROR("Kernel consumer snapshot metadata close out_fd");
433 /*
434 * Don't go on error here since the snapshot was successful at this
435 * point but somehow the close failed.
436 */
437 }
438 metadata_stream->out_fd = -1;
439 lttng_trace_chunk_put(metadata_stream->trace_chunk);
440 metadata_stream->trace_chunk = NULL;
441 }
442 }
443
444 ret = 0;
445 error_snapshot:
446 pthread_mutex_unlock(&metadata_stream->lock);
447 cds_list_del(&metadata_stream->send_node);
448 consumer_stream_destroy(metadata_stream, NULL);
449 metadata_channel->metadata_stream = NULL;
450 rcu_read_unlock();
451 return ret;
452 }
453
454 /*
455 * Receive command from session daemon and process it.
456 *
457 * Return 1 on success else a negative value or 0.
458 */
459 int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
460 int sock, struct pollfd *consumer_sockpoll)
461 {
462 ssize_t ret;
463 enum lttcomm_return_code ret_code = LTTCOMM_CONSUMERD_SUCCESS;
464 struct lttcomm_consumer_msg msg;
465
466 health_code_update();
467
468 ret = lttcomm_recv_unix_sock(sock, &msg, sizeof(msg));
469 if (ret != sizeof(msg)) {
470 if (ret > 0) {
471 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_CMD);
472 ret = -1;
473 }
474 return ret;
475 }
476
477 health_code_update();
478
479 /* Deprecated command */
480 assert(msg.cmd_type != LTTNG_CONSUMER_STOP);
481
482 health_code_update();
483
484 /* relayd needs RCU read-side protection */
485 rcu_read_lock();
486
487 switch (msg.cmd_type) {
488 case LTTNG_CONSUMER_ADD_RELAYD_SOCKET:
489 {
490 /* Session daemon status message are handled in the following call. */
491 consumer_add_relayd_socket(msg.u.relayd_sock.net_index,
492 msg.u.relayd_sock.type, ctx, sock, consumer_sockpoll,
493 &msg.u.relayd_sock.sock, msg.u.relayd_sock.session_id,
494 msg.u.relayd_sock.relayd_session_id);
495 goto end_nosignal;
496 }
497 case LTTNG_CONSUMER_ADD_CHANNEL:
498 {
499 struct lttng_consumer_channel *new_channel;
500 int ret_recv;
501 const uint64_t chunk_id = msg.u.channel.chunk_id.value;
502
503 health_code_update();
504
505 /* First send a status message before receiving the fds. */
506 ret = consumer_send_status_msg(sock, ret_code);
507 if (ret < 0) {
508 /* Somehow, the session daemon is not responding anymore. */
509 goto error_fatal;
510 }
511
512 health_code_update();
513
514 DBG("consumer_add_channel %" PRIu64, msg.u.channel.channel_key);
515 new_channel = consumer_allocate_channel(msg.u.channel.channel_key,
516 msg.u.channel.session_id,
517 msg.u.channel.chunk_id.is_set ?
518 &chunk_id : NULL,
519 msg.u.channel.pathname,
520 msg.u.channel.name,
521 msg.u.channel.relayd_id, msg.u.channel.output,
522 msg.u.channel.tracefile_size,
523 msg.u.channel.tracefile_count, 0,
524 msg.u.channel.monitor,
525 msg.u.channel.live_timer_interval,
526 msg.u.channel.is_live,
527 NULL, NULL);
528 if (new_channel == NULL) {
529 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
530 goto end_nosignal;
531 }
532 new_channel->nb_init_stream_left = msg.u.channel.nb_init_streams;
533 switch (msg.u.channel.output) {
534 case LTTNG_EVENT_SPLICE:
535 new_channel->output = CONSUMER_CHANNEL_SPLICE;
536 break;
537 case LTTNG_EVENT_MMAP:
538 new_channel->output = CONSUMER_CHANNEL_MMAP;
539 break;
540 default:
541 ERR("Channel output unknown %d", msg.u.channel.output);
542 goto end_nosignal;
543 }
544
545 /* Translate and save channel type. */
546 switch (msg.u.channel.type) {
547 case CONSUMER_CHANNEL_TYPE_DATA:
548 case CONSUMER_CHANNEL_TYPE_METADATA:
549 new_channel->type = msg.u.channel.type;
550 break;
551 default:
552 assert(0);
553 goto end_nosignal;
554 };
555
556 health_code_update();
557
558 if (ctx->on_recv_channel != NULL) {
559 ret_recv = ctx->on_recv_channel(new_channel);
560 if (ret_recv == 0) {
561 ret = consumer_add_channel(new_channel, ctx);
562 } else if (ret_recv < 0) {
563 goto end_nosignal;
564 }
565 } else {
566 ret = consumer_add_channel(new_channel, ctx);
567 }
568 if (msg.u.channel.type == CONSUMER_CHANNEL_TYPE_DATA && !ret) {
569 int monitor_start_ret;
570
571 DBG("Consumer starting monitor timer");
572 consumer_timer_live_start(new_channel,
573 msg.u.channel.live_timer_interval);
574 monitor_start_ret = consumer_timer_monitor_start(
575 new_channel,
576 msg.u.channel.monitor_timer_interval);
577 if (monitor_start_ret < 0) {
578 ERR("Starting channel monitoring timer failed");
579 goto end_nosignal;
580 }
581
582 }
583
584 health_code_update();
585
586 /* If we received an error in add_channel, we need to report it. */
587 if (ret < 0) {
588 ret = consumer_send_status_msg(sock, ret);
589 if (ret < 0) {
590 goto error_fatal;
591 }
592 goto end_nosignal;
593 }
594
595 goto end_nosignal;
596 }
597 case LTTNG_CONSUMER_ADD_STREAM:
598 {
599 int fd;
600 struct lttng_pipe *stream_pipe;
601 struct lttng_consumer_stream *new_stream;
602 struct lttng_consumer_channel *channel;
603 int alloc_ret = 0;
604
605 /*
606 * Get stream's channel reference. Needed when adding the stream to the
607 * global hash table.
608 */
609 channel = consumer_find_channel(msg.u.stream.channel_key);
610 if (!channel) {
611 /*
612 * We could not find the channel. Can happen if cpu hotplug
613 * happens while tearing down.
614 */
615 ERR("Unable to find channel key %" PRIu64, msg.u.stream.channel_key);
616 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
617 }
618
619 health_code_update();
620
621 /* First send a status message before receiving the fds. */
622 ret = consumer_send_status_msg(sock, ret_code);
623 if (ret < 0) {
624 /* Somehow, the session daemon is not responding anymore. */
625 goto error_add_stream_fatal;
626 }
627
628 health_code_update();
629
630 if (ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
631 /* Channel was not found. */
632 goto error_add_stream_nosignal;
633 }
634
635 /* Blocking call */
636 health_poll_entry();
637 ret = lttng_consumer_poll_socket(consumer_sockpoll);
638 health_poll_exit();
639 if (ret) {
640 goto error_add_stream_fatal;
641 }
642
643 health_code_update();
644
645 /* Get stream file descriptor from socket */
646 ret = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
647 if (ret != sizeof(fd)) {
648 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_FD);
649 goto end;
650 }
651
652 health_code_update();
653
654 /*
655 * Send status code to session daemon only if the recv works. If the
656 * above recv() failed, the session daemon is notified through the
657 * error socket and the teardown is eventually done.
658 */
659 ret = consumer_send_status_msg(sock, ret_code);
660 if (ret < 0) {
661 /* Somehow, the session daemon is not responding anymore. */
662 goto error_add_stream_nosignal;
663 }
664
665 health_code_update();
666
667 pthread_mutex_lock(&channel->lock);
668 new_stream = consumer_allocate_stream(
669 channel,
670 channel->key,
671 fd,
672 channel->name,
673 channel->relayd_id,
674 channel->session_id,
675 channel->trace_chunk,
676 msg.u.stream.cpu,
677 &alloc_ret,
678 channel->type,
679 channel->monitor);
680 if (new_stream == NULL) {
681 switch (alloc_ret) {
682 case -ENOMEM:
683 case -EINVAL:
684 default:
685 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
686 break;
687 }
688 pthread_mutex_unlock(&channel->lock);
689 goto error_add_stream_nosignal;
690 }
691
692 new_stream->wait_fd = fd;
693 ret = kernctl_get_max_subbuf_size(new_stream->wait_fd,
694 &new_stream->max_sb_size);
695 if (ret < 0) {
696 pthread_mutex_unlock(&channel->lock);
697 ERR("Failed to get kernel maximal subbuffer size");
698 goto error_add_stream_nosignal;
699 }
700
701 consumer_stream_update_channel_attributes(new_stream,
702 channel);
703 switch (channel->output) {
704 case CONSUMER_CHANNEL_SPLICE:
705 new_stream->output = LTTNG_EVENT_SPLICE;
706 ret = utils_create_pipe(new_stream->splice_pipe);
707 if (ret < 0) {
708 pthread_mutex_unlock(&channel->lock);
709 goto error_add_stream_nosignal;
710 }
711 break;
712 case CONSUMER_CHANNEL_MMAP:
713 new_stream->output = LTTNG_EVENT_MMAP;
714 break;
715 default:
716 ERR("Stream output unknown %d", channel->output);
717 pthread_mutex_unlock(&channel->lock);
718 goto error_add_stream_nosignal;
719 }
720
721 /*
722 * We've just assigned the channel to the stream so increment the
723 * refcount right now. We don't need to increment the refcount for
724 * streams in no monitor because we handle manually the cleanup of
725 * those. It is very important to make sure there is NO prior
726 * consumer_del_stream() calls or else the refcount will be unbalanced.
727 */
728 if (channel->monitor) {
729 uatomic_inc(&new_stream->chan->refcount);
730 }
731
732 /*
733 * The buffer flush is done on the session daemon side for the kernel
734 * so no need for the stream "hangup_flush_done" variable to be
735 * tracked. This is important for a kernel stream since we don't rely
736 * on the flush state of the stream to read data. It's not the case for
737 * user space tracing.
738 */
739 new_stream->hangup_flush_done = 0;
740
741 health_code_update();
742
743 pthread_mutex_lock(&new_stream->lock);
744 if (ctx->on_recv_stream) {
745 ret = ctx->on_recv_stream(new_stream);
746 if (ret < 0) {
747 pthread_mutex_unlock(&new_stream->lock);
748 pthread_mutex_unlock(&channel->lock);
749 consumer_stream_free(new_stream);
750 goto error_add_stream_nosignal;
751 }
752 }
753 health_code_update();
754
755 if (new_stream->metadata_flag) {
756 channel->metadata_stream = new_stream;
757 }
758
759 /* Do not monitor this stream. */
760 if (!channel->monitor) {
761 DBG("Kernel consumer add stream %s in no monitor mode with "
762 "relayd id %" PRIu64, new_stream->name,
763 new_stream->net_seq_idx);
764 cds_list_add(&new_stream->send_node, &channel->streams.head);
765 pthread_mutex_unlock(&new_stream->lock);
766 pthread_mutex_unlock(&channel->lock);
767 goto end_add_stream;
768 }
769
770 /* Send stream to relayd if the stream has an ID. */
771 if (new_stream->net_seq_idx != (uint64_t) -1ULL) {
772 ret = consumer_send_relayd_stream(new_stream,
773 new_stream->chan->pathname);
774 if (ret < 0) {
775 pthread_mutex_unlock(&new_stream->lock);
776 pthread_mutex_unlock(&channel->lock);
777 consumer_stream_free(new_stream);
778 goto error_add_stream_nosignal;
779 }
780
781 /*
782 * If adding an extra stream to an already
783 * existing channel (e.g. cpu hotplug), we need
784 * to send the "streams_sent" command to relayd.
785 */
786 if (channel->streams_sent_to_relayd) {
787 ret = consumer_send_relayd_streams_sent(
788 new_stream->net_seq_idx);
789 if (ret < 0) {
790 pthread_mutex_unlock(&new_stream->lock);
791 pthread_mutex_unlock(&channel->lock);
792 goto error_add_stream_nosignal;
793 }
794 }
795 }
796 pthread_mutex_unlock(&new_stream->lock);
797 pthread_mutex_unlock(&channel->lock);
798
799 /* Get the right pipe where the stream will be sent. */
800 if (new_stream->metadata_flag) {
801 consumer_add_metadata_stream(new_stream);
802 stream_pipe = ctx->consumer_metadata_pipe;
803 } else {
804 consumer_add_data_stream(new_stream);
805 stream_pipe = ctx->consumer_data_pipe;
806 }
807
808 /* Visible to other threads */
809 new_stream->globally_visible = 1;
810
811 health_code_update();
812
813 ret = lttng_pipe_write(stream_pipe, &new_stream, sizeof(new_stream));
814 if (ret < 0) {
815 ERR("Consumer write %s stream to pipe %d",
816 new_stream->metadata_flag ? "metadata" : "data",
817 lttng_pipe_get_writefd(stream_pipe));
818 if (new_stream->metadata_flag) {
819 consumer_del_stream_for_metadata(new_stream);
820 } else {
821 consumer_del_stream_for_data(new_stream);
822 }
823 goto error_add_stream_nosignal;
824 }
825
826 DBG("Kernel consumer ADD_STREAM %s (fd: %d) %s with relayd id %" PRIu64,
827 new_stream->name, fd, new_stream->chan->pathname, new_stream->relayd_stream_id);
828 end_add_stream:
829 break;
830 error_add_stream_nosignal:
831 goto end_nosignal;
832 error_add_stream_fatal:
833 goto error_fatal;
834 }
835 case LTTNG_CONSUMER_STREAMS_SENT:
836 {
837 struct lttng_consumer_channel *channel;
838
839 /*
840 * Get stream's channel reference. Needed when adding the stream to the
841 * global hash table.
842 */
843 channel = consumer_find_channel(msg.u.sent_streams.channel_key);
844 if (!channel) {
845 /*
846 * We could not find the channel. Can happen if cpu hotplug
847 * happens while tearing down.
848 */
849 ERR("Unable to find channel key %" PRIu64,
850 msg.u.sent_streams.channel_key);
851 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
852 }
853
854 health_code_update();
855
856 /*
857 * Send status code to session daemon.
858 */
859 ret = consumer_send_status_msg(sock, ret_code);
860 if (ret < 0 || ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
861 /* Somehow, the session daemon is not responding anymore. */
862 goto error_streams_sent_nosignal;
863 }
864
865 health_code_update();
866
867 /*
868 * We should not send this message if we don't monitor the
869 * streams in this channel.
870 */
871 if (!channel->monitor) {
872 goto end_error_streams_sent;
873 }
874
875 health_code_update();
876 /* Send stream to relayd if the stream has an ID. */
877 if (msg.u.sent_streams.net_seq_idx != (uint64_t) -1ULL) {
878 ret = consumer_send_relayd_streams_sent(
879 msg.u.sent_streams.net_seq_idx);
880 if (ret < 0) {
881 goto error_streams_sent_nosignal;
882 }
883 channel->streams_sent_to_relayd = true;
884 }
885 end_error_streams_sent:
886 break;
887 error_streams_sent_nosignal:
888 goto end_nosignal;
889 }
890 case LTTNG_CONSUMER_UPDATE_STREAM:
891 {
892 rcu_read_unlock();
893 return -ENOSYS;
894 }
895 case LTTNG_CONSUMER_DESTROY_RELAYD:
896 {
897 uint64_t index = msg.u.destroy_relayd.net_seq_idx;
898 struct consumer_relayd_sock_pair *relayd;
899
900 DBG("Kernel consumer destroying relayd %" PRIu64, index);
901
902 /* Get relayd reference if exists. */
903 relayd = consumer_find_relayd(index);
904 if (relayd == NULL) {
905 DBG("Unable to find relayd %" PRIu64, index);
906 ret_code = LTTCOMM_CONSUMERD_RELAYD_FAIL;
907 }
908
909 /*
910 * Each relayd socket pair has a refcount of stream attached to it
911 * which tells if the relayd is still active or not depending on the
912 * refcount value.
913 *
914 * This will set the destroy flag of the relayd object and destroy it
915 * if the refcount reaches zero when called.
916 *
917 * The destroy can happen either here or when a stream fd hangs up.
918 */
919 if (relayd) {
920 consumer_flag_relayd_for_destroy(relayd);
921 }
922
923 health_code_update();
924
925 ret = consumer_send_status_msg(sock, ret_code);
926 if (ret < 0) {
927 /* Somehow, the session daemon is not responding anymore. */
928 goto error_fatal;
929 }
930
931 goto end_nosignal;
932 }
933 case LTTNG_CONSUMER_DATA_PENDING:
934 {
935 int32_t ret;
936 uint64_t id = msg.u.data_pending.session_id;
937
938 DBG("Kernel consumer data pending command for id %" PRIu64, id);
939
940 ret = consumer_data_pending(id);
941
942 health_code_update();
943
944 /* Send back returned value to session daemon */
945 ret = lttcomm_send_unix_sock(sock, &ret, sizeof(ret));
946 if (ret < 0) {
947 PERROR("send data pending ret code");
948 goto error_fatal;
949 }
950
951 /*
952 * No need to send back a status message since the data pending
953 * returned value is the response.
954 */
955 break;
956 }
957 case LTTNG_CONSUMER_SNAPSHOT_CHANNEL:
958 {
959 struct lttng_consumer_channel *channel;
960 uint64_t key = msg.u.snapshot_channel.key;
961
962 channel = consumer_find_channel(key);
963 if (!channel) {
964 ERR("Channel %" PRIu64 " not found", key);
965 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
966 } else {
967 pthread_mutex_lock(&channel->lock);
968 if (msg.u.snapshot_channel.metadata == 1) {
969 ret = lttng_kconsumer_snapshot_metadata(channel, key,
970 msg.u.snapshot_channel.pathname,
971 msg.u.snapshot_channel.relayd_id, ctx);
972 if (ret < 0) {
973 ERR("Snapshot metadata failed");
974 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
975 }
976 } else {
977 ret = lttng_kconsumer_snapshot_channel(channel, key,
978 msg.u.snapshot_channel.pathname,
979 msg.u.snapshot_channel.relayd_id,
980 msg.u.snapshot_channel.nb_packets_per_stream,
981 ctx);
982 if (ret < 0) {
983 ERR("Snapshot channel failed");
984 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
985 }
986 }
987 pthread_mutex_unlock(&channel->lock);
988 }
989 health_code_update();
990
991 ret = consumer_send_status_msg(sock, ret_code);
992 if (ret < 0) {
993 /* Somehow, the session daemon is not responding anymore. */
994 goto end_nosignal;
995 }
996 break;
997 }
998 case LTTNG_CONSUMER_DESTROY_CHANNEL:
999 {
1000 uint64_t key = msg.u.destroy_channel.key;
1001 struct lttng_consumer_channel *channel;
1002
1003 channel = consumer_find_channel(key);
1004 if (!channel) {
1005 ERR("Kernel consumer destroy channel %" PRIu64 " not found", key);
1006 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1007 }
1008
1009 health_code_update();
1010
1011 ret = consumer_send_status_msg(sock, ret_code);
1012 if (ret < 0) {
1013 /* Somehow, the session daemon is not responding anymore. */
1014 goto end_destroy_channel;
1015 }
1016
1017 health_code_update();
1018
1019 /* Stop right now if no channel was found. */
1020 if (!channel) {
1021 goto end_destroy_channel;
1022 }
1023
1024 /*
1025 * This command should ONLY be issued for channel with streams set in
1026 * no monitor mode.
1027 */
1028 assert(!channel->monitor);
1029
1030 /*
1031 * The refcount should ALWAYS be 0 in the case of a channel in no
1032 * monitor mode.
1033 */
1034 assert(!uatomic_sub_return(&channel->refcount, 1));
1035
1036 consumer_del_channel(channel);
1037 end_destroy_channel:
1038 goto end_nosignal;
1039 }
1040 case LTTNG_CONSUMER_DISCARDED_EVENTS:
1041 {
1042 ssize_t ret;
1043 uint64_t count;
1044 struct lttng_consumer_channel *channel;
1045 uint64_t id = msg.u.discarded_events.session_id;
1046 uint64_t key = msg.u.discarded_events.channel_key;
1047
1048 DBG("Kernel consumer discarded events command for session id %"
1049 PRIu64 ", channel key %" PRIu64, id, key);
1050
1051 channel = consumer_find_channel(key);
1052 if (!channel) {
1053 ERR("Kernel consumer discarded events channel %"
1054 PRIu64 " not found", key);
1055 count = 0;
1056 } else {
1057 count = channel->discarded_events;
1058 }
1059
1060 health_code_update();
1061
1062 /* Send back returned value to session daemon */
1063 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1064 if (ret < 0) {
1065 PERROR("send discarded events");
1066 goto error_fatal;
1067 }
1068
1069 break;
1070 }
1071 case LTTNG_CONSUMER_LOST_PACKETS:
1072 {
1073 ssize_t ret;
1074 uint64_t count;
1075 struct lttng_consumer_channel *channel;
1076 uint64_t id = msg.u.lost_packets.session_id;
1077 uint64_t key = msg.u.lost_packets.channel_key;
1078
1079 DBG("Kernel consumer lost packets command for session id %"
1080 PRIu64 ", channel key %" PRIu64, id, key);
1081
1082 channel = consumer_find_channel(key);
1083 if (!channel) {
1084 ERR("Kernel consumer lost packets channel %"
1085 PRIu64 " not found", key);
1086 count = 0;
1087 } else {
1088 count = channel->lost_packets;
1089 }
1090
1091 health_code_update();
1092
1093 /* Send back returned value to session daemon */
1094 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1095 if (ret < 0) {
1096 PERROR("send lost packets");
1097 goto error_fatal;
1098 }
1099
1100 break;
1101 }
1102 case LTTNG_CONSUMER_SET_CHANNEL_MONITOR_PIPE:
1103 {
1104 int channel_monitor_pipe;
1105
1106 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1107 /* Successfully received the command's type. */
1108 ret = consumer_send_status_msg(sock, ret_code);
1109 if (ret < 0) {
1110 goto error_fatal;
1111 }
1112
1113 ret = lttcomm_recv_fds_unix_sock(sock, &channel_monitor_pipe,
1114 1);
1115 if (ret != sizeof(channel_monitor_pipe)) {
1116 ERR("Failed to receive channel monitor pipe");
1117 goto error_fatal;
1118 }
1119
1120 DBG("Received channel monitor pipe (%d)", channel_monitor_pipe);
1121 ret = consumer_timer_thread_set_channel_monitor_pipe(
1122 channel_monitor_pipe);
1123 if (!ret) {
1124 int flags;
1125
1126 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1127 /* Set the pipe as non-blocking. */
1128 ret = fcntl(channel_monitor_pipe, F_GETFL, 0);
1129 if (ret == -1) {
1130 PERROR("fcntl get flags of the channel monitoring pipe");
1131 goto error_fatal;
1132 }
1133 flags = ret;
1134
1135 ret = fcntl(channel_monitor_pipe, F_SETFL,
1136 flags | O_NONBLOCK);
1137 if (ret == -1) {
1138 PERROR("fcntl set O_NONBLOCK flag of the channel monitoring pipe");
1139 goto error_fatal;
1140 }
1141 DBG("Channel monitor pipe set as non-blocking");
1142 } else {
1143 ret_code = LTTCOMM_CONSUMERD_ALREADY_SET;
1144 }
1145 ret = consumer_send_status_msg(sock, ret_code);
1146 if (ret < 0) {
1147 goto error_fatal;
1148 }
1149 break;
1150 }
1151 case LTTNG_CONSUMER_ROTATE_CHANNEL:
1152 {
1153 struct lttng_consumer_channel *channel;
1154 uint64_t key = msg.u.rotate_channel.key;
1155
1156 DBG("Consumer rotate channel %" PRIu64, key);
1157
1158 channel = consumer_find_channel(key);
1159 if (!channel) {
1160 ERR("Channel %" PRIu64 " not found", key);
1161 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1162 } else {
1163 /*
1164 * Sample the rotate position of all the streams in this channel.
1165 */
1166 ret = lttng_consumer_rotate_channel(channel, key,
1167 msg.u.rotate_channel.relayd_id,
1168 msg.u.rotate_channel.metadata,
1169 ctx);
1170 if (ret < 0) {
1171 ERR("Rotate channel failed");
1172 ret_code = LTTCOMM_CONSUMERD_ROTATION_FAIL;
1173 }
1174
1175 health_code_update();
1176 }
1177 ret = consumer_send_status_msg(sock, ret_code);
1178 if (ret < 0) {
1179 /* Somehow, the session daemon is not responding anymore. */
1180 goto error_rotate_channel;
1181 }
1182 if (channel) {
1183 /* Rotate the streams that are ready right now. */
1184 ret = lttng_consumer_rotate_ready_streams(
1185 channel, key, ctx);
1186 if (ret < 0) {
1187 ERR("Rotate ready streams failed");
1188 }
1189 }
1190 break;
1191 error_rotate_channel:
1192 goto end_nosignal;
1193 }
1194 case LTTNG_CONSUMER_INIT:
1195 {
1196 ret_code = lttng_consumer_init_command(ctx,
1197 msg.u.init.sessiond_uuid);
1198 health_code_update();
1199 ret = consumer_send_status_msg(sock, ret_code);
1200 if (ret < 0) {
1201 /* Somehow, the session daemon is not responding anymore. */
1202 goto end_nosignal;
1203 }
1204 break;
1205 }
1206 case LTTNG_CONSUMER_CREATE_TRACE_CHUNK:
1207 {
1208 const struct lttng_credentials credentials = {
1209 .uid = msg.u.create_trace_chunk.credentials.value.uid,
1210 .gid = msg.u.create_trace_chunk.credentials.value.gid,
1211 };
1212 const bool is_local_trace =
1213 !msg.u.create_trace_chunk.relayd_id.is_set;
1214 const uint64_t relayd_id =
1215 msg.u.create_trace_chunk.relayd_id.value;
1216 const char *chunk_override_name =
1217 *msg.u.create_trace_chunk.override_name ?
1218 msg.u.create_trace_chunk.override_name :
1219 NULL;
1220 LTTNG_OPTIONAL(struct lttng_directory_handle) chunk_directory_handle =
1221 LTTNG_OPTIONAL_INIT;
1222
1223 /*
1224 * The session daemon will only provide a chunk directory file
1225 * descriptor for local traces.
1226 */
1227 if (is_local_trace) {
1228 int chunk_dirfd;
1229
1230 /* Acnowledge the reception of the command. */
1231 ret = consumer_send_status_msg(sock,
1232 LTTCOMM_CONSUMERD_SUCCESS);
1233 if (ret < 0) {
1234 /* Somehow, the session daemon is not responding anymore. */
1235 goto end_nosignal;
1236 }
1237
1238 ret = lttcomm_recv_fds_unix_sock(sock, &chunk_dirfd, 1);
1239 if (ret != sizeof(chunk_dirfd)) {
1240 ERR("Failed to receive trace chunk directory file descriptor");
1241 goto error_fatal;
1242 }
1243
1244 DBG("Received trace chunk directory fd (%d)",
1245 chunk_dirfd);
1246 ret = lttng_directory_handle_init_from_dirfd(
1247 &chunk_directory_handle.value,
1248 chunk_dirfd);
1249 if (ret) {
1250 ERR("Failed to initialize chunk directory handle from directory file descriptor");
1251 if (close(chunk_dirfd)) {
1252 PERROR("Failed to close chunk directory file descriptor");
1253 }
1254 goto error_fatal;
1255 }
1256 chunk_directory_handle.is_set = true;
1257 }
1258
1259 ret_code = lttng_consumer_create_trace_chunk(
1260 !is_local_trace ? &relayd_id : NULL,
1261 msg.u.create_trace_chunk.session_id,
1262 msg.u.create_trace_chunk.chunk_id,
1263 (time_t) msg.u.create_trace_chunk
1264 .creation_timestamp,
1265 chunk_override_name,
1266 msg.u.create_trace_chunk.credentials.is_set ?
1267 &credentials :
1268 NULL,
1269 chunk_directory_handle.is_set ?
1270 &chunk_directory_handle.value :
1271 NULL);
1272
1273 if (chunk_directory_handle.is_set) {
1274 lttng_directory_handle_fini(
1275 &chunk_directory_handle.value);
1276 }
1277 goto end_msg_sessiond;
1278 }
1279 case LTTNG_CONSUMER_CLOSE_TRACE_CHUNK:
1280 {
1281 enum lttng_trace_chunk_command_type close_command =
1282 msg.u.close_trace_chunk.close_command.value;
1283 const uint64_t relayd_id =
1284 msg.u.close_trace_chunk.relayd_id.value;
1285 struct lttcomm_consumer_close_trace_chunk_reply reply;
1286 char path[LTTNG_PATH_MAX];
1287
1288 ret_code = lttng_consumer_close_trace_chunk(
1289 msg.u.close_trace_chunk.relayd_id.is_set ?
1290 &relayd_id :
1291 NULL,
1292 msg.u.close_trace_chunk.session_id,
1293 msg.u.close_trace_chunk.chunk_id,
1294 (time_t) msg.u.close_trace_chunk.close_timestamp,
1295 msg.u.close_trace_chunk.close_command.is_set ?
1296 &close_command :
1297 NULL, path);
1298 reply.ret_code = ret_code;
1299 reply.path_length = strlen(path) + 1;
1300 ret = lttcomm_send_unix_sock(sock, &reply, sizeof(reply));
1301 if (ret != sizeof(reply)) {
1302 goto error_fatal;
1303 }
1304 ret = lttcomm_send_unix_sock(sock, path, reply.path_length);
1305 if (ret != reply.path_length) {
1306 goto error_fatal;
1307 }
1308 goto end_nosignal;
1309 }
1310 case LTTNG_CONSUMER_TRACE_CHUNK_EXISTS:
1311 {
1312 const uint64_t relayd_id =
1313 msg.u.trace_chunk_exists.relayd_id.value;
1314
1315 ret_code = lttng_consumer_trace_chunk_exists(
1316 msg.u.trace_chunk_exists.relayd_id.is_set ?
1317 &relayd_id : NULL,
1318 msg.u.trace_chunk_exists.session_id,
1319 msg.u.trace_chunk_exists.chunk_id);
1320 goto end_msg_sessiond;
1321 }
1322 default:
1323 goto end_nosignal;
1324 }
1325
1326 end_nosignal:
1327 /*
1328 * Return 1 to indicate success since the 0 value can be a socket
1329 * shutdown during the recv() or send() call.
1330 */
1331 ret = 1;
1332 goto end;
1333 error_fatal:
1334 /* This will issue a consumer stop. */
1335 ret = -1;
1336 goto end;
1337 end_msg_sessiond:
1338 /*
1339 * The returned value here is not useful since either way we'll return 1 to
1340 * the caller because the session daemon socket management is done
1341 * elsewhere. Returning a negative code or 0 will shutdown the consumer.
1342 */
1343 ret = consumer_send_status_msg(sock, ret_code);
1344 if (ret < 0) {
1345 goto error_fatal;
1346 }
1347 ret = 1;
1348 end:
1349 health_code_update();
1350 rcu_read_unlock();
1351 return ret;
1352 }
1353
1354 /*
1355 * Populate index values of a kernel stream. Values are set in big endian order.
1356 *
1357 * Return 0 on success or else a negative value.
1358 */
1359 static int get_index_values(struct ctf_packet_index *index, int infd)
1360 {
1361 int ret;
1362 uint64_t packet_size, content_size, timestamp_begin, timestamp_end,
1363 events_discarded, stream_id, stream_instance_id,
1364 packet_seq_num;
1365
1366 ret = kernctl_get_timestamp_begin(infd, &timestamp_begin);
1367 if (ret < 0) {
1368 PERROR("kernctl_get_timestamp_begin");
1369 goto error;
1370 }
1371
1372 ret = kernctl_get_timestamp_end(infd, &timestamp_end);
1373 if (ret < 0) {
1374 PERROR("kernctl_get_timestamp_end");
1375 goto error;
1376 }
1377
1378 ret = kernctl_get_events_discarded(infd, &events_discarded);
1379 if (ret < 0) {
1380 PERROR("kernctl_get_events_discarded");
1381 goto error;
1382 }
1383
1384 ret = kernctl_get_content_size(infd, &content_size);
1385 if (ret < 0) {
1386 PERROR("kernctl_get_content_size");
1387 goto error;
1388 }
1389
1390 ret = kernctl_get_packet_size(infd, &packet_size);
1391 if (ret < 0) {
1392 PERROR("kernctl_get_packet_size");
1393 goto error;
1394 }
1395
1396 ret = kernctl_get_stream_id(infd, &stream_id);
1397 if (ret < 0) {
1398 PERROR("kernctl_get_stream_id");
1399 goto error;
1400 }
1401
1402 ret = kernctl_get_instance_id(infd, &stream_instance_id);
1403 if (ret < 0) {
1404 if (ret == -ENOTTY) {
1405 /* Command not implemented by lttng-modules. */
1406 stream_instance_id = -1ULL;
1407 } else {
1408 PERROR("kernctl_get_instance_id");
1409 goto error;
1410 }
1411 }
1412
1413 ret = kernctl_get_sequence_number(infd, &packet_seq_num);
1414 if (ret < 0) {
1415 if (ret == -ENOTTY) {
1416 /* Command not implemented by lttng-modules. */
1417 packet_seq_num = -1ULL;
1418 ret = 0;
1419 } else {
1420 PERROR("kernctl_get_sequence_number");
1421 goto error;
1422 }
1423 }
1424 index->packet_seq_num = htobe64(index->packet_seq_num);
1425
1426 *index = (typeof(*index)) {
1427 .offset = index->offset,
1428 .packet_size = htobe64(packet_size),
1429 .content_size = htobe64(content_size),
1430 .timestamp_begin = htobe64(timestamp_begin),
1431 .timestamp_end = htobe64(timestamp_end),
1432 .events_discarded = htobe64(events_discarded),
1433 .stream_id = htobe64(stream_id),
1434 .stream_instance_id = htobe64(stream_instance_id),
1435 .packet_seq_num = htobe64(packet_seq_num),
1436 };
1437
1438 error:
1439 return ret;
1440 }
1441 /*
1442 * Sync metadata meaning request them to the session daemon and snapshot to the
1443 * metadata thread can consumer them.
1444 *
1445 * Metadata stream lock MUST be acquired.
1446 *
1447 * Return 0 if new metadatda is available, EAGAIN if the metadata stream
1448 * is empty or a negative value on error.
1449 */
1450 int lttng_kconsumer_sync_metadata(struct lttng_consumer_stream *metadata)
1451 {
1452 int ret;
1453
1454 assert(metadata);
1455
1456 ret = kernctl_buffer_flush(metadata->wait_fd);
1457 if (ret < 0) {
1458 ERR("Failed to flush kernel stream");
1459 goto end;
1460 }
1461
1462 ret = kernctl_snapshot(metadata->wait_fd);
1463 if (ret < 0) {
1464 if (ret != -EAGAIN) {
1465 ERR("Sync metadata, taking kernel snapshot failed.");
1466 goto end;
1467 }
1468 DBG("Sync metadata, no new kernel metadata");
1469 /* No new metadata, exit. */
1470 ret = ENODATA;
1471 goto end;
1472 }
1473
1474 end:
1475 return ret;
1476 }
1477
1478 static
1479 int update_stream_stats(struct lttng_consumer_stream *stream)
1480 {
1481 int ret;
1482 uint64_t seq, discarded;
1483
1484 ret = kernctl_get_sequence_number(stream->wait_fd, &seq);
1485 if (ret < 0) {
1486 if (ret == -ENOTTY) {
1487 /* Command not implemented by lttng-modules. */
1488 seq = -1ULL;
1489 stream->sequence_number_unavailable = true;
1490 } else {
1491 PERROR("kernctl_get_sequence_number");
1492 goto end;
1493 }
1494 }
1495
1496 /*
1497 * Start the sequence when we extract the first packet in case we don't
1498 * start at 0 (for example if a consumer is not connected to the
1499 * session immediately after the beginning).
1500 */
1501 if (stream->last_sequence_number == -1ULL) {
1502 stream->last_sequence_number = seq;
1503 } else if (seq > stream->last_sequence_number) {
1504 stream->chan->lost_packets += seq -
1505 stream->last_sequence_number - 1;
1506 } else {
1507 /* seq <= last_sequence_number */
1508 ERR("Sequence number inconsistent : prev = %" PRIu64
1509 ", current = %" PRIu64,
1510 stream->last_sequence_number, seq);
1511 ret = -1;
1512 goto end;
1513 }
1514 stream->last_sequence_number = seq;
1515
1516 ret = kernctl_get_events_discarded(stream->wait_fd, &discarded);
1517 if (ret < 0) {
1518 PERROR("kernctl_get_events_discarded");
1519 goto end;
1520 }
1521 if (discarded < stream->last_discarded_events) {
1522 /*
1523 * Overflow has occurred. We assume only one wrap-around
1524 * has occurred.
1525 */
1526 stream->chan->discarded_events += (1ULL << (CAA_BITS_PER_LONG - 1)) -
1527 stream->last_discarded_events + discarded;
1528 } else {
1529 stream->chan->discarded_events += discarded -
1530 stream->last_discarded_events;
1531 }
1532 stream->last_discarded_events = discarded;
1533 ret = 0;
1534
1535 end:
1536 return ret;
1537 }
1538
1539 /*
1540 * Check if the local version of the metadata stream matches with the version
1541 * of the metadata stream in the kernel. If it was updated, set the reset flag
1542 * on the stream.
1543 */
1544 static
1545 int metadata_stream_check_version(int infd, struct lttng_consumer_stream *stream)
1546 {
1547 int ret;
1548 uint64_t cur_version;
1549
1550 ret = kernctl_get_metadata_version(infd, &cur_version);
1551 if (ret < 0) {
1552 if (ret == -ENOTTY) {
1553 /*
1554 * LTTng-modules does not implement this
1555 * command.
1556 */
1557 ret = 0;
1558 goto end;
1559 }
1560 ERR("Failed to get the metadata version");
1561 goto end;
1562 }
1563
1564 if (stream->metadata_version == cur_version) {
1565 ret = 0;
1566 goto end;
1567 }
1568
1569 DBG("New metadata version detected");
1570 stream->metadata_version = cur_version;
1571 stream->reset_metadata_flag = 1;
1572 ret = 0;
1573
1574 end:
1575 return ret;
1576 }
1577
1578 /*
1579 * Consume data on a file descriptor and write it on a trace file.
1580 * The stream and channel locks must be held by the caller.
1581 */
1582 ssize_t lttng_kconsumer_read_subbuffer(struct lttng_consumer_stream *stream,
1583 struct lttng_consumer_local_data *ctx)
1584 {
1585 unsigned long len, subbuf_size, padding;
1586 int err, write_index = 1, rotation_ret;
1587 ssize_t ret = 0;
1588 int infd = stream->wait_fd;
1589 struct ctf_packet_index index = {};
1590
1591 DBG("In read_subbuffer (infd : %d)", infd);
1592
1593 /*
1594 * If the stream was flagged to be ready for rotation before we extract the
1595 * next packet, rotate it now.
1596 */
1597 if (stream->rotate_ready) {
1598 DBG("Rotate stream before extracting data");
1599 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1600 if (rotation_ret < 0) {
1601 ERR("Stream rotation error");
1602 ret = -1;
1603 goto error;
1604 }
1605 }
1606
1607 /* Get the next subbuffer */
1608 err = kernctl_get_next_subbuf(infd);
1609 if (err != 0) {
1610 /*
1611 * This is a debug message even for single-threaded consumer,
1612 * because poll() have more relaxed criterions than get subbuf,
1613 * so get_subbuf may fail for short race windows where poll()
1614 * would issue wakeups.
1615 */
1616 DBG("Reserving sub buffer failed (everything is normal, "
1617 "it is due to concurrency)");
1618 ret = err;
1619 goto error;
1620 }
1621
1622 /* Get the full subbuffer size including padding */
1623 err = kernctl_get_padded_subbuf_size(infd, &len);
1624 if (err != 0) {
1625 PERROR("Getting sub-buffer len failed.");
1626 err = kernctl_put_subbuf(infd);
1627 if (err != 0) {
1628 if (err == -EFAULT) {
1629 PERROR("Error in unreserving sub buffer\n");
1630 } else if (err == -EIO) {
1631 /* Should never happen with newer LTTng versions */
1632 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1633 }
1634 ret = err;
1635 goto error;
1636 }
1637 ret = err;
1638 goto error;
1639 }
1640
1641 if (!stream->metadata_flag) {
1642 ret = get_index_values(&index, infd);
1643 if (ret < 0) {
1644 err = kernctl_put_subbuf(infd);
1645 if (err != 0) {
1646 if (err == -EFAULT) {
1647 PERROR("Error in unreserving sub buffer\n");
1648 } else if (err == -EIO) {
1649 /* Should never happen with newer LTTng versions */
1650 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1651 }
1652 ret = err;
1653 goto error;
1654 }
1655 goto error;
1656 }
1657 ret = update_stream_stats(stream);
1658 if (ret < 0) {
1659 err = kernctl_put_subbuf(infd);
1660 if (err != 0) {
1661 if (err == -EFAULT) {
1662 PERROR("Error in unreserving sub buffer\n");
1663 } else if (err == -EIO) {
1664 /* Should never happen with newer LTTng versions */
1665 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1666 }
1667 ret = err;
1668 goto error;
1669 }
1670 goto error;
1671 }
1672 } else {
1673 write_index = 0;
1674 ret = metadata_stream_check_version(infd, stream);
1675 if (ret < 0) {
1676 err = kernctl_put_subbuf(infd);
1677 if (err != 0) {
1678 if (err == -EFAULT) {
1679 PERROR("Error in unreserving sub buffer\n");
1680 } else if (err == -EIO) {
1681 /* Should never happen with newer LTTng versions */
1682 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1683 }
1684 ret = err;
1685 goto error;
1686 }
1687 goto error;
1688 }
1689 }
1690
1691 switch (stream->chan->output) {
1692 case CONSUMER_CHANNEL_SPLICE:
1693 /*
1694 * XXX: The lttng-modules splice "actor" does not handle copying
1695 * partial pages hence only using the subbuffer size without the
1696 * padding makes the splice fail.
1697 */
1698 subbuf_size = len;
1699 padding = 0;
1700
1701 /* splice the subbuffer to the tracefile */
1702 ret = lttng_consumer_on_read_subbuffer_splice(ctx, stream, subbuf_size,
1703 padding, &index);
1704 /*
1705 * XXX: Splice does not support network streaming so the return value
1706 * is simply checked against subbuf_size and not like the mmap() op.
1707 */
1708 if (ret != subbuf_size) {
1709 /*
1710 * display the error but continue processing to try
1711 * to release the subbuffer
1712 */
1713 ERR("Error splicing to tracefile (ret: %zd != len: %lu)",
1714 ret, subbuf_size);
1715 write_index = 0;
1716 }
1717 break;
1718 case CONSUMER_CHANNEL_MMAP:
1719 {
1720 const char *subbuf_addr;
1721 struct lttng_buffer_view subbuf_view;
1722
1723 /* Get subbuffer size without padding */
1724 err = kernctl_get_subbuf_size(infd, &subbuf_size);
1725 if (err != 0) {
1726 PERROR("Getting sub-buffer len failed.");
1727 err = kernctl_put_subbuf(infd);
1728 if (err != 0) {
1729 if (err == -EFAULT) {
1730 PERROR("Error in unreserving sub buffer\n");
1731 } else if (err == -EIO) {
1732 /* Should never happen with newer LTTng versions */
1733 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1734 }
1735 ret = err;
1736 goto error;
1737 }
1738 ret = err;
1739 goto error;
1740 }
1741
1742 ret = get_current_subbuf_addr(stream, &subbuf_addr);
1743 if (ret) {
1744 goto error_put_subbuf;
1745 }
1746
1747 /* Make sure the tracer is not gone mad on us! */
1748 assert(len >= subbuf_size);
1749
1750 padding = len - subbuf_size;
1751
1752 subbuf_view = lttng_buffer_view_init(subbuf_addr, 0, len);
1753
1754 /* write the subbuffer to the tracefile */
1755 ret = lttng_consumer_on_read_subbuffer_mmap(
1756 ctx, stream, &subbuf_view, padding, &index);
1757 /*
1758 * The mmap operation should write subbuf_size amount of data
1759 * when network streaming or the full padding (len) size when we
1760 * are _not_ streaming.
1761 */
1762 if ((ret != subbuf_size && stream->net_seq_idx != (uint64_t) -1ULL) ||
1763 (ret != len && stream->net_seq_idx == (uint64_t) -1ULL)) {
1764 /*
1765 * Display the error but continue processing to try to release the
1766 * subbuffer. This is a DBG statement since this is possible to
1767 * happen without being a critical error.
1768 */
1769 DBG("Error writing to tracefile "
1770 "(ret: %zd != len: %lu != subbuf_size: %lu)",
1771 ret, len, subbuf_size);
1772 write_index = 0;
1773 }
1774 break;
1775 }
1776 default:
1777 ERR("Unknown output method");
1778 ret = -EPERM;
1779 }
1780 error_put_subbuf:
1781 err = kernctl_put_next_subbuf(infd);
1782 if (err != 0) {
1783 if (err == -EFAULT) {
1784 PERROR("Error in unreserving sub buffer\n");
1785 } else if (err == -EIO) {
1786 /* Should never happen with newer LTTng versions */
1787 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1788 }
1789 ret = err;
1790 goto error;
1791 }
1792
1793 /* Write index if needed. */
1794 if (!write_index) {
1795 goto rotate;
1796 }
1797
1798 if (stream->chan->live_timer_interval && !stream->metadata_flag) {
1799 /*
1800 * In live, block until all the metadata is sent.
1801 */
1802 pthread_mutex_lock(&stream->metadata_timer_lock);
1803 assert(!stream->missed_metadata_flush);
1804 stream->waiting_on_metadata = true;
1805 pthread_mutex_unlock(&stream->metadata_timer_lock);
1806
1807 err = consumer_stream_sync_metadata(ctx, stream->session_id);
1808
1809 pthread_mutex_lock(&stream->metadata_timer_lock);
1810 stream->waiting_on_metadata = false;
1811 if (stream->missed_metadata_flush) {
1812 stream->missed_metadata_flush = false;
1813 pthread_mutex_unlock(&stream->metadata_timer_lock);
1814 (void) consumer_flush_kernel_index(stream);
1815 } else {
1816 pthread_mutex_unlock(&stream->metadata_timer_lock);
1817 }
1818 if (err < 0) {
1819 goto error;
1820 }
1821 }
1822
1823 err = consumer_stream_write_index(stream, &index);
1824 if (err < 0) {
1825 goto error;
1826 }
1827
1828 rotate:
1829 /*
1830 * After extracting the packet, we check if the stream is now ready to be
1831 * rotated and perform the action immediately.
1832 */
1833 rotation_ret = lttng_consumer_stream_is_rotate_ready(stream);
1834 if (rotation_ret == 1) {
1835 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1836 if (rotation_ret < 0) {
1837 ERR("Stream rotation error");
1838 ret = -1;
1839 goto error;
1840 }
1841 } else if (rotation_ret < 0) {
1842 ERR("Checking if stream is ready to rotate");
1843 ret = -1;
1844 goto error;
1845 }
1846
1847 error:
1848 return ret;
1849 }
1850
1851 int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream *stream)
1852 {
1853 int ret;
1854
1855 assert(stream);
1856
1857 /*
1858 * Don't create anything if this is set for streaming or if there is
1859 * no current trace chunk on the parent channel.
1860 */
1861 if (stream->net_seq_idx == (uint64_t) -1ULL && stream->chan->monitor &&
1862 stream->chan->trace_chunk) {
1863 ret = consumer_stream_create_output_files(stream, true);
1864 if (ret) {
1865 goto error;
1866 }
1867 }
1868
1869 if (stream->output == LTTNG_EVENT_MMAP) {
1870 /* get the len of the mmap region */
1871 unsigned long mmap_len;
1872
1873 ret = kernctl_get_mmap_len(stream->wait_fd, &mmap_len);
1874 if (ret != 0) {
1875 PERROR("kernctl_get_mmap_len");
1876 goto error_close_fd;
1877 }
1878 stream->mmap_len = (size_t) mmap_len;
1879
1880 stream->mmap_base = mmap(NULL, stream->mmap_len, PROT_READ,
1881 MAP_PRIVATE, stream->wait_fd, 0);
1882 if (stream->mmap_base == MAP_FAILED) {
1883 PERROR("Error mmaping");
1884 ret = -1;
1885 goto error_close_fd;
1886 }
1887 }
1888
1889 /* we return 0 to let the library handle the FD internally */
1890 return 0;
1891
1892 error_close_fd:
1893 if (stream->out_fd >= 0) {
1894 int err;
1895
1896 err = close(stream->out_fd);
1897 assert(!err);
1898 stream->out_fd = -1;
1899 }
1900 error:
1901 return ret;
1902 }
1903
1904 /*
1905 * Check if data is still being extracted from the buffers for a specific
1906 * stream. Consumer data lock MUST be acquired before calling this function
1907 * and the stream lock.
1908 *
1909 * Return 1 if the traced data are still getting read else 0 meaning that the
1910 * data is available for trace viewer reading.
1911 */
1912 int lttng_kconsumer_data_pending(struct lttng_consumer_stream *stream)
1913 {
1914 int ret;
1915
1916 assert(stream);
1917
1918 if (stream->endpoint_status != CONSUMER_ENDPOINT_ACTIVE) {
1919 ret = 0;
1920 goto end;
1921 }
1922
1923 ret = kernctl_get_next_subbuf(stream->wait_fd);
1924 if (ret == 0) {
1925 /* There is still data so let's put back this subbuffer. */
1926 ret = kernctl_put_subbuf(stream->wait_fd);
1927 assert(ret == 0);
1928 ret = 1; /* Data is pending */
1929 goto end;
1930 }
1931
1932 /* Data is NOT pending and ready to be read. */
1933 ret = 0;
1934
1935 end:
1936 return ret;
1937 }
This page took 0.104554 seconds and 4 git commands to generate.