consumerd: pass channel instance to stream creation function
[lttng-tools.git] / src / common / kernel-consumer / kernel-consumer.c
1 /*
2 * Copyright (C) 2011 - Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * Copyright (C) 2017 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include "common/buffer-view.h"
21 #include <stdint.h>
22 #define _LGPL_SOURCE
23 #include <assert.h>
24 #include <poll.h>
25 #include <pthread.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <sys/mman.h>
29 #include <sys/socket.h>
30 #include <sys/types.h>
31 #include <inttypes.h>
32 #include <unistd.h>
33 #include <sys/stat.h>
34
35 #include <bin/lttng-consumerd/health-consumerd.h>
36 #include <common/common.h>
37 #include <common/kernel-ctl/kernel-ctl.h>
38 #include <common/sessiond-comm/sessiond-comm.h>
39 #include <common/sessiond-comm/relayd.h>
40 #include <common/compat/fcntl.h>
41 #include <common/compat/endian.h>
42 #include <common/pipe.h>
43 #include <common/relayd/relayd.h>
44 #include <common/utils.h>
45 #include <common/consumer/consumer-stream.h>
46 #include <common/index/index.h>
47 #include <common/consumer/consumer-timer.h>
48 #include <common/optional.h>
49
50 #include "kernel-consumer.h"
51
/*
 * Consumer-wide global state. Only declared here; the definition is not
 * visible in this part of the file.
 */
52 extern struct lttng_consumer_global_data consumer_data;
/*
 * NOTE(review): presumably the timeout used by the consumer polling loops;
 * confirm against the definition, which is not visible here.
 */
53 extern int consumer_poll_timeout;
54
55 /*
56 * Take a snapshot for a specific fd
57 *
58 * Returns 0 on success, < 0 on error
59 */
60 int lttng_kconsumer_take_snapshot(struct lttng_consumer_stream *stream)
61 {
62 int ret = 0;
63 int infd = stream->wait_fd;
64
65 ret = kernctl_snapshot(infd);
66 /*
67 * -EAGAIN is not an error, it just means that there is no data to
68 * be read.
69 */
70 if (ret != 0 && ret != -EAGAIN) {
71 PERROR("Getting sub-buffer snapshot.");
72 }
73
74 return ret;
75 }
76
77 /*
78 * Sample consumed and produced positions for a specific fd.
79 *
80 * Returns 0 on success, < 0 on error.
81 */
82 int lttng_kconsumer_sample_snapshot_positions(
83 struct lttng_consumer_stream *stream)
84 {
85 assert(stream);
86
87 return kernctl_snapshot_sample_positions(stream->wait_fd);
88 }
89
90 /*
91 * Get the produced position
92 *
93 * Returns 0 on success, < 0 on error
94 */
95 int lttng_kconsumer_get_produced_snapshot(struct lttng_consumer_stream *stream,
96 unsigned long *pos)
97 {
98 int ret;
99 int infd = stream->wait_fd;
100
101 ret = kernctl_snapshot_get_produced(infd, pos);
102 if (ret != 0) {
103 PERROR("kernctl_snapshot_get_produced");
104 }
105
106 return ret;
107 }
108
109 /*
110 * Get the consumerd position
111 *
112 * Returns 0 on success, < 0 on error
113 */
114 int lttng_kconsumer_get_consumed_snapshot(struct lttng_consumer_stream *stream,
115 unsigned long *pos)
116 {
117 int ret;
118 int infd = stream->wait_fd;
119
120 ret = kernctl_snapshot_get_consumed(infd, pos);
121 if (ret != 0) {
122 PERROR("kernctl_snapshot_get_consumed");
123 }
124
125 return ret;
126 }
127
128 static
129 int get_current_subbuf_addr(struct lttng_consumer_stream *stream,
130 const char **addr)
131 {
132 int ret;
133 unsigned long mmap_offset;
134 const char *mmap_base = stream->mmap_base;
135
136 ret = kernctl_get_mmap_read_offset(stream->wait_fd, &mmap_offset);
137 if (ret < 0) {
138 PERROR("Failed to get mmap read offset");
139 goto error;
140 }
141
142 *addr = mmap_base + mmap_offset;
143 error:
144 return ret;
145 }
146
147 /*
148 * Take a snapshot of all the stream of a channel
149 * RCU read-side lock must be held across this function to ensure existence of
150 * channel. The channel lock must be held by the caller.
151 *
152 * Returns 0 on success, < 0 on error
153 */
154 static int lttng_kconsumer_snapshot_channel(
155 struct lttng_consumer_channel *channel,
156 uint64_t key, char *path, uint64_t relayd_id,
157 uint64_t nb_packets_per_stream,
158 struct lttng_consumer_local_data *ctx)
159 {
160 int ret;
161 struct lttng_consumer_stream *stream;
162
163 DBG("Kernel consumer snapshot channel %" PRIu64, key);
164
165 rcu_read_lock();
166
167 /* Splice is not supported yet for channel snapshot. */
168 if (channel->output != CONSUMER_CHANNEL_MMAP) {
169 ERR("Unsupported output type for channel \"%s\": mmap output is required to record a snapshot",
170 channel->name);
171 ret = -1;
172 goto end;
173 }
174
175 cds_list_for_each_entry(stream, &channel->streams.head, send_node) {
176 unsigned long consumed_pos, produced_pos;
177
178 health_code_update();
179
180 /*
181 * Lock stream because we are about to change its state.
182 */
183 pthread_mutex_lock(&stream->lock);
184
185 assert(channel->trace_chunk);
186 if (!lttng_trace_chunk_get(channel->trace_chunk)) {
187 /*
188 * Can't happen barring an internal error as the channel
189 * holds a reference to the trace chunk.
190 */
191 ERR("Failed to acquire reference to channel's trace chunk");
192 ret = -1;
193 goto end_unlock;
194 }
195 assert(!stream->trace_chunk);
196 stream->trace_chunk = channel->trace_chunk;
197
198 /*
199 * Assign the received relayd ID so we can use it for streaming. The streams
200 * are not visible to anyone so this is OK to change it.
201 */
202 stream->net_seq_idx = relayd_id;
203 channel->relayd_id = relayd_id;
204 if (relayd_id != (uint64_t) -1ULL) {
205 ret = consumer_send_relayd_stream(stream, path);
206 if (ret < 0) {
207 ERR("sending stream to relayd");
208 goto end_unlock;
209 }
210 } else {
211 ret = consumer_stream_create_output_files(stream,
212 false);
213 if (ret < 0) {
214 goto end_unlock;
215 }
216 DBG("Kernel consumer snapshot stream (%" PRIu64 ")",
217 stream->key);
218 }
219
220 ret = kernctl_buffer_flush_empty(stream->wait_fd);
221 if (ret < 0) {
222 /*
223 * Doing a buffer flush which does not take into
224 * account empty packets. This is not perfect
225 * for stream intersection, but required as a
226 * fall-back when "flush_empty" is not
227 * implemented by lttng-modules.
228 */
229 ret = kernctl_buffer_flush(stream->wait_fd);
230 if (ret < 0) {
231 ERR("Failed to flush kernel stream");
232 goto end_unlock;
233 }
234 goto end_unlock;
235 }
236
237 ret = lttng_kconsumer_take_snapshot(stream);
238 if (ret < 0) {
239 ERR("Taking kernel snapshot");
240 goto end_unlock;
241 }
242
243 ret = lttng_kconsumer_get_produced_snapshot(stream, &produced_pos);
244 if (ret < 0) {
245 ERR("Produced kernel snapshot position");
246 goto end_unlock;
247 }
248
249 ret = lttng_kconsumer_get_consumed_snapshot(stream, &consumed_pos);
250 if (ret < 0) {
251 ERR("Consumerd kernel snapshot position");
252 goto end_unlock;
253 }
254
255 consumed_pos = consumer_get_consume_start_pos(consumed_pos,
256 produced_pos, nb_packets_per_stream,
257 stream->max_sb_size);
258
259 while ((long) (consumed_pos - produced_pos) < 0) {
260 ssize_t read_len;
261 unsigned long len, padded_len;
262 const char *subbuf_addr;
263 struct lttng_buffer_view subbuf_view;
264
265 health_code_update();
266 DBG("Kernel consumer taking snapshot at pos %lu", consumed_pos);
267
268 ret = kernctl_get_subbuf(stream->wait_fd, &consumed_pos);
269 if (ret < 0) {
270 if (ret != -EAGAIN) {
271 PERROR("kernctl_get_subbuf snapshot");
272 goto end_unlock;
273 }
274 DBG("Kernel consumer get subbuf failed. Skipping it.");
275 consumed_pos += stream->max_sb_size;
276 stream->chan->lost_packets++;
277 continue;
278 }
279
280 ret = kernctl_get_subbuf_size(stream->wait_fd, &len);
281 if (ret < 0) {
282 ERR("Snapshot kernctl_get_subbuf_size");
283 goto error_put_subbuf;
284 }
285
286 ret = kernctl_get_padded_subbuf_size(stream->wait_fd, &padded_len);
287 if (ret < 0) {
288 ERR("Snapshot kernctl_get_padded_subbuf_size");
289 goto error_put_subbuf;
290 }
291
292 ret = get_current_subbuf_addr(stream, &subbuf_addr);
293 if (ret) {
294 goto error_put_subbuf;
295 }
296
297 subbuf_view = lttng_buffer_view_init(
298 subbuf_addr, 0, padded_len);
299 read_len = lttng_consumer_on_read_subbuffer_mmap(ctx,
300 stream, &subbuf_view,
301 padded_len - len, NULL);
302 /*
303 * We write the padded len in local tracefiles but the data len
304 * when using a relay. Display the error but continue processing
305 * to try to release the subbuffer.
306 */
307 if (relayd_id != (uint64_t) -1ULL) {
308 if (read_len != len) {
309 ERR("Error sending to the relay (ret: %zd != len: %lu)",
310 read_len, len);
311 }
312 } else {
313 if (read_len != padded_len) {
314 ERR("Error writing to tracefile (ret: %zd != len: %lu)",
315 read_len, padded_len);
316 }
317 }
318
319 ret = kernctl_put_subbuf(stream->wait_fd);
320 if (ret < 0) {
321 ERR("Snapshot kernctl_put_subbuf");
322 goto end_unlock;
323 }
324 consumed_pos += stream->max_sb_size;
325 }
326
327 if (relayd_id == (uint64_t) -1ULL) {
328 if (stream->out_fd >= 0) {
329 ret = close(stream->out_fd);
330 if (ret < 0) {
331 PERROR("Kernel consumer snapshot close out_fd");
332 goto end_unlock;
333 }
334 stream->out_fd = -1;
335 }
336 } else {
337 close_relayd_stream(stream);
338 stream->net_seq_idx = (uint64_t) -1ULL;
339 }
340 lttng_trace_chunk_put(stream->trace_chunk);
341 stream->trace_chunk = NULL;
342 pthread_mutex_unlock(&stream->lock);
343 }
344
345 /* All good! */
346 ret = 0;
347 goto end;
348
349 error_put_subbuf:
350 ret = kernctl_put_subbuf(stream->wait_fd);
351 if (ret < 0) {
352 ERR("Snapshot kernctl_put_subbuf error path");
353 }
354 end_unlock:
355 pthread_mutex_unlock(&stream->lock);
356 end:
357 rcu_read_unlock();
358 return ret;
359 }
360
361 /*
362 * Read the whole metadata available for a snapshot.
363 * RCU read-side lock must be held across this function to ensure existence of
364 * metadata_channel. The channel lock must be held by the caller.
365 *
366 * Returns 0 on success, < 0 on error
367 */
368 static int lttng_kconsumer_snapshot_metadata(
369 struct lttng_consumer_channel *metadata_channel,
370 uint64_t key, char *path, uint64_t relayd_id,
371 struct lttng_consumer_local_data *ctx)
372 {
373 int ret, use_relayd = 0;
374 ssize_t ret_read;
375 struct lttng_consumer_stream *metadata_stream;
376
377 assert(ctx);
378
379 DBG("Kernel consumer snapshot metadata with key %" PRIu64 " at path %s",
380 key, path);
381
382 rcu_read_lock();
383
384 metadata_stream = metadata_channel->metadata_stream;
385 assert(metadata_stream);
386
387 pthread_mutex_lock(&metadata_stream->lock);
388 assert(metadata_channel->trace_chunk);
389 assert(metadata_stream->trace_chunk);
390
391 /* Flag once that we have a valid relayd for the stream. */
392 if (relayd_id != (uint64_t) -1ULL) {
393 use_relayd = 1;
394 }
395
396 if (use_relayd) {
397 ret = consumer_send_relayd_stream(metadata_stream, path);
398 if (ret < 0) {
399 goto error_snapshot;
400 }
401 } else {
402 ret = consumer_stream_create_output_files(metadata_stream,
403 false);
404 if (ret < 0) {
405 goto error_snapshot;
406 }
407 }
408
409 do {
410 health_code_update();
411
412 ret_read = lttng_kconsumer_read_subbuffer(metadata_stream, ctx);
413 if (ret_read < 0) {
414 if (ret_read != -EAGAIN) {
415 ERR("Kernel snapshot reading metadata subbuffer (ret: %zd)",
416 ret_read);
417 ret = ret_read;
418 goto error_snapshot;
419 }
420 /* ret_read is negative at this point so we will exit the loop. */
421 continue;
422 }
423 } while (ret_read >= 0);
424
425 if (use_relayd) {
426 close_relayd_stream(metadata_stream);
427 metadata_stream->net_seq_idx = (uint64_t) -1ULL;
428 } else {
429 if (metadata_stream->out_fd >= 0) {
430 ret = close(metadata_stream->out_fd);
431 if (ret < 0) {
432 PERROR("Kernel consumer snapshot metadata close out_fd");
433 /*
434 * Don't go on error here since the snapshot was successful at this
435 * point but somehow the close failed.
436 */
437 }
438 metadata_stream->out_fd = -1;
439 lttng_trace_chunk_put(metadata_stream->trace_chunk);
440 metadata_stream->trace_chunk = NULL;
441 }
442 }
443
444 ret = 0;
445 error_snapshot:
446 pthread_mutex_unlock(&metadata_stream->lock);
447 cds_list_del(&metadata_stream->send_node);
448 consumer_stream_destroy(metadata_stream, NULL);
449 metadata_channel->metadata_stream = NULL;
450 rcu_read_unlock();
451 return ret;
452 }
453
454 /*
455 * Receive command from session daemon and process it.
456 *
457 * Return 1 on success else a negative value or 0.
458 */
459 int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
460 int sock, struct pollfd *consumer_sockpoll)
461 {
462 ssize_t ret;
463 enum lttcomm_return_code ret_code = LTTCOMM_CONSUMERD_SUCCESS;
464 struct lttcomm_consumer_msg msg;
465
466 health_code_update();
467
468 ret = lttcomm_recv_unix_sock(sock, &msg, sizeof(msg));
469 if (ret != sizeof(msg)) {
470 if (ret > 0) {
471 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_CMD);
472 ret = -1;
473 }
474 return ret;
475 }
476
477 health_code_update();
478
479 /* Deprecated command */
480 assert(msg.cmd_type != LTTNG_CONSUMER_STOP);
481
482 health_code_update();
483
484 /* relayd needs RCU read-side protection */
485 rcu_read_lock();
486
487 switch (msg.cmd_type) {
488 case LTTNG_CONSUMER_ADD_RELAYD_SOCKET:
489 {
490 /* Session daemon status message are handled in the following call. */
491 consumer_add_relayd_socket(msg.u.relayd_sock.net_index,
492 msg.u.relayd_sock.type, ctx, sock, consumer_sockpoll,
493 &msg.u.relayd_sock.sock, msg.u.relayd_sock.session_id,
494 msg.u.relayd_sock.relayd_session_id);
495 goto end_nosignal;
496 }
497 case LTTNG_CONSUMER_ADD_CHANNEL:
498 {
499 struct lttng_consumer_channel *new_channel;
500 int ret_recv;
501 const uint64_t chunk_id = msg.u.channel.chunk_id.value;
502
503 health_code_update();
504
505 /* First send a status message before receiving the fds. */
506 ret = consumer_send_status_msg(sock, ret_code);
507 if (ret < 0) {
508 /* Somehow, the session daemon is not responding anymore. */
509 goto error_fatal;
510 }
511
512 health_code_update();
513
514 DBG("consumer_add_channel %" PRIu64, msg.u.channel.channel_key);
515 new_channel = consumer_allocate_channel(msg.u.channel.channel_key,
516 msg.u.channel.session_id,
517 msg.u.channel.chunk_id.is_set ?
518 &chunk_id : NULL,
519 msg.u.channel.pathname,
520 msg.u.channel.name,
521 msg.u.channel.relayd_id, msg.u.channel.output,
522 msg.u.channel.tracefile_size,
523 msg.u.channel.tracefile_count, 0,
524 msg.u.channel.monitor,
525 msg.u.channel.live_timer_interval,
526 NULL, NULL);
527 if (new_channel == NULL) {
528 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
529 goto end_nosignal;
530 }
531 new_channel->nb_init_stream_left = msg.u.channel.nb_init_streams;
532 switch (msg.u.channel.output) {
533 case LTTNG_EVENT_SPLICE:
534 new_channel->output = CONSUMER_CHANNEL_SPLICE;
535 break;
536 case LTTNG_EVENT_MMAP:
537 new_channel->output = CONSUMER_CHANNEL_MMAP;
538 break;
539 default:
540 ERR("Channel output unknown %d", msg.u.channel.output);
541 goto end_nosignal;
542 }
543
544 /* Translate and save channel type. */
545 switch (msg.u.channel.type) {
546 case CONSUMER_CHANNEL_TYPE_DATA:
547 case CONSUMER_CHANNEL_TYPE_METADATA:
548 new_channel->type = msg.u.channel.type;
549 break;
550 default:
551 assert(0);
552 goto end_nosignal;
553 };
554
555 health_code_update();
556
557 if (ctx->on_recv_channel != NULL) {
558 ret_recv = ctx->on_recv_channel(new_channel);
559 if (ret_recv == 0) {
560 ret = consumer_add_channel(new_channel, ctx);
561 } else if (ret_recv < 0) {
562 goto end_nosignal;
563 }
564 } else {
565 ret = consumer_add_channel(new_channel, ctx);
566 }
567 if (msg.u.channel.type == CONSUMER_CHANNEL_TYPE_DATA && !ret) {
568 int monitor_start_ret;
569
570 DBG("Consumer starting monitor timer");
571 consumer_timer_live_start(new_channel,
572 msg.u.channel.live_timer_interval);
573 monitor_start_ret = consumer_timer_monitor_start(
574 new_channel,
575 msg.u.channel.monitor_timer_interval);
576 if (monitor_start_ret < 0) {
577 ERR("Starting channel monitoring timer failed");
578 goto end_nosignal;
579 }
580
581 }
582
583 health_code_update();
584
585 /* If we received an error in add_channel, we need to report it. */
586 if (ret < 0) {
587 ret = consumer_send_status_msg(sock, ret);
588 if (ret < 0) {
589 goto error_fatal;
590 }
591 goto end_nosignal;
592 }
593
594 goto end_nosignal;
595 }
596 case LTTNG_CONSUMER_ADD_STREAM:
597 {
598 int fd;
599 struct lttng_pipe *stream_pipe;
600 struct lttng_consumer_stream *new_stream;
601 struct lttng_consumer_channel *channel;
602 int alloc_ret = 0;
603
604 /*
605 * Get stream's channel reference. Needed when adding the stream to the
606 * global hash table.
607 */
608 channel = consumer_find_channel(msg.u.stream.channel_key);
609 if (!channel) {
610 /*
611 * We could not find the channel. Can happen if cpu hotplug
612 * happens while tearing down.
613 */
614 ERR("Unable to find channel key %" PRIu64, msg.u.stream.channel_key);
615 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
616 }
617
618 health_code_update();
619
620 /* First send a status message before receiving the fds. */
621 ret = consumer_send_status_msg(sock, ret_code);
622 if (ret < 0) {
623 /* Somehow, the session daemon is not responding anymore. */
624 goto error_add_stream_fatal;
625 }
626
627 health_code_update();
628
629 if (ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
630 /* Channel was not found. */
631 goto error_add_stream_nosignal;
632 }
633
634 /* Blocking call */
635 health_poll_entry();
636 ret = lttng_consumer_poll_socket(consumer_sockpoll);
637 health_poll_exit();
638 if (ret) {
639 goto error_add_stream_fatal;
640 }
641
642 health_code_update();
643
644 /* Get stream file descriptor from socket */
645 ret = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
646 if (ret != sizeof(fd)) {
647 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_FD);
648 goto end;
649 }
650
651 health_code_update();
652
653 /*
654 * Send status code to session daemon only if the recv works. If the
655 * above recv() failed, the session daemon is notified through the
656 * error socket and the teardown is eventually done.
657 */
658 ret = consumer_send_status_msg(sock, ret_code);
659 if (ret < 0) {
660 /* Somehow, the session daemon is not responding anymore. */
661 goto error_add_stream_nosignal;
662 }
663
664 health_code_update();
665
666 pthread_mutex_lock(&channel->lock);
667 new_stream = consumer_allocate_stream(
668 channel,
669 channel->key,
670 fd,
671 channel->name,
672 channel->relayd_id,
673 channel->session_id,
674 channel->trace_chunk,
675 msg.u.stream.cpu,
676 &alloc_ret,
677 channel->type,
678 channel->monitor);
679 if (new_stream == NULL) {
680 switch (alloc_ret) {
681 case -ENOMEM:
682 case -EINVAL:
683 default:
684 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
685 break;
686 }
687 pthread_mutex_unlock(&channel->lock);
688 goto error_add_stream_nosignal;
689 }
690
691 new_stream->wait_fd = fd;
692 ret = kernctl_get_max_subbuf_size(new_stream->wait_fd,
693 &new_stream->max_sb_size);
694 if (ret < 0) {
695 pthread_mutex_unlock(&channel->lock);
696 ERR("Failed to get kernel maximal subbuffer size");
697 goto error_add_stream_nosignal;
698 }
699
700 consumer_stream_update_channel_attributes(new_stream,
701 channel);
702 switch (channel->output) {
703 case CONSUMER_CHANNEL_SPLICE:
704 new_stream->output = LTTNG_EVENT_SPLICE;
705 ret = utils_create_pipe(new_stream->splice_pipe);
706 if (ret < 0) {
707 pthread_mutex_unlock(&channel->lock);
708 goto error_add_stream_nosignal;
709 }
710 break;
711 case CONSUMER_CHANNEL_MMAP:
712 new_stream->output = LTTNG_EVENT_MMAP;
713 break;
714 default:
715 ERR("Stream output unknown %d", channel->output);
716 pthread_mutex_unlock(&channel->lock);
717 goto error_add_stream_nosignal;
718 }
719
720 /*
721 * We've just assigned the channel to the stream so increment the
722 * refcount right now. We don't need to increment the refcount for
723 * streams in no monitor because we handle manually the cleanup of
724 * those. It is very important to make sure there is NO prior
725 * consumer_del_stream() calls or else the refcount will be unbalanced.
726 */
727 if (channel->monitor) {
728 uatomic_inc(&new_stream->chan->refcount);
729 }
730
731 /*
732 * The buffer flush is done on the session daemon side for the kernel
733 * so no need for the stream "hangup_flush_done" variable to be
734 * tracked. This is important for a kernel stream since we don't rely
735 * on the flush state of the stream to read data. It's not the case for
736 * user space tracing.
737 */
738 new_stream->hangup_flush_done = 0;
739
740 health_code_update();
741
742 pthread_mutex_lock(&new_stream->lock);
743 if (ctx->on_recv_stream) {
744 ret = ctx->on_recv_stream(new_stream);
745 if (ret < 0) {
746 pthread_mutex_unlock(&new_stream->lock);
747 pthread_mutex_unlock(&channel->lock);
748 consumer_stream_free(new_stream);
749 goto error_add_stream_nosignal;
750 }
751 }
752 health_code_update();
753
754 if (new_stream->metadata_flag) {
755 channel->metadata_stream = new_stream;
756 }
757
758 /* Do not monitor this stream. */
759 if (!channel->monitor) {
760 DBG("Kernel consumer add stream %s in no monitor mode with "
761 "relayd id %" PRIu64, new_stream->name,
762 new_stream->net_seq_idx);
763 cds_list_add(&new_stream->send_node, &channel->streams.head);
764 pthread_mutex_unlock(&new_stream->lock);
765 pthread_mutex_unlock(&channel->lock);
766 goto end_add_stream;
767 }
768
769 /* Send stream to relayd if the stream has an ID. */
770 if (new_stream->net_seq_idx != (uint64_t) -1ULL) {
771 ret = consumer_send_relayd_stream(new_stream,
772 new_stream->chan->pathname);
773 if (ret < 0) {
774 pthread_mutex_unlock(&new_stream->lock);
775 pthread_mutex_unlock(&channel->lock);
776 consumer_stream_free(new_stream);
777 goto error_add_stream_nosignal;
778 }
779
780 /*
781 * If adding an extra stream to an already
782 * existing channel (e.g. cpu hotplug), we need
783 * to send the "streams_sent" command to relayd.
784 */
785 if (channel->streams_sent_to_relayd) {
786 ret = consumer_send_relayd_streams_sent(
787 new_stream->net_seq_idx);
788 if (ret < 0) {
789 pthread_mutex_unlock(&new_stream->lock);
790 pthread_mutex_unlock(&channel->lock);
791 goto error_add_stream_nosignal;
792 }
793 }
794 }
795 pthread_mutex_unlock(&new_stream->lock);
796 pthread_mutex_unlock(&channel->lock);
797
798 /* Get the right pipe where the stream will be sent. */
799 if (new_stream->metadata_flag) {
800 consumer_add_metadata_stream(new_stream);
801 stream_pipe = ctx->consumer_metadata_pipe;
802 } else {
803 consumer_add_data_stream(new_stream);
804 stream_pipe = ctx->consumer_data_pipe;
805 }
806
807 /* Visible to other threads */
808 new_stream->globally_visible = 1;
809
810 health_code_update();
811
812 ret = lttng_pipe_write(stream_pipe, &new_stream, sizeof(new_stream));
813 if (ret < 0) {
814 ERR("Consumer write %s stream to pipe %d",
815 new_stream->metadata_flag ? "metadata" : "data",
816 lttng_pipe_get_writefd(stream_pipe));
817 if (new_stream->metadata_flag) {
818 consumer_del_stream_for_metadata(new_stream);
819 } else {
820 consumer_del_stream_for_data(new_stream);
821 }
822 goto error_add_stream_nosignal;
823 }
824
825 DBG("Kernel consumer ADD_STREAM %s (fd: %d) %s with relayd id %" PRIu64,
826 new_stream->name, fd, new_stream->chan->pathname, new_stream->relayd_stream_id);
827 end_add_stream:
828 break;
829 error_add_stream_nosignal:
830 goto end_nosignal;
831 error_add_stream_fatal:
832 goto error_fatal;
833 }
834 case LTTNG_CONSUMER_STREAMS_SENT:
835 {
836 struct lttng_consumer_channel *channel;
837
838 /*
839 * Get stream's channel reference. Needed when adding the stream to the
840 * global hash table.
841 */
842 channel = consumer_find_channel(msg.u.sent_streams.channel_key);
843 if (!channel) {
844 /*
845 * We could not find the channel. Can happen if cpu hotplug
846 * happens while tearing down.
847 */
848 ERR("Unable to find channel key %" PRIu64,
849 msg.u.sent_streams.channel_key);
850 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
851 }
852
853 health_code_update();
854
855 /*
856 * Send status code to session daemon.
857 */
858 ret = consumer_send_status_msg(sock, ret_code);
859 if (ret < 0 || ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
860 /* Somehow, the session daemon is not responding anymore. */
861 goto error_streams_sent_nosignal;
862 }
863
864 health_code_update();
865
866 /*
867 * We should not send this message if we don't monitor the
868 * streams in this channel.
869 */
870 if (!channel->monitor) {
871 goto end_error_streams_sent;
872 }
873
874 health_code_update();
875 /* Send stream to relayd if the stream has an ID. */
876 if (msg.u.sent_streams.net_seq_idx != (uint64_t) -1ULL) {
877 ret = consumer_send_relayd_streams_sent(
878 msg.u.sent_streams.net_seq_idx);
879 if (ret < 0) {
880 goto error_streams_sent_nosignal;
881 }
882 channel->streams_sent_to_relayd = true;
883 }
884 end_error_streams_sent:
885 break;
886 error_streams_sent_nosignal:
887 goto end_nosignal;
888 }
889 case LTTNG_CONSUMER_UPDATE_STREAM:
890 {
891 rcu_read_unlock();
892 return -ENOSYS;
893 }
894 case LTTNG_CONSUMER_DESTROY_RELAYD:
895 {
896 uint64_t index = msg.u.destroy_relayd.net_seq_idx;
897 struct consumer_relayd_sock_pair *relayd;
898
899 DBG("Kernel consumer destroying relayd %" PRIu64, index);
900
901 /* Get relayd reference if exists. */
902 relayd = consumer_find_relayd(index);
903 if (relayd == NULL) {
904 DBG("Unable to find relayd %" PRIu64, index);
905 ret_code = LTTCOMM_CONSUMERD_RELAYD_FAIL;
906 }
907
908 /*
909 * Each relayd socket pair has a refcount of stream attached to it
910 * which tells if the relayd is still active or not depending on the
911 * refcount value.
912 *
913 * This will set the destroy flag of the relayd object and destroy it
914 * if the refcount reaches zero when called.
915 *
916 * The destroy can happen either here or when a stream fd hangs up.
917 */
918 if (relayd) {
919 consumer_flag_relayd_for_destroy(relayd);
920 }
921
922 health_code_update();
923
924 ret = consumer_send_status_msg(sock, ret_code);
925 if (ret < 0) {
926 /* Somehow, the session daemon is not responding anymore. */
927 goto error_fatal;
928 }
929
930 goto end_nosignal;
931 }
932 case LTTNG_CONSUMER_DATA_PENDING:
933 {
934 int32_t ret;
935 uint64_t id = msg.u.data_pending.session_id;
936
937 DBG("Kernel consumer data pending command for id %" PRIu64, id);
938
939 ret = consumer_data_pending(id);
940
941 health_code_update();
942
943 /* Send back returned value to session daemon */
944 ret = lttcomm_send_unix_sock(sock, &ret, sizeof(ret));
945 if (ret < 0) {
946 PERROR("send data pending ret code");
947 goto error_fatal;
948 }
949
950 /*
951 * No need to send back a status message since the data pending
952 * returned value is the response.
953 */
954 break;
955 }
956 case LTTNG_CONSUMER_SNAPSHOT_CHANNEL:
957 {
958 struct lttng_consumer_channel *channel;
959 uint64_t key = msg.u.snapshot_channel.key;
960
961 channel = consumer_find_channel(key);
962 if (!channel) {
963 ERR("Channel %" PRIu64 " not found", key);
964 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
965 } else {
966 pthread_mutex_lock(&channel->lock);
967 if (msg.u.snapshot_channel.metadata == 1) {
968 ret = lttng_kconsumer_snapshot_metadata(channel, key,
969 msg.u.snapshot_channel.pathname,
970 msg.u.snapshot_channel.relayd_id, ctx);
971 if (ret < 0) {
972 ERR("Snapshot metadata failed");
973 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
974 }
975 } else {
976 ret = lttng_kconsumer_snapshot_channel(channel, key,
977 msg.u.snapshot_channel.pathname,
978 msg.u.snapshot_channel.relayd_id,
979 msg.u.snapshot_channel.nb_packets_per_stream,
980 ctx);
981 if (ret < 0) {
982 ERR("Snapshot channel failed");
983 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
984 }
985 }
986 pthread_mutex_unlock(&channel->lock);
987 }
988 health_code_update();
989
990 ret = consumer_send_status_msg(sock, ret_code);
991 if (ret < 0) {
992 /* Somehow, the session daemon is not responding anymore. */
993 goto end_nosignal;
994 }
995 break;
996 }
997 case LTTNG_CONSUMER_DESTROY_CHANNEL:
998 {
999 uint64_t key = msg.u.destroy_channel.key;
1000 struct lttng_consumer_channel *channel;
1001
1002 channel = consumer_find_channel(key);
1003 if (!channel) {
1004 ERR("Kernel consumer destroy channel %" PRIu64 " not found", key);
1005 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1006 }
1007
1008 health_code_update();
1009
1010 ret = consumer_send_status_msg(sock, ret_code);
1011 if (ret < 0) {
1012 /* Somehow, the session daemon is not responding anymore. */
1013 goto end_destroy_channel;
1014 }
1015
1016 health_code_update();
1017
1018 /* Stop right now if no channel was found. */
1019 if (!channel) {
1020 goto end_destroy_channel;
1021 }
1022
1023 /*
1024 * This command should ONLY be issued for channel with streams set in
1025 * no monitor mode.
1026 */
1027 assert(!channel->monitor);
1028
1029 /*
1030 * The refcount should ALWAYS be 0 in the case of a channel in no
1031 * monitor mode.
1032 */
1033 assert(!uatomic_sub_return(&channel->refcount, 1));
1034
1035 consumer_del_channel(channel);
1036 end_destroy_channel:
1037 goto end_nosignal;
1038 }
1039 case LTTNG_CONSUMER_DISCARDED_EVENTS:
1040 {
1041 ssize_t ret;
1042 uint64_t count;
1043 struct lttng_consumer_channel *channel;
1044 uint64_t id = msg.u.discarded_events.session_id;
1045 uint64_t key = msg.u.discarded_events.channel_key;
1046
1047 DBG("Kernel consumer discarded events command for session id %"
1048 PRIu64 ", channel key %" PRIu64, id, key);
1049
1050 channel = consumer_find_channel(key);
1051 if (!channel) {
1052 ERR("Kernel consumer discarded events channel %"
1053 PRIu64 " not found", key);
1054 count = 0;
1055 } else {
1056 count = channel->discarded_events;
1057 }
1058
1059 health_code_update();
1060
1061 /* Send back returned value to session daemon */
1062 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1063 if (ret < 0) {
1064 PERROR("send discarded events");
1065 goto error_fatal;
1066 }
1067
1068 break;
1069 }
1070 case LTTNG_CONSUMER_LOST_PACKETS:
1071 {
1072 ssize_t ret;
1073 uint64_t count;
1074 struct lttng_consumer_channel *channel;
1075 uint64_t id = msg.u.lost_packets.session_id;
1076 uint64_t key = msg.u.lost_packets.channel_key;
1077
1078 DBG("Kernel consumer lost packets command for session id %"
1079 PRIu64 ", channel key %" PRIu64, id, key);
1080
1081 channel = consumer_find_channel(key);
1082 if (!channel) {
1083 ERR("Kernel consumer lost packets channel %"
1084 PRIu64 " not found", key);
1085 count = 0;
1086 } else {
1087 count = channel->lost_packets;
1088 }
1089
1090 health_code_update();
1091
1092 /* Send back returned value to session daemon */
1093 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1094 if (ret < 0) {
1095 PERROR("send lost packets");
1096 goto error_fatal;
1097 }
1098
1099 break;
1100 }
1101 case LTTNG_CONSUMER_SET_CHANNEL_MONITOR_PIPE:
1102 {
1103 int channel_monitor_pipe;
1104
1105 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1106 /* Successfully received the command's type. */
1107 ret = consumer_send_status_msg(sock, ret_code);
1108 if (ret < 0) {
1109 goto error_fatal;
1110 }
1111
1112 ret = lttcomm_recv_fds_unix_sock(sock, &channel_monitor_pipe,
1113 1);
1114 if (ret != sizeof(channel_monitor_pipe)) {
1115 ERR("Failed to receive channel monitor pipe");
1116 goto error_fatal;
1117 }
1118
1119 DBG("Received channel monitor pipe (%d)", channel_monitor_pipe);
1120 ret = consumer_timer_thread_set_channel_monitor_pipe(
1121 channel_monitor_pipe);
1122 if (!ret) {
1123 int flags;
1124
1125 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1126 /* Set the pipe as non-blocking. */
1127 ret = fcntl(channel_monitor_pipe, F_GETFL, 0);
1128 if (ret == -1) {
1129 PERROR("fcntl get flags of the channel monitoring pipe");
1130 goto error_fatal;
1131 }
1132 flags = ret;
1133
1134 ret = fcntl(channel_monitor_pipe, F_SETFL,
1135 flags | O_NONBLOCK);
1136 if (ret == -1) {
1137 PERROR("fcntl set O_NONBLOCK flag of the channel monitoring pipe");
1138 goto error_fatal;
1139 }
1140 DBG("Channel monitor pipe set as non-blocking");
1141 } else {
1142 ret_code = LTTCOMM_CONSUMERD_ALREADY_SET;
1143 }
1144 ret = consumer_send_status_msg(sock, ret_code);
1145 if (ret < 0) {
1146 goto error_fatal;
1147 }
1148 break;
1149 }
1150 case LTTNG_CONSUMER_ROTATE_CHANNEL:
1151 {
1152 struct lttng_consumer_channel *channel;
1153 uint64_t key = msg.u.rotate_channel.key;
1154
1155 DBG("Consumer rotate channel %" PRIu64, key);
1156
1157 channel = consumer_find_channel(key);
1158 if (!channel) {
1159 ERR("Channel %" PRIu64 " not found", key);
1160 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1161 } else {
1162 /*
1163 * Sample the rotate position of all the streams in this channel.
1164 */
1165 ret = lttng_consumer_rotate_channel(channel, key,
1166 msg.u.rotate_channel.relayd_id,
1167 msg.u.rotate_channel.metadata,
1168 ctx);
1169 if (ret < 0) {
1170 ERR("Rotate channel failed");
1171 ret_code = LTTCOMM_CONSUMERD_ROTATION_FAIL;
1172 }
1173
1174 health_code_update();
1175 }
1176 ret = consumer_send_status_msg(sock, ret_code);
1177 if (ret < 0) {
1178 /* Somehow, the session daemon is not responding anymore. */
1179 goto error_rotate_channel;
1180 }
1181 if (channel) {
1182 /* Rotate the streams that are ready right now. */
1183 ret = lttng_consumer_rotate_ready_streams(
1184 channel, key, ctx);
1185 if (ret < 0) {
1186 ERR("Rotate ready streams failed");
1187 }
1188 }
1189 break;
1190 error_rotate_channel:
1191 goto end_nosignal;
1192 }
1193 case LTTNG_CONSUMER_INIT:
1194 {
1195 ret_code = lttng_consumer_init_command(ctx,
1196 msg.u.init.sessiond_uuid);
1197 health_code_update();
1198 ret = consumer_send_status_msg(sock, ret_code);
1199 if (ret < 0) {
1200 /* Somehow, the session daemon is not responding anymore. */
1201 goto end_nosignal;
1202 }
1203 break;
1204 }
1205 case LTTNG_CONSUMER_CREATE_TRACE_CHUNK:
1206 {
1207 const struct lttng_credentials credentials = {
1208 .uid = msg.u.create_trace_chunk.credentials.value.uid,
1209 .gid = msg.u.create_trace_chunk.credentials.value.gid,
1210 };
1211 const bool is_local_trace =
1212 !msg.u.create_trace_chunk.relayd_id.is_set;
1213 const uint64_t relayd_id =
1214 msg.u.create_trace_chunk.relayd_id.value;
1215 const char *chunk_override_name =
1216 *msg.u.create_trace_chunk.override_name ?
1217 msg.u.create_trace_chunk.override_name :
1218 NULL;
1219 LTTNG_OPTIONAL(struct lttng_directory_handle) chunk_directory_handle =
1220 LTTNG_OPTIONAL_INIT;
1221
1222 /*
1223 * The session daemon will only provide a chunk directory file
1224 * descriptor for local traces.
1225 */
1226 if (is_local_trace) {
1227 int chunk_dirfd;
1228
1229 /* Acnowledge the reception of the command. */
1230 ret = consumer_send_status_msg(sock,
1231 LTTCOMM_CONSUMERD_SUCCESS);
1232 if (ret < 0) {
1233 /* Somehow, the session daemon is not responding anymore. */
1234 goto end_nosignal;
1235 }
1236
1237 ret = lttcomm_recv_fds_unix_sock(sock, &chunk_dirfd, 1);
1238 if (ret != sizeof(chunk_dirfd)) {
1239 ERR("Failed to receive trace chunk directory file descriptor");
1240 goto error_fatal;
1241 }
1242
1243 DBG("Received trace chunk directory fd (%d)",
1244 chunk_dirfd);
1245 ret = lttng_directory_handle_init_from_dirfd(
1246 &chunk_directory_handle.value,
1247 chunk_dirfd);
1248 if (ret) {
1249 ERR("Failed to initialize chunk directory handle from directory file descriptor");
1250 if (close(chunk_dirfd)) {
1251 PERROR("Failed to close chunk directory file descriptor");
1252 }
1253 goto error_fatal;
1254 }
1255 chunk_directory_handle.is_set = true;
1256 }
1257
1258 ret_code = lttng_consumer_create_trace_chunk(
1259 !is_local_trace ? &relayd_id : NULL,
1260 msg.u.create_trace_chunk.session_id,
1261 msg.u.create_trace_chunk.chunk_id,
1262 (time_t) msg.u.create_trace_chunk
1263 .creation_timestamp,
1264 chunk_override_name,
1265 msg.u.create_trace_chunk.credentials.is_set ?
1266 &credentials :
1267 NULL,
1268 chunk_directory_handle.is_set ?
1269 &chunk_directory_handle.value :
1270 NULL);
1271
1272 if (chunk_directory_handle.is_set) {
1273 lttng_directory_handle_fini(
1274 &chunk_directory_handle.value);
1275 }
1276 goto end_msg_sessiond;
1277 }
1278 case LTTNG_CONSUMER_CLOSE_TRACE_CHUNK:
1279 {
1280 enum lttng_trace_chunk_command_type close_command =
1281 msg.u.close_trace_chunk.close_command.value;
1282 const uint64_t relayd_id =
1283 msg.u.close_trace_chunk.relayd_id.value;
1284 struct lttcomm_consumer_close_trace_chunk_reply reply;
1285 char path[LTTNG_PATH_MAX];
1286
1287 ret_code = lttng_consumer_close_trace_chunk(
1288 msg.u.close_trace_chunk.relayd_id.is_set ?
1289 &relayd_id :
1290 NULL,
1291 msg.u.close_trace_chunk.session_id,
1292 msg.u.close_trace_chunk.chunk_id,
1293 (time_t) msg.u.close_trace_chunk.close_timestamp,
1294 msg.u.close_trace_chunk.close_command.is_set ?
1295 &close_command :
1296 NULL, path);
1297 reply.ret_code = ret_code;
1298 reply.path_length = strlen(path) + 1;
1299 ret = lttcomm_send_unix_sock(sock, &reply, sizeof(reply));
1300 if (ret != sizeof(reply)) {
1301 goto error_fatal;
1302 }
1303 ret = lttcomm_send_unix_sock(sock, path, reply.path_length);
1304 if (ret != reply.path_length) {
1305 goto error_fatal;
1306 }
1307 goto end_nosignal;
1308 }
1309 case LTTNG_CONSUMER_TRACE_CHUNK_EXISTS:
1310 {
1311 const uint64_t relayd_id =
1312 msg.u.trace_chunk_exists.relayd_id.value;
1313
1314 ret_code = lttng_consumer_trace_chunk_exists(
1315 msg.u.trace_chunk_exists.relayd_id.is_set ?
1316 &relayd_id : NULL,
1317 msg.u.trace_chunk_exists.session_id,
1318 msg.u.trace_chunk_exists.chunk_id);
1319 goto end_msg_sessiond;
1320 }
1321 default:
1322 goto end_nosignal;
1323 }
1324
1325 end_nosignal:
1326 /*
1327 * Return 1 to indicate success since the 0 value can be a socket
1328 * shutdown during the recv() or send() call.
1329 */
1330 ret = 1;
1331 goto end;
1332 error_fatal:
1333 /* This will issue a consumer stop. */
1334 ret = -1;
1335 goto end;
1336 end_msg_sessiond:
1337 /*
1338 * The returned value here is not useful since either way we'll return 1 to
1339 * the caller because the session daemon socket management is done
1340 * elsewhere. Returning a negative code or 0 will shutdown the consumer.
1341 */
1342 ret = consumer_send_status_msg(sock, ret_code);
1343 if (ret < 0) {
1344 goto error_fatal;
1345 }
1346 ret = 1;
1347 end:
1348 health_code_update();
1349 rcu_read_unlock();
1350 return ret;
1351 }
1352
1353 /*
1354 * Populate index values of a kernel stream. Values are set in big endian order.
1355 *
1356 * Return 0 on success or else a negative value.
1357 */
1358 static int get_index_values(struct ctf_packet_index *index, int infd)
1359 {
1360 int ret;
1361 uint64_t packet_size, content_size, timestamp_begin, timestamp_end,
1362 events_discarded, stream_id, stream_instance_id,
1363 packet_seq_num;
1364
1365 ret = kernctl_get_timestamp_begin(infd, &timestamp_begin);
1366 if (ret < 0) {
1367 PERROR("kernctl_get_timestamp_begin");
1368 goto error;
1369 }
1370
1371 ret = kernctl_get_timestamp_end(infd, &timestamp_end);
1372 if (ret < 0) {
1373 PERROR("kernctl_get_timestamp_end");
1374 goto error;
1375 }
1376
1377 ret = kernctl_get_events_discarded(infd, &events_discarded);
1378 if (ret < 0) {
1379 PERROR("kernctl_get_events_discarded");
1380 goto error;
1381 }
1382
1383 ret = kernctl_get_content_size(infd, &content_size);
1384 if (ret < 0) {
1385 PERROR("kernctl_get_content_size");
1386 goto error;
1387 }
1388
1389 ret = kernctl_get_packet_size(infd, &packet_size);
1390 if (ret < 0) {
1391 PERROR("kernctl_get_packet_size");
1392 goto error;
1393 }
1394
1395 ret = kernctl_get_stream_id(infd, &stream_id);
1396 if (ret < 0) {
1397 PERROR("kernctl_get_stream_id");
1398 goto error;
1399 }
1400
1401 ret = kernctl_get_instance_id(infd, &stream_instance_id);
1402 if (ret < 0) {
1403 if (ret == -ENOTTY) {
1404 /* Command not implemented by lttng-modules. */
1405 stream_instance_id = -1ULL;
1406 } else {
1407 PERROR("kernctl_get_instance_id");
1408 goto error;
1409 }
1410 }
1411
1412 ret = kernctl_get_sequence_number(infd, &packet_seq_num);
1413 if (ret < 0) {
1414 if (ret == -ENOTTY) {
1415 /* Command not implemented by lttng-modules. */
1416 packet_seq_num = -1ULL;
1417 ret = 0;
1418 } else {
1419 PERROR("kernctl_get_sequence_number");
1420 goto error;
1421 }
1422 }
1423 index->packet_seq_num = htobe64(index->packet_seq_num);
1424
1425 *index = (typeof(*index)) {
1426 .offset = index->offset,
1427 .packet_size = htobe64(packet_size),
1428 .content_size = htobe64(content_size),
1429 .timestamp_begin = htobe64(timestamp_begin),
1430 .timestamp_end = htobe64(timestamp_end),
1431 .events_discarded = htobe64(events_discarded),
1432 .stream_id = htobe64(stream_id),
1433 .stream_instance_id = htobe64(stream_instance_id),
1434 .packet_seq_num = htobe64(packet_seq_num),
1435 };
1436
1437 error:
1438 return ret;
1439 }
1440 /*
1441 * Sync metadata meaning request them to the session daemon and snapshot to the
1442 * metadata thread can consumer them.
1443 *
1444 * Metadata stream lock MUST be acquired.
1445 *
1446 * Return 0 if new metadatda is available, EAGAIN if the metadata stream
1447 * is empty or a negative value on error.
1448 */
1449 int lttng_kconsumer_sync_metadata(struct lttng_consumer_stream *metadata)
1450 {
1451 int ret;
1452
1453 assert(metadata);
1454
1455 ret = kernctl_buffer_flush(metadata->wait_fd);
1456 if (ret < 0) {
1457 ERR("Failed to flush kernel stream");
1458 goto end;
1459 }
1460
1461 ret = kernctl_snapshot(metadata->wait_fd);
1462 if (ret < 0) {
1463 if (ret != -EAGAIN) {
1464 ERR("Sync metadata, taking kernel snapshot failed.");
1465 goto end;
1466 }
1467 DBG("Sync metadata, no new kernel metadata");
1468 /* No new metadata, exit. */
1469 ret = ENODATA;
1470 goto end;
1471 }
1472
1473 end:
1474 return ret;
1475 }
1476
1477 static
1478 int update_stream_stats(struct lttng_consumer_stream *stream)
1479 {
1480 int ret;
1481 uint64_t seq, discarded;
1482
1483 ret = kernctl_get_sequence_number(stream->wait_fd, &seq);
1484 if (ret < 0) {
1485 if (ret == -ENOTTY) {
1486 /* Command not implemented by lttng-modules. */
1487 seq = -1ULL;
1488 stream->sequence_number_unavailable = true;
1489 } else {
1490 PERROR("kernctl_get_sequence_number");
1491 goto end;
1492 }
1493 }
1494
1495 /*
1496 * Start the sequence when we extract the first packet in case we don't
1497 * start at 0 (for example if a consumer is not connected to the
1498 * session immediately after the beginning).
1499 */
1500 if (stream->last_sequence_number == -1ULL) {
1501 stream->last_sequence_number = seq;
1502 } else if (seq > stream->last_sequence_number) {
1503 stream->chan->lost_packets += seq -
1504 stream->last_sequence_number - 1;
1505 } else {
1506 /* seq <= last_sequence_number */
1507 ERR("Sequence number inconsistent : prev = %" PRIu64
1508 ", current = %" PRIu64,
1509 stream->last_sequence_number, seq);
1510 ret = -1;
1511 goto end;
1512 }
1513 stream->last_sequence_number = seq;
1514
1515 ret = kernctl_get_events_discarded(stream->wait_fd, &discarded);
1516 if (ret < 0) {
1517 PERROR("kernctl_get_events_discarded");
1518 goto end;
1519 }
1520 if (discarded < stream->last_discarded_events) {
1521 /*
1522 * Overflow has occurred. We assume only one wrap-around
1523 * has occurred.
1524 */
1525 stream->chan->discarded_events += (1ULL << (CAA_BITS_PER_LONG - 1)) -
1526 stream->last_discarded_events + discarded;
1527 } else {
1528 stream->chan->discarded_events += discarded -
1529 stream->last_discarded_events;
1530 }
1531 stream->last_discarded_events = discarded;
1532 ret = 0;
1533
1534 end:
1535 return ret;
1536 }
1537
1538 /*
1539 * Check if the local version of the metadata stream matches with the version
1540 * of the metadata stream in the kernel. If it was updated, set the reset flag
1541 * on the stream.
1542 */
1543 static
1544 int metadata_stream_check_version(int infd, struct lttng_consumer_stream *stream)
1545 {
1546 int ret;
1547 uint64_t cur_version;
1548
1549 ret = kernctl_get_metadata_version(infd, &cur_version);
1550 if (ret < 0) {
1551 if (ret == -ENOTTY) {
1552 /*
1553 * LTTng-modules does not implement this
1554 * command.
1555 */
1556 ret = 0;
1557 goto end;
1558 }
1559 ERR("Failed to get the metadata version");
1560 goto end;
1561 }
1562
1563 if (stream->metadata_version == cur_version) {
1564 ret = 0;
1565 goto end;
1566 }
1567
1568 DBG("New metadata version detected");
1569 stream->metadata_version = cur_version;
1570 stream->reset_metadata_flag = 1;
1571 ret = 0;
1572
1573 end:
1574 return ret;
1575 }
1576
1577 /*
1578 * Consume data on a file descriptor and write it on a trace file.
1579 * The stream and channel locks must be held by the caller.
1580 */
1581 ssize_t lttng_kconsumer_read_subbuffer(struct lttng_consumer_stream *stream,
1582 struct lttng_consumer_local_data *ctx)
1583 {
1584 unsigned long len, subbuf_size, padding;
1585 int err, write_index = 1, rotation_ret;
1586 ssize_t ret = 0;
1587 int infd = stream->wait_fd;
1588 struct ctf_packet_index index = {};
1589
1590 DBG("In read_subbuffer (infd : %d)", infd);
1591
1592 /*
1593 * If the stream was flagged to be ready for rotation before we extract the
1594 * next packet, rotate it now.
1595 */
1596 if (stream->rotate_ready) {
1597 DBG("Rotate stream before extracting data");
1598 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1599 if (rotation_ret < 0) {
1600 ERR("Stream rotation error");
1601 ret = -1;
1602 goto error;
1603 }
1604 }
1605
1606 /* Get the next subbuffer */
1607 err = kernctl_get_next_subbuf(infd);
1608 if (err != 0) {
1609 /*
1610 * This is a debug message even for single-threaded consumer,
1611 * because poll() have more relaxed criterions than get subbuf,
1612 * so get_subbuf may fail for short race windows where poll()
1613 * would issue wakeups.
1614 */
1615 DBG("Reserving sub buffer failed (everything is normal, "
1616 "it is due to concurrency)");
1617 ret = err;
1618 goto error;
1619 }
1620
1621 /* Get the full subbuffer size including padding */
1622 err = kernctl_get_padded_subbuf_size(infd, &len);
1623 if (err != 0) {
1624 PERROR("Getting sub-buffer len failed.");
1625 err = kernctl_put_subbuf(infd);
1626 if (err != 0) {
1627 if (err == -EFAULT) {
1628 PERROR("Error in unreserving sub buffer\n");
1629 } else if (err == -EIO) {
1630 /* Should never happen with newer LTTng versions */
1631 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1632 }
1633 ret = err;
1634 goto error;
1635 }
1636 ret = err;
1637 goto error;
1638 }
1639
1640 if (!stream->metadata_flag) {
1641 ret = get_index_values(&index, infd);
1642 if (ret < 0) {
1643 err = kernctl_put_subbuf(infd);
1644 if (err != 0) {
1645 if (err == -EFAULT) {
1646 PERROR("Error in unreserving sub buffer\n");
1647 } else if (err == -EIO) {
1648 /* Should never happen with newer LTTng versions */
1649 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1650 }
1651 ret = err;
1652 goto error;
1653 }
1654 goto error;
1655 }
1656 ret = update_stream_stats(stream);
1657 if (ret < 0) {
1658 err = kernctl_put_subbuf(infd);
1659 if (err != 0) {
1660 if (err == -EFAULT) {
1661 PERROR("Error in unreserving sub buffer\n");
1662 } else if (err == -EIO) {
1663 /* Should never happen with newer LTTng versions */
1664 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1665 }
1666 ret = err;
1667 goto error;
1668 }
1669 goto error;
1670 }
1671 } else {
1672 write_index = 0;
1673 ret = metadata_stream_check_version(infd, stream);
1674 if (ret < 0) {
1675 err = kernctl_put_subbuf(infd);
1676 if (err != 0) {
1677 if (err == -EFAULT) {
1678 PERROR("Error in unreserving sub buffer\n");
1679 } else if (err == -EIO) {
1680 /* Should never happen with newer LTTng versions */
1681 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1682 }
1683 ret = err;
1684 goto error;
1685 }
1686 goto error;
1687 }
1688 }
1689
1690 switch (stream->chan->output) {
1691 case CONSUMER_CHANNEL_SPLICE:
1692 /*
1693 * XXX: The lttng-modules splice "actor" does not handle copying
1694 * partial pages hence only using the subbuffer size without the
1695 * padding makes the splice fail.
1696 */
1697 subbuf_size = len;
1698 padding = 0;
1699
1700 /* splice the subbuffer to the tracefile */
1701 ret = lttng_consumer_on_read_subbuffer_splice(ctx, stream, subbuf_size,
1702 padding, &index);
1703 /*
1704 * XXX: Splice does not support network streaming so the return value
1705 * is simply checked against subbuf_size and not like the mmap() op.
1706 */
1707 if (ret != subbuf_size) {
1708 /*
1709 * display the error but continue processing to try
1710 * to release the subbuffer
1711 */
1712 ERR("Error splicing to tracefile (ret: %zd != len: %lu)",
1713 ret, subbuf_size);
1714 write_index = 0;
1715 }
1716 break;
1717 case CONSUMER_CHANNEL_MMAP:
1718 {
1719 const char *subbuf_addr;
1720 struct lttng_buffer_view subbuf_view;
1721
1722 /* Get subbuffer size without padding */
1723 err = kernctl_get_subbuf_size(infd, &subbuf_size);
1724 if (err != 0) {
1725 PERROR("Getting sub-buffer len failed.");
1726 err = kernctl_put_subbuf(infd);
1727 if (err != 0) {
1728 if (err == -EFAULT) {
1729 PERROR("Error in unreserving sub buffer\n");
1730 } else if (err == -EIO) {
1731 /* Should never happen with newer LTTng versions */
1732 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1733 }
1734 ret = err;
1735 goto error;
1736 }
1737 ret = err;
1738 goto error;
1739 }
1740
1741 ret = get_current_subbuf_addr(stream, &subbuf_addr);
1742 if (ret) {
1743 goto error_put_subbuf;
1744 }
1745
1746 /* Make sure the tracer is not gone mad on us! */
1747 assert(len >= subbuf_size);
1748
1749 padding = len - subbuf_size;
1750
1751 subbuf_view = lttng_buffer_view_init(subbuf_addr, 0, len);
1752
1753 /* write the subbuffer to the tracefile */
1754 ret = lttng_consumer_on_read_subbuffer_mmap(
1755 ctx, stream, &subbuf_view, padding, &index);
1756 /*
1757 * The mmap operation should write subbuf_size amount of data
1758 * when network streaming or the full padding (len) size when we
1759 * are _not_ streaming.
1760 */
1761 if ((ret != subbuf_size && stream->net_seq_idx != (uint64_t) -1ULL) ||
1762 (ret != len && stream->net_seq_idx == (uint64_t) -1ULL)) {
1763 /*
1764 * Display the error but continue processing to try to release the
1765 * subbuffer. This is a DBG statement since this is possible to
1766 * happen without being a critical error.
1767 */
1768 DBG("Error writing to tracefile "
1769 "(ret: %zd != len: %lu != subbuf_size: %lu)",
1770 ret, len, subbuf_size);
1771 write_index = 0;
1772 }
1773 break;
1774 }
1775 default:
1776 ERR("Unknown output method");
1777 ret = -EPERM;
1778 }
1779 error_put_subbuf:
1780 err = kernctl_put_next_subbuf(infd);
1781 if (err != 0) {
1782 if (err == -EFAULT) {
1783 PERROR("Error in unreserving sub buffer\n");
1784 } else if (err == -EIO) {
1785 /* Should never happen with newer LTTng versions */
1786 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1787 }
1788 ret = err;
1789 goto error;
1790 }
1791
1792 /* Write index if needed. */
1793 if (!write_index) {
1794 goto rotate;
1795 }
1796
1797 if (stream->chan->live_timer_interval && !stream->metadata_flag) {
1798 /*
1799 * In live, block until all the metadata is sent.
1800 */
1801 pthread_mutex_lock(&stream->metadata_timer_lock);
1802 assert(!stream->missed_metadata_flush);
1803 stream->waiting_on_metadata = true;
1804 pthread_mutex_unlock(&stream->metadata_timer_lock);
1805
1806 err = consumer_stream_sync_metadata(ctx, stream->session_id);
1807
1808 pthread_mutex_lock(&stream->metadata_timer_lock);
1809 stream->waiting_on_metadata = false;
1810 if (stream->missed_metadata_flush) {
1811 stream->missed_metadata_flush = false;
1812 pthread_mutex_unlock(&stream->metadata_timer_lock);
1813 (void) consumer_flush_kernel_index(stream);
1814 } else {
1815 pthread_mutex_unlock(&stream->metadata_timer_lock);
1816 }
1817 if (err < 0) {
1818 goto error;
1819 }
1820 }
1821
1822 err = consumer_stream_write_index(stream, &index);
1823 if (err < 0) {
1824 goto error;
1825 }
1826
1827 rotate:
1828 /*
1829 * After extracting the packet, we check if the stream is now ready to be
1830 * rotated and perform the action immediately.
1831 */
1832 rotation_ret = lttng_consumer_stream_is_rotate_ready(stream);
1833 if (rotation_ret == 1) {
1834 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1835 if (rotation_ret < 0) {
1836 ERR("Stream rotation error");
1837 ret = -1;
1838 goto error;
1839 }
1840 } else if (rotation_ret < 0) {
1841 ERR("Checking if stream is ready to rotate");
1842 ret = -1;
1843 goto error;
1844 }
1845
1846 error:
1847 return ret;
1848 }
1849
1850 int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream *stream)
1851 {
1852 int ret;
1853
1854 assert(stream);
1855
1856 /*
1857 * Don't create anything if this is set for streaming or if there is
1858 * no current trace chunk on the parent channel.
1859 */
1860 if (stream->net_seq_idx == (uint64_t) -1ULL && stream->chan->monitor &&
1861 stream->chan->trace_chunk) {
1862 ret = consumer_stream_create_output_files(stream, true);
1863 if (ret) {
1864 goto error;
1865 }
1866 }
1867
1868 if (stream->output == LTTNG_EVENT_MMAP) {
1869 /* get the len of the mmap region */
1870 unsigned long mmap_len;
1871
1872 ret = kernctl_get_mmap_len(stream->wait_fd, &mmap_len);
1873 if (ret != 0) {
1874 PERROR("kernctl_get_mmap_len");
1875 goto error_close_fd;
1876 }
1877 stream->mmap_len = (size_t) mmap_len;
1878
1879 stream->mmap_base = mmap(NULL, stream->mmap_len, PROT_READ,
1880 MAP_PRIVATE, stream->wait_fd, 0);
1881 if (stream->mmap_base == MAP_FAILED) {
1882 PERROR("Error mmaping");
1883 ret = -1;
1884 goto error_close_fd;
1885 }
1886 }
1887
1888 /* we return 0 to let the library handle the FD internally */
1889 return 0;
1890
1891 error_close_fd:
1892 if (stream->out_fd >= 0) {
1893 int err;
1894
1895 err = close(stream->out_fd);
1896 assert(!err);
1897 stream->out_fd = -1;
1898 }
1899 error:
1900 return ret;
1901 }
1902
1903 /*
1904 * Check if data is still being extracted from the buffers for a specific
1905 * stream. Consumer data lock MUST be acquired before calling this function
1906 * and the stream lock.
1907 *
1908 * Return 1 if the traced data are still getting read else 0 meaning that the
1909 * data is available for trace viewer reading.
1910 */
1911 int lttng_kconsumer_data_pending(struct lttng_consumer_stream *stream)
1912 {
1913 int ret;
1914
1915 assert(stream);
1916
1917 if (stream->endpoint_status != CONSUMER_ENDPOINT_ACTIVE) {
1918 ret = 0;
1919 goto end;
1920 }
1921
1922 ret = kernctl_get_next_subbuf(stream->wait_fd);
1923 if (ret == 0) {
1924 /* There is still data so let's put back this subbuffer. */
1925 ret = kernctl_put_subbuf(stream->wait_fd);
1926 assert(ret == 0);
1927 ret = 1; /* Data is pending */
1928 goto end;
1929 }
1930
1931 /* Data is NOT pending and ready to be read. */
1932 ret = 0;
1933
1934 end:
1935 return ret;
1936 }
This page took 0.113884 seconds and 4 git commands to generate.