d6cefe4e54bdc144b6e810e5641aef052b4a5b9f
[lttng-tools.git] / src / common / kernel-consumer / kernel-consumer.c
1 /*
2 * Copyright (C) 2011 - Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * Copyright (C) 2017 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include "common/buffer-view.h"
21 #include <stdint.h>
22 #define _LGPL_SOURCE
23 #include <assert.h>
24 #include <poll.h>
25 #include <pthread.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <sys/mman.h>
29 #include <sys/socket.h>
30 #include <sys/types.h>
31 #include <inttypes.h>
32 #include <unistd.h>
33 #include <sys/stat.h>
34
35 #include <bin/lttng-consumerd/health-consumerd.h>
36 #include <common/common.h>
37 #include <common/kernel-ctl/kernel-ctl.h>
38 #include <common/sessiond-comm/sessiond-comm.h>
39 #include <common/sessiond-comm/relayd.h>
40 #include <common/compat/fcntl.h>
41 #include <common/compat/endian.h>
42 #include <common/pipe.h>
43 #include <common/relayd/relayd.h>
44 #include <common/utils.h>
45 #include <common/consumer/consumer-stream.h>
46 #include <common/index/index.h>
47 #include <common/consumer/consumer-timer.h>
48 #include <common/optional.h>
49
50 #include "kernel-consumer.h"
51
52 extern struct lttng_consumer_global_data consumer_data;
53 extern int consumer_poll_timeout;
54
55 /*
56 * Take a snapshot for a specific fd
57 *
58 * Returns 0 on success, < 0 on error
59 */
60 int lttng_kconsumer_take_snapshot(struct lttng_consumer_stream *stream)
61 {
62 int ret = 0;
63 int infd = stream->wait_fd;
64
65 ret = kernctl_snapshot(infd);
66 /*
67 * -EAGAIN is not an error, it just means that there is no data to
68 * be read.
69 */
70 if (ret != 0 && ret != -EAGAIN) {
71 PERROR("Getting sub-buffer snapshot.");
72 }
73
74 return ret;
75 }
76
77 /*
78 * Sample consumed and produced positions for a specific fd.
79 *
80 * Returns 0 on success, < 0 on error.
81 */
82 int lttng_kconsumer_sample_snapshot_positions(
83 struct lttng_consumer_stream *stream)
84 {
85 assert(stream);
86
87 return kernctl_snapshot_sample_positions(stream->wait_fd);
88 }
89
90 /*
91 * Get the produced position
92 *
93 * Returns 0 on success, < 0 on error
94 */
95 int lttng_kconsumer_get_produced_snapshot(struct lttng_consumer_stream *stream,
96 unsigned long *pos)
97 {
98 int ret;
99 int infd = stream->wait_fd;
100
101 ret = kernctl_snapshot_get_produced(infd, pos);
102 if (ret != 0) {
103 PERROR("kernctl_snapshot_get_produced");
104 }
105
106 return ret;
107 }
108
109 /*
110 * Get the consumerd position
111 *
112 * Returns 0 on success, < 0 on error
113 */
114 int lttng_kconsumer_get_consumed_snapshot(struct lttng_consumer_stream *stream,
115 unsigned long *pos)
116 {
117 int ret;
118 int infd = stream->wait_fd;
119
120 ret = kernctl_snapshot_get_consumed(infd, pos);
121 if (ret != 0) {
122 PERROR("kernctl_snapshot_get_consumed");
123 }
124
125 return ret;
126 }
127
128 static
129 int get_current_subbuf_addr(struct lttng_consumer_stream *stream,
130 const char **addr)
131 {
132 int ret;
133 unsigned long mmap_offset;
134 const char *mmap_base = stream->mmap_base;
135
136 ret = kernctl_get_mmap_read_offset(stream->wait_fd, &mmap_offset);
137 if (ret < 0) {
138 PERROR("Failed to get mmap read offset");
139 goto error;
140 }
141
142 *addr = mmap_base + mmap_offset;
143 error:
144 return ret;
145 }
146
147 /*
148 * Take a snapshot of all the stream of a channel
149 * RCU read-side lock must be held across this function to ensure existence of
150 * channel. The channel lock must be held by the caller.
151 *
152 * Returns 0 on success, < 0 on error
153 */
154 static int lttng_kconsumer_snapshot_channel(
155 struct lttng_consumer_channel *channel,
156 uint64_t key, char *path, uint64_t relayd_id,
157 uint64_t nb_packets_per_stream,
158 struct lttng_consumer_local_data *ctx)
159 {
160 int ret;
161 struct lttng_consumer_stream *stream;
162
163 DBG("Kernel consumer snapshot channel %" PRIu64, key);
164
165 rcu_read_lock();
166
167 /* Splice is not supported yet for channel snapshot. */
168 if (channel->output != CONSUMER_CHANNEL_MMAP) {
169 ERR("Unsupported output type for channel \"%s\": mmap output is required to record a snapshot",
170 channel->name);
171 ret = -1;
172 goto end;
173 }
174
175 cds_list_for_each_entry(stream, &channel->streams.head, send_node) {
176 unsigned long consumed_pos, produced_pos;
177
178 health_code_update();
179
180 /*
181 * Lock stream because we are about to change its state.
182 */
183 pthread_mutex_lock(&stream->lock);
184
185 assert(channel->trace_chunk);
186 if (!lttng_trace_chunk_get(channel->trace_chunk)) {
187 /*
188 * Can't happen barring an internal error as the channel
189 * holds a reference to the trace chunk.
190 */
191 ERR("Failed to acquire reference to channel's trace chunk");
192 ret = -1;
193 goto end_unlock;
194 }
195 assert(!stream->trace_chunk);
196 stream->trace_chunk = channel->trace_chunk;
197
198 /*
199 * Assign the received relayd ID so we can use it for streaming. The streams
200 * are not visible to anyone so this is OK to change it.
201 */
202 stream->net_seq_idx = relayd_id;
203 channel->relayd_id = relayd_id;
204 if (relayd_id != (uint64_t) -1ULL) {
205 ret = consumer_send_relayd_stream(stream, path);
206 if (ret < 0) {
207 ERR("sending stream to relayd");
208 goto end_unlock;
209 }
210 } else {
211 ret = consumer_stream_create_output_files(stream,
212 false);
213 if (ret < 0) {
214 goto end_unlock;
215 }
216 DBG("Kernel consumer snapshot stream (%" PRIu64 ")",
217 stream->key);
218 }
219
220 ret = kernctl_buffer_flush_empty(stream->wait_fd);
221 if (ret < 0) {
222 /*
223 * Doing a buffer flush which does not take into
224 * account empty packets. This is not perfect
225 * for stream intersection, but required as a
226 * fall-back when "flush_empty" is not
227 * implemented by lttng-modules.
228 */
229 ret = kernctl_buffer_flush(stream->wait_fd);
230 if (ret < 0) {
231 ERR("Failed to flush kernel stream");
232 goto end_unlock;
233 }
234 goto end_unlock;
235 }
236
237 ret = lttng_kconsumer_take_snapshot(stream);
238 if (ret < 0) {
239 ERR("Taking kernel snapshot");
240 goto end_unlock;
241 }
242
243 ret = lttng_kconsumer_get_produced_snapshot(stream, &produced_pos);
244 if (ret < 0) {
245 ERR("Produced kernel snapshot position");
246 goto end_unlock;
247 }
248
249 ret = lttng_kconsumer_get_consumed_snapshot(stream, &consumed_pos);
250 if (ret < 0) {
251 ERR("Consumerd kernel snapshot position");
252 goto end_unlock;
253 }
254
255 consumed_pos = consumer_get_consume_start_pos(consumed_pos,
256 produced_pos, nb_packets_per_stream,
257 stream->max_sb_size);
258
259 while ((long) (consumed_pos - produced_pos) < 0) {
260 ssize_t read_len;
261 unsigned long len, padded_len;
262 const char *subbuf_addr;
263 struct lttng_buffer_view subbuf_view;
264
265 health_code_update();
266 DBG("Kernel consumer taking snapshot at pos %lu", consumed_pos);
267
268 ret = kernctl_get_subbuf(stream->wait_fd, &consumed_pos);
269 if (ret < 0) {
270 if (ret != -EAGAIN) {
271 PERROR("kernctl_get_subbuf snapshot");
272 goto end_unlock;
273 }
274 DBG("Kernel consumer get subbuf failed. Skipping it.");
275 consumed_pos += stream->max_sb_size;
276 stream->chan->lost_packets++;
277 continue;
278 }
279
280 ret = kernctl_get_subbuf_size(stream->wait_fd, &len);
281 if (ret < 0) {
282 ERR("Snapshot kernctl_get_subbuf_size");
283 goto error_put_subbuf;
284 }
285
286 ret = kernctl_get_padded_subbuf_size(stream->wait_fd, &padded_len);
287 if (ret < 0) {
288 ERR("Snapshot kernctl_get_padded_subbuf_size");
289 goto error_put_subbuf;
290 }
291
292 ret = get_current_subbuf_addr(stream, &subbuf_addr);
293 if (ret) {
294 goto error_put_subbuf;
295 }
296
297 subbuf_view = lttng_buffer_view_init(
298 subbuf_addr, 0, padded_len);
299 read_len = lttng_consumer_on_read_subbuffer_mmap(ctx,
300 stream, &subbuf_view,
301 padded_len - len, NULL);
302 /*
303 * We write the padded len in local tracefiles but the data len
304 * when using a relay. Display the error but continue processing
305 * to try to release the subbuffer.
306 */
307 if (relayd_id != (uint64_t) -1ULL) {
308 if (read_len != len) {
309 ERR("Error sending to the relay (ret: %zd != len: %lu)",
310 read_len, len);
311 }
312 } else {
313 if (read_len != padded_len) {
314 ERR("Error writing to tracefile (ret: %zd != len: %lu)",
315 read_len, padded_len);
316 }
317 }
318
319 ret = kernctl_put_subbuf(stream->wait_fd);
320 if (ret < 0) {
321 ERR("Snapshot kernctl_put_subbuf");
322 goto end_unlock;
323 }
324 consumed_pos += stream->max_sb_size;
325 }
326
327 if (relayd_id == (uint64_t) -1ULL) {
328 if (stream->out_fd >= 0) {
329 ret = close(stream->out_fd);
330 if (ret < 0) {
331 PERROR("Kernel consumer snapshot close out_fd");
332 goto end_unlock;
333 }
334 stream->out_fd = -1;
335 }
336 } else {
337 close_relayd_stream(stream);
338 stream->net_seq_idx = (uint64_t) -1ULL;
339 }
340 lttng_trace_chunk_put(stream->trace_chunk);
341 stream->trace_chunk = NULL;
342 pthread_mutex_unlock(&stream->lock);
343 }
344
345 /* All good! */
346 ret = 0;
347 goto end;
348
349 error_put_subbuf:
350 ret = kernctl_put_subbuf(stream->wait_fd);
351 if (ret < 0) {
352 ERR("Snapshot kernctl_put_subbuf error path");
353 }
354 end_unlock:
355 pthread_mutex_unlock(&stream->lock);
356 end:
357 rcu_read_unlock();
358 return ret;
359 }
360
361 /*
362 * Read the whole metadata available for a snapshot.
363 * RCU read-side lock must be held across this function to ensure existence of
364 * metadata_channel. The channel lock must be held by the caller.
365 *
366 * Returns 0 on success, < 0 on error
367 */
368 static int lttng_kconsumer_snapshot_metadata(
369 struct lttng_consumer_channel *metadata_channel,
370 uint64_t key, char *path, uint64_t relayd_id,
371 struct lttng_consumer_local_data *ctx)
372 {
373 int ret, use_relayd = 0;
374 ssize_t ret_read;
375 struct lttng_consumer_stream *metadata_stream;
376
377 assert(ctx);
378
379 DBG("Kernel consumer snapshot metadata with key %" PRIu64 " at path %s",
380 key, path);
381
382 rcu_read_lock();
383
384 metadata_stream = metadata_channel->metadata_stream;
385 assert(metadata_stream);
386
387 pthread_mutex_lock(&metadata_stream->lock);
388 assert(metadata_channel->trace_chunk);
389 assert(metadata_stream->trace_chunk);
390
391 /* Flag once that we have a valid relayd for the stream. */
392 if (relayd_id != (uint64_t) -1ULL) {
393 use_relayd = 1;
394 }
395
396 if (use_relayd) {
397 ret = consumer_send_relayd_stream(metadata_stream, path);
398 if (ret < 0) {
399 goto error_snapshot;
400 }
401 } else {
402 ret = consumer_stream_create_output_files(metadata_stream,
403 false);
404 if (ret < 0) {
405 goto error_snapshot;
406 }
407 }
408
409 do {
410 health_code_update();
411
412 ret_read = lttng_kconsumer_read_subbuffer(metadata_stream, ctx);
413 if (ret_read < 0) {
414 if (ret_read != -EAGAIN) {
415 ERR("Kernel snapshot reading metadata subbuffer (ret: %zd)",
416 ret_read);
417 ret = ret_read;
418 goto error_snapshot;
419 }
420 /* ret_read is negative at this point so we will exit the loop. */
421 continue;
422 }
423 } while (ret_read >= 0);
424
425 if (use_relayd) {
426 close_relayd_stream(metadata_stream);
427 metadata_stream->net_seq_idx = (uint64_t) -1ULL;
428 } else {
429 if (metadata_stream->out_fd >= 0) {
430 ret = close(metadata_stream->out_fd);
431 if (ret < 0) {
432 PERROR("Kernel consumer snapshot metadata close out_fd");
433 /*
434 * Don't go on error here since the snapshot was successful at this
435 * point but somehow the close failed.
436 */
437 }
438 metadata_stream->out_fd = -1;
439 lttng_trace_chunk_put(metadata_stream->trace_chunk);
440 metadata_stream->trace_chunk = NULL;
441 }
442 }
443
444 ret = 0;
445 error_snapshot:
446 pthread_mutex_unlock(&metadata_stream->lock);
447 cds_list_del(&metadata_stream->send_node);
448 consumer_stream_destroy(metadata_stream, NULL);
449 metadata_channel->metadata_stream = NULL;
450 rcu_read_unlock();
451 return ret;
452 }
453
454 /*
455 * Receive command from session daemon and process it.
456 *
457 * Return 1 on success else a negative value or 0.
458 */
459 int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
460 int sock, struct pollfd *consumer_sockpoll)
461 {
462 ssize_t ret;
463 enum lttcomm_return_code ret_code = LTTCOMM_CONSUMERD_SUCCESS;
464 struct lttcomm_consumer_msg msg;
465
466 health_code_update();
467
468 ret = lttcomm_recv_unix_sock(sock, &msg, sizeof(msg));
469 if (ret != sizeof(msg)) {
470 if (ret > 0) {
471 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_CMD);
472 ret = -1;
473 }
474 return ret;
475 }
476
477 health_code_update();
478
479 /* Deprecated command */
480 assert(msg.cmd_type != LTTNG_CONSUMER_STOP);
481
482 health_code_update();
483
484 /* relayd needs RCU read-side protection */
485 rcu_read_lock();
486
487 switch (msg.cmd_type) {
488 case LTTNG_CONSUMER_ADD_RELAYD_SOCKET:
489 {
490 /* Session daemon status message are handled in the following call. */
491 consumer_add_relayd_socket(msg.u.relayd_sock.net_index,
492 msg.u.relayd_sock.type, ctx, sock, consumer_sockpoll,
493 &msg.u.relayd_sock.sock, msg.u.relayd_sock.session_id,
494 msg.u.relayd_sock.relayd_session_id);
495 goto end_nosignal;
496 }
497 case LTTNG_CONSUMER_ADD_CHANNEL:
498 {
499 struct lttng_consumer_channel *new_channel;
500 int ret_recv;
501 const uint64_t chunk_id = msg.u.channel.chunk_id.value;
502
503 health_code_update();
504
505 /* First send a status message before receiving the fds. */
506 ret = consumer_send_status_msg(sock, ret_code);
507 if (ret < 0) {
508 /* Somehow, the session daemon is not responding anymore. */
509 goto error_fatal;
510 }
511
512 health_code_update();
513
514 DBG("consumer_add_channel %" PRIu64, msg.u.channel.channel_key);
515 new_channel = consumer_allocate_channel(msg.u.channel.channel_key,
516 msg.u.channel.session_id,
517 msg.u.channel.chunk_id.is_set ?
518 &chunk_id : NULL,
519 msg.u.channel.pathname,
520 msg.u.channel.name,
521 msg.u.channel.relayd_id, msg.u.channel.output,
522 msg.u.channel.tracefile_size,
523 msg.u.channel.tracefile_count, 0,
524 msg.u.channel.monitor,
525 msg.u.channel.live_timer_interval,
526 NULL, NULL);
527 if (new_channel == NULL) {
528 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
529 goto end_nosignal;
530 }
531 new_channel->nb_init_stream_left = msg.u.channel.nb_init_streams;
532 switch (msg.u.channel.output) {
533 case LTTNG_EVENT_SPLICE:
534 new_channel->output = CONSUMER_CHANNEL_SPLICE;
535 break;
536 case LTTNG_EVENT_MMAP:
537 new_channel->output = CONSUMER_CHANNEL_MMAP;
538 break;
539 default:
540 ERR("Channel output unknown %d", msg.u.channel.output);
541 goto end_nosignal;
542 }
543
544 /* Translate and save channel type. */
545 switch (msg.u.channel.type) {
546 case CONSUMER_CHANNEL_TYPE_DATA:
547 case CONSUMER_CHANNEL_TYPE_METADATA:
548 new_channel->type = msg.u.channel.type;
549 break;
550 default:
551 assert(0);
552 goto end_nosignal;
553 };
554
555 health_code_update();
556
557 if (ctx->on_recv_channel != NULL) {
558 ret_recv = ctx->on_recv_channel(new_channel);
559 if (ret_recv == 0) {
560 ret = consumer_add_channel(new_channel, ctx);
561 } else if (ret_recv < 0) {
562 goto end_nosignal;
563 }
564 } else {
565 ret = consumer_add_channel(new_channel, ctx);
566 }
567 if (msg.u.channel.type == CONSUMER_CHANNEL_TYPE_DATA && !ret) {
568 int monitor_start_ret;
569
570 DBG("Consumer starting monitor timer");
571 consumer_timer_live_start(new_channel,
572 msg.u.channel.live_timer_interval);
573 monitor_start_ret = consumer_timer_monitor_start(
574 new_channel,
575 msg.u.channel.monitor_timer_interval);
576 if (monitor_start_ret < 0) {
577 ERR("Starting channel monitoring timer failed");
578 goto end_nosignal;
579 }
580
581 }
582
583 health_code_update();
584
585 /* If we received an error in add_channel, we need to report it. */
586 if (ret < 0) {
587 ret = consumer_send_status_msg(sock, ret);
588 if (ret < 0) {
589 goto error_fatal;
590 }
591 goto end_nosignal;
592 }
593
594 goto end_nosignal;
595 }
596 case LTTNG_CONSUMER_ADD_STREAM:
597 {
598 int fd;
599 struct lttng_pipe *stream_pipe;
600 struct lttng_consumer_stream *new_stream;
601 struct lttng_consumer_channel *channel;
602 int alloc_ret = 0;
603
604 /*
605 * Get stream's channel reference. Needed when adding the stream to the
606 * global hash table.
607 */
608 channel = consumer_find_channel(msg.u.stream.channel_key);
609 if (!channel) {
610 /*
611 * We could not find the channel. Can happen if cpu hotplug
612 * happens while tearing down.
613 */
614 ERR("Unable to find channel key %" PRIu64, msg.u.stream.channel_key);
615 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
616 }
617
618 health_code_update();
619
620 /* First send a status message before receiving the fds. */
621 ret = consumer_send_status_msg(sock, ret_code);
622 if (ret < 0) {
623 /* Somehow, the session daemon is not responding anymore. */
624 goto error_add_stream_fatal;
625 }
626
627 health_code_update();
628
629 if (ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
630 /* Channel was not found. */
631 goto error_add_stream_nosignal;
632 }
633
634 /* Blocking call */
635 health_poll_entry();
636 ret = lttng_consumer_poll_socket(consumer_sockpoll);
637 health_poll_exit();
638 if (ret) {
639 goto error_add_stream_fatal;
640 }
641
642 health_code_update();
643
644 /* Get stream file descriptor from socket */
645 ret = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
646 if (ret != sizeof(fd)) {
647 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_FD);
648 goto end;
649 }
650
651 health_code_update();
652
653 /*
654 * Send status code to session daemon only if the recv works. If the
655 * above recv() failed, the session daemon is notified through the
656 * error socket and the teardown is eventually done.
657 */
658 ret = consumer_send_status_msg(sock, ret_code);
659 if (ret < 0) {
660 /* Somehow, the session daemon is not responding anymore. */
661 goto error_add_stream_nosignal;
662 }
663
664 health_code_update();
665
666 pthread_mutex_lock(&channel->lock);
667 new_stream = consumer_allocate_stream(channel->key,
668 fd,
669 channel->name,
670 channel->relayd_id,
671 channel->session_id,
672 channel->trace_chunk,
673 msg.u.stream.cpu,
674 &alloc_ret,
675 channel->type,
676 channel->monitor);
677 if (new_stream == NULL) {
678 switch (alloc_ret) {
679 case -ENOMEM:
680 case -EINVAL:
681 default:
682 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
683 break;
684 }
685 pthread_mutex_unlock(&channel->lock);
686 goto error_add_stream_nosignal;
687 }
688
689 new_stream->chan = channel;
690 new_stream->wait_fd = fd;
691 ret = kernctl_get_max_subbuf_size(new_stream->wait_fd,
692 &new_stream->max_sb_size);
693 if (ret < 0) {
694 pthread_mutex_unlock(&channel->lock);
695 ERR("Failed to get kernel maximal subbuffer size");
696 goto error_add_stream_nosignal;
697 }
698
699 consumer_stream_update_channel_attributes(new_stream,
700 channel);
701 switch (channel->output) {
702 case CONSUMER_CHANNEL_SPLICE:
703 new_stream->output = LTTNG_EVENT_SPLICE;
704 ret = utils_create_pipe(new_stream->splice_pipe);
705 if (ret < 0) {
706 pthread_mutex_unlock(&channel->lock);
707 goto error_add_stream_nosignal;
708 }
709 break;
710 case CONSUMER_CHANNEL_MMAP:
711 new_stream->output = LTTNG_EVENT_MMAP;
712 break;
713 default:
714 ERR("Stream output unknown %d", channel->output);
715 pthread_mutex_unlock(&channel->lock);
716 goto error_add_stream_nosignal;
717 }
718
719 /*
720 * We've just assigned the channel to the stream so increment the
721 * refcount right now. We don't need to increment the refcount for
722 * streams in no monitor because we handle manually the cleanup of
723 * those. It is very important to make sure there is NO prior
724 * consumer_del_stream() calls or else the refcount will be unbalanced.
725 */
726 if (channel->monitor) {
727 uatomic_inc(&new_stream->chan->refcount);
728 }
729
730 /*
731 * The buffer flush is done on the session daemon side for the kernel
732 * so no need for the stream "hangup_flush_done" variable to be
733 * tracked. This is important for a kernel stream since we don't rely
734 * on the flush state of the stream to read data. It's not the case for
735 * user space tracing.
736 */
737 new_stream->hangup_flush_done = 0;
738
739 health_code_update();
740
741 pthread_mutex_lock(&new_stream->lock);
742 if (ctx->on_recv_stream) {
743 ret = ctx->on_recv_stream(new_stream);
744 if (ret < 0) {
745 pthread_mutex_unlock(&new_stream->lock);
746 pthread_mutex_unlock(&channel->lock);
747 consumer_stream_free(new_stream);
748 goto error_add_stream_nosignal;
749 }
750 }
751 health_code_update();
752
753 if (new_stream->metadata_flag) {
754 channel->metadata_stream = new_stream;
755 }
756
757 /* Do not monitor this stream. */
758 if (!channel->monitor) {
759 DBG("Kernel consumer add stream %s in no monitor mode with "
760 "relayd id %" PRIu64, new_stream->name,
761 new_stream->net_seq_idx);
762 cds_list_add(&new_stream->send_node, &channel->streams.head);
763 pthread_mutex_unlock(&new_stream->lock);
764 pthread_mutex_unlock(&channel->lock);
765 goto end_add_stream;
766 }
767
768 /* Send stream to relayd if the stream has an ID. */
769 if (new_stream->net_seq_idx != (uint64_t) -1ULL) {
770 ret = consumer_send_relayd_stream(new_stream,
771 new_stream->chan->pathname);
772 if (ret < 0) {
773 pthread_mutex_unlock(&new_stream->lock);
774 pthread_mutex_unlock(&channel->lock);
775 consumer_stream_free(new_stream);
776 goto error_add_stream_nosignal;
777 }
778
779 /*
780 * If adding an extra stream to an already
781 * existing channel (e.g. cpu hotplug), we need
782 * to send the "streams_sent" command to relayd.
783 */
784 if (channel->streams_sent_to_relayd) {
785 ret = consumer_send_relayd_streams_sent(
786 new_stream->net_seq_idx);
787 if (ret < 0) {
788 pthread_mutex_unlock(&new_stream->lock);
789 pthread_mutex_unlock(&channel->lock);
790 goto error_add_stream_nosignal;
791 }
792 }
793 }
794 pthread_mutex_unlock(&new_stream->lock);
795 pthread_mutex_unlock(&channel->lock);
796
797 /* Get the right pipe where the stream will be sent. */
798 if (new_stream->metadata_flag) {
799 consumer_add_metadata_stream(new_stream);
800 stream_pipe = ctx->consumer_metadata_pipe;
801 } else {
802 consumer_add_data_stream(new_stream);
803 stream_pipe = ctx->consumer_data_pipe;
804 }
805
806 /* Visible to other threads */
807 new_stream->globally_visible = 1;
808
809 health_code_update();
810
811 ret = lttng_pipe_write(stream_pipe, &new_stream, sizeof(new_stream));
812 if (ret < 0) {
813 ERR("Consumer write %s stream to pipe %d",
814 new_stream->metadata_flag ? "metadata" : "data",
815 lttng_pipe_get_writefd(stream_pipe));
816 if (new_stream->metadata_flag) {
817 consumer_del_stream_for_metadata(new_stream);
818 } else {
819 consumer_del_stream_for_data(new_stream);
820 }
821 goto error_add_stream_nosignal;
822 }
823
824 DBG("Kernel consumer ADD_STREAM %s (fd: %d) %s with relayd id %" PRIu64,
825 new_stream->name, fd, new_stream->chan->pathname, new_stream->relayd_stream_id);
826 end_add_stream:
827 break;
828 error_add_stream_nosignal:
829 goto end_nosignal;
830 error_add_stream_fatal:
831 goto error_fatal;
832 }
833 case LTTNG_CONSUMER_STREAMS_SENT:
834 {
835 struct lttng_consumer_channel *channel;
836
837 /*
838 * Get stream's channel reference. Needed when adding the stream to the
839 * global hash table.
840 */
841 channel = consumer_find_channel(msg.u.sent_streams.channel_key);
842 if (!channel) {
843 /*
844 * We could not find the channel. Can happen if cpu hotplug
845 * happens while tearing down.
846 */
847 ERR("Unable to find channel key %" PRIu64,
848 msg.u.sent_streams.channel_key);
849 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
850 }
851
852 health_code_update();
853
854 /*
855 * Send status code to session daemon.
856 */
857 ret = consumer_send_status_msg(sock, ret_code);
858 if (ret < 0 || ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
859 /* Somehow, the session daemon is not responding anymore. */
860 goto error_streams_sent_nosignal;
861 }
862
863 health_code_update();
864
865 /*
866 * We should not send this message if we don't monitor the
867 * streams in this channel.
868 */
869 if (!channel->monitor) {
870 goto end_error_streams_sent;
871 }
872
873 health_code_update();
874 /* Send stream to relayd if the stream has an ID. */
875 if (msg.u.sent_streams.net_seq_idx != (uint64_t) -1ULL) {
876 ret = consumer_send_relayd_streams_sent(
877 msg.u.sent_streams.net_seq_idx);
878 if (ret < 0) {
879 goto error_streams_sent_nosignal;
880 }
881 channel->streams_sent_to_relayd = true;
882 }
883 end_error_streams_sent:
884 break;
885 error_streams_sent_nosignal:
886 goto end_nosignal;
887 }
888 case LTTNG_CONSUMER_UPDATE_STREAM:
889 {
890 rcu_read_unlock();
891 return -ENOSYS;
892 }
893 case LTTNG_CONSUMER_DESTROY_RELAYD:
894 {
895 uint64_t index = msg.u.destroy_relayd.net_seq_idx;
896 struct consumer_relayd_sock_pair *relayd;
897
898 DBG("Kernel consumer destroying relayd %" PRIu64, index);
899
900 /* Get relayd reference if exists. */
901 relayd = consumer_find_relayd(index);
902 if (relayd == NULL) {
903 DBG("Unable to find relayd %" PRIu64, index);
904 ret_code = LTTCOMM_CONSUMERD_RELAYD_FAIL;
905 }
906
907 /*
908 * Each relayd socket pair has a refcount of stream attached to it
909 * which tells if the relayd is still active or not depending on the
910 * refcount value.
911 *
912 * This will set the destroy flag of the relayd object and destroy it
913 * if the refcount reaches zero when called.
914 *
915 * The destroy can happen either here or when a stream fd hangs up.
916 */
917 if (relayd) {
918 consumer_flag_relayd_for_destroy(relayd);
919 }
920
921 health_code_update();
922
923 ret = consumer_send_status_msg(sock, ret_code);
924 if (ret < 0) {
925 /* Somehow, the session daemon is not responding anymore. */
926 goto error_fatal;
927 }
928
929 goto end_nosignal;
930 }
931 case LTTNG_CONSUMER_DATA_PENDING:
932 {
933 int32_t ret;
934 uint64_t id = msg.u.data_pending.session_id;
935
936 DBG("Kernel consumer data pending command for id %" PRIu64, id);
937
938 ret = consumer_data_pending(id);
939
940 health_code_update();
941
942 /* Send back returned value to session daemon */
943 ret = lttcomm_send_unix_sock(sock, &ret, sizeof(ret));
944 if (ret < 0) {
945 PERROR("send data pending ret code");
946 goto error_fatal;
947 }
948
949 /*
950 * No need to send back a status message since the data pending
951 * returned value is the response.
952 */
953 break;
954 }
955 case LTTNG_CONSUMER_SNAPSHOT_CHANNEL:
956 {
957 struct lttng_consumer_channel *channel;
958 uint64_t key = msg.u.snapshot_channel.key;
959
960 channel = consumer_find_channel(key);
961 if (!channel) {
962 ERR("Channel %" PRIu64 " not found", key);
963 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
964 } else {
965 pthread_mutex_lock(&channel->lock);
966 if (msg.u.snapshot_channel.metadata == 1) {
967 ret = lttng_kconsumer_snapshot_metadata(channel, key,
968 msg.u.snapshot_channel.pathname,
969 msg.u.snapshot_channel.relayd_id, ctx);
970 if (ret < 0) {
971 ERR("Snapshot metadata failed");
972 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
973 }
974 } else {
975 ret = lttng_kconsumer_snapshot_channel(channel, key,
976 msg.u.snapshot_channel.pathname,
977 msg.u.snapshot_channel.relayd_id,
978 msg.u.snapshot_channel.nb_packets_per_stream,
979 ctx);
980 if (ret < 0) {
981 ERR("Snapshot channel failed");
982 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
983 }
984 }
985 pthread_mutex_unlock(&channel->lock);
986 }
987 health_code_update();
988
989 ret = consumer_send_status_msg(sock, ret_code);
990 if (ret < 0) {
991 /* Somehow, the session daemon is not responding anymore. */
992 goto end_nosignal;
993 }
994 break;
995 }
996 case LTTNG_CONSUMER_DESTROY_CHANNEL:
997 {
998 uint64_t key = msg.u.destroy_channel.key;
999 struct lttng_consumer_channel *channel;
1000
1001 channel = consumer_find_channel(key);
1002 if (!channel) {
1003 ERR("Kernel consumer destroy channel %" PRIu64 " not found", key);
1004 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1005 }
1006
1007 health_code_update();
1008
1009 ret = consumer_send_status_msg(sock, ret_code);
1010 if (ret < 0) {
1011 /* Somehow, the session daemon is not responding anymore. */
1012 goto end_destroy_channel;
1013 }
1014
1015 health_code_update();
1016
1017 /* Stop right now if no channel was found. */
1018 if (!channel) {
1019 goto end_destroy_channel;
1020 }
1021
1022 /*
1023 * This command should ONLY be issued for channel with streams set in
1024 * no monitor mode.
1025 */
1026 assert(!channel->monitor);
1027
1028 /*
1029 * The refcount should ALWAYS be 0 in the case of a channel in no
1030 * monitor mode.
1031 */
1032 assert(!uatomic_sub_return(&channel->refcount, 1));
1033
1034 consumer_del_channel(channel);
1035 end_destroy_channel:
1036 goto end_nosignal;
1037 }
1038 case LTTNG_CONSUMER_DISCARDED_EVENTS:
1039 {
1040 ssize_t ret;
1041 uint64_t count;
1042 struct lttng_consumer_channel *channel;
1043 uint64_t id = msg.u.discarded_events.session_id;
1044 uint64_t key = msg.u.discarded_events.channel_key;
1045
1046 DBG("Kernel consumer discarded events command for session id %"
1047 PRIu64 ", channel key %" PRIu64, id, key);
1048
1049 channel = consumer_find_channel(key);
1050 if (!channel) {
1051 ERR("Kernel consumer discarded events channel %"
1052 PRIu64 " not found", key);
1053 count = 0;
1054 } else {
1055 count = channel->discarded_events;
1056 }
1057
1058 health_code_update();
1059
1060 /* Send back returned value to session daemon */
1061 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1062 if (ret < 0) {
1063 PERROR("send discarded events");
1064 goto error_fatal;
1065 }
1066
1067 break;
1068 }
1069 case LTTNG_CONSUMER_LOST_PACKETS:
1070 {
1071 ssize_t ret;
1072 uint64_t count;
1073 struct lttng_consumer_channel *channel;
1074 uint64_t id = msg.u.lost_packets.session_id;
1075 uint64_t key = msg.u.lost_packets.channel_key;
1076
1077 DBG("Kernel consumer lost packets command for session id %"
1078 PRIu64 ", channel key %" PRIu64, id, key);
1079
1080 channel = consumer_find_channel(key);
1081 if (!channel) {
1082 ERR("Kernel consumer lost packets channel %"
1083 PRIu64 " not found", key);
1084 count = 0;
1085 } else {
1086 count = channel->lost_packets;
1087 }
1088
1089 health_code_update();
1090
1091 /* Send back returned value to session daemon */
1092 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1093 if (ret < 0) {
1094 PERROR("send lost packets");
1095 goto error_fatal;
1096 }
1097
1098 break;
1099 }
1100 case LTTNG_CONSUMER_SET_CHANNEL_MONITOR_PIPE:
1101 {
1102 int channel_monitor_pipe;
1103
1104 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1105 /* Successfully received the command's type. */
1106 ret = consumer_send_status_msg(sock, ret_code);
1107 if (ret < 0) {
1108 goto error_fatal;
1109 }
1110
1111 ret = lttcomm_recv_fds_unix_sock(sock, &channel_monitor_pipe,
1112 1);
1113 if (ret != sizeof(channel_monitor_pipe)) {
1114 ERR("Failed to receive channel monitor pipe");
1115 goto error_fatal;
1116 }
1117
1118 DBG("Received channel monitor pipe (%d)", channel_monitor_pipe);
1119 ret = consumer_timer_thread_set_channel_monitor_pipe(
1120 channel_monitor_pipe);
1121 if (!ret) {
1122 int flags;
1123
1124 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1125 /* Set the pipe as non-blocking. */
1126 ret = fcntl(channel_monitor_pipe, F_GETFL, 0);
1127 if (ret == -1) {
1128 PERROR("fcntl get flags of the channel monitoring pipe");
1129 goto error_fatal;
1130 }
1131 flags = ret;
1132
1133 ret = fcntl(channel_monitor_pipe, F_SETFL,
1134 flags | O_NONBLOCK);
1135 if (ret == -1) {
1136 PERROR("fcntl set O_NONBLOCK flag of the channel monitoring pipe");
1137 goto error_fatal;
1138 }
1139 DBG("Channel monitor pipe set as non-blocking");
1140 } else {
1141 ret_code = LTTCOMM_CONSUMERD_ALREADY_SET;
1142 }
1143 ret = consumer_send_status_msg(sock, ret_code);
1144 if (ret < 0) {
1145 goto error_fatal;
1146 }
1147 break;
1148 }
1149 case LTTNG_CONSUMER_ROTATE_CHANNEL:
1150 {
1151 struct lttng_consumer_channel *channel;
1152 uint64_t key = msg.u.rotate_channel.key;
1153
1154 DBG("Consumer rotate channel %" PRIu64, key);
1155
1156 channel = consumer_find_channel(key);
1157 if (!channel) {
1158 ERR("Channel %" PRIu64 " not found", key);
1159 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1160 } else {
1161 /*
1162 * Sample the rotate position of all the streams in this channel.
1163 */
1164 ret = lttng_consumer_rotate_channel(channel, key,
1165 msg.u.rotate_channel.relayd_id,
1166 msg.u.rotate_channel.metadata,
1167 ctx);
1168 if (ret < 0) {
1169 ERR("Rotate channel failed");
1170 ret_code = LTTCOMM_CONSUMERD_ROTATION_FAIL;
1171 }
1172
1173 health_code_update();
1174 }
1175 ret = consumer_send_status_msg(sock, ret_code);
1176 if (ret < 0) {
1177 /* Somehow, the session daemon is not responding anymore. */
1178 goto error_rotate_channel;
1179 }
1180 if (channel) {
1181 /* Rotate the streams that are ready right now. */
1182 ret = lttng_consumer_rotate_ready_streams(
1183 channel, key, ctx);
1184 if (ret < 0) {
1185 ERR("Rotate ready streams failed");
1186 }
1187 }
1188 break;
1189 error_rotate_channel:
1190 goto end_nosignal;
1191 }
1192 case LTTNG_CONSUMER_INIT:
1193 {
1194 ret_code = lttng_consumer_init_command(ctx,
1195 msg.u.init.sessiond_uuid);
1196 health_code_update();
1197 ret = consumer_send_status_msg(sock, ret_code);
1198 if (ret < 0) {
1199 /* Somehow, the session daemon is not responding anymore. */
1200 goto end_nosignal;
1201 }
1202 break;
1203 }
1204 case LTTNG_CONSUMER_CREATE_TRACE_CHUNK:
1205 {
1206 const struct lttng_credentials credentials = {
1207 .uid = msg.u.create_trace_chunk.credentials.value.uid,
1208 .gid = msg.u.create_trace_chunk.credentials.value.gid,
1209 };
1210 const bool is_local_trace =
1211 !msg.u.create_trace_chunk.relayd_id.is_set;
1212 const uint64_t relayd_id =
1213 msg.u.create_trace_chunk.relayd_id.value;
1214 const char *chunk_override_name =
1215 *msg.u.create_trace_chunk.override_name ?
1216 msg.u.create_trace_chunk.override_name :
1217 NULL;
1218 LTTNG_OPTIONAL(struct lttng_directory_handle) chunk_directory_handle =
1219 LTTNG_OPTIONAL_INIT;
1220
1221 /*
1222 * The session daemon will only provide a chunk directory file
1223 * descriptor for local traces.
1224 */
1225 if (is_local_trace) {
1226 int chunk_dirfd;
1227
1228 /* Acnowledge the reception of the command. */
1229 ret = consumer_send_status_msg(sock,
1230 LTTCOMM_CONSUMERD_SUCCESS);
1231 if (ret < 0) {
1232 /* Somehow, the session daemon is not responding anymore. */
1233 goto end_nosignal;
1234 }
1235
1236 ret = lttcomm_recv_fds_unix_sock(sock, &chunk_dirfd, 1);
1237 if (ret != sizeof(chunk_dirfd)) {
1238 ERR("Failed to receive trace chunk directory file descriptor");
1239 goto error_fatal;
1240 }
1241
1242 DBG("Received trace chunk directory fd (%d)",
1243 chunk_dirfd);
1244 ret = lttng_directory_handle_init_from_dirfd(
1245 &chunk_directory_handle.value,
1246 chunk_dirfd);
1247 if (ret) {
1248 ERR("Failed to initialize chunk directory handle from directory file descriptor");
1249 if (close(chunk_dirfd)) {
1250 PERROR("Failed to close chunk directory file descriptor");
1251 }
1252 goto error_fatal;
1253 }
1254 chunk_directory_handle.is_set = true;
1255 }
1256
1257 ret_code = lttng_consumer_create_trace_chunk(
1258 !is_local_trace ? &relayd_id : NULL,
1259 msg.u.create_trace_chunk.session_id,
1260 msg.u.create_trace_chunk.chunk_id,
1261 (time_t) msg.u.create_trace_chunk
1262 .creation_timestamp,
1263 chunk_override_name,
1264 msg.u.create_trace_chunk.credentials.is_set ?
1265 &credentials :
1266 NULL,
1267 chunk_directory_handle.is_set ?
1268 &chunk_directory_handle.value :
1269 NULL);
1270
1271 if (chunk_directory_handle.is_set) {
1272 lttng_directory_handle_fini(
1273 &chunk_directory_handle.value);
1274 }
1275 goto end_msg_sessiond;
1276 }
1277 case LTTNG_CONSUMER_CLOSE_TRACE_CHUNK:
1278 {
1279 enum lttng_trace_chunk_command_type close_command =
1280 msg.u.close_trace_chunk.close_command.value;
1281 const uint64_t relayd_id =
1282 msg.u.close_trace_chunk.relayd_id.value;
1283 struct lttcomm_consumer_close_trace_chunk_reply reply;
1284 char path[LTTNG_PATH_MAX];
1285
1286 ret_code = lttng_consumer_close_trace_chunk(
1287 msg.u.close_trace_chunk.relayd_id.is_set ?
1288 &relayd_id :
1289 NULL,
1290 msg.u.close_trace_chunk.session_id,
1291 msg.u.close_trace_chunk.chunk_id,
1292 (time_t) msg.u.close_trace_chunk.close_timestamp,
1293 msg.u.close_trace_chunk.close_command.is_set ?
1294 &close_command :
1295 NULL, path);
1296 reply.ret_code = ret_code;
1297 reply.path_length = strlen(path) + 1;
1298 ret = lttcomm_send_unix_sock(sock, &reply, sizeof(reply));
1299 if (ret != sizeof(reply)) {
1300 goto error_fatal;
1301 }
1302 ret = lttcomm_send_unix_sock(sock, path, reply.path_length);
1303 if (ret != reply.path_length) {
1304 goto error_fatal;
1305 }
1306 goto end_nosignal;
1307 }
1308 case LTTNG_CONSUMER_TRACE_CHUNK_EXISTS:
1309 {
1310 const uint64_t relayd_id =
1311 msg.u.trace_chunk_exists.relayd_id.value;
1312
1313 ret_code = lttng_consumer_trace_chunk_exists(
1314 msg.u.trace_chunk_exists.relayd_id.is_set ?
1315 &relayd_id : NULL,
1316 msg.u.trace_chunk_exists.session_id,
1317 msg.u.trace_chunk_exists.chunk_id);
1318 goto end_msg_sessiond;
1319 }
1320 default:
1321 goto end_nosignal;
1322 }
1323
1324 end_nosignal:
1325 /*
1326 * Return 1 to indicate success since the 0 value can be a socket
1327 * shutdown during the recv() or send() call.
1328 */
1329 ret = 1;
1330 goto end;
1331 error_fatal:
1332 /* This will issue a consumer stop. */
1333 ret = -1;
1334 goto end;
1335 end_msg_sessiond:
1336 /*
1337 * The returned value here is not useful since either way we'll return 1 to
1338 * the caller because the session daemon socket management is done
1339 * elsewhere. Returning a negative code or 0 will shutdown the consumer.
1340 */
1341 ret = consumer_send_status_msg(sock, ret_code);
1342 if (ret < 0) {
1343 goto error_fatal;
1344 }
1345 ret = 1;
1346 end:
1347 health_code_update();
1348 rcu_read_unlock();
1349 return ret;
1350 }
1351
1352 /*
1353 * Populate index values of a kernel stream. Values are set in big endian order.
1354 *
1355 * Return 0 on success or else a negative value.
1356 */
1357 static int get_index_values(struct ctf_packet_index *index, int infd)
1358 {
1359 int ret;
1360 uint64_t packet_size, content_size, timestamp_begin, timestamp_end,
1361 events_discarded, stream_id, stream_instance_id,
1362 packet_seq_num;
1363
1364 ret = kernctl_get_timestamp_begin(infd, &timestamp_begin);
1365 if (ret < 0) {
1366 PERROR("kernctl_get_timestamp_begin");
1367 goto error;
1368 }
1369
1370 ret = kernctl_get_timestamp_end(infd, &timestamp_end);
1371 if (ret < 0) {
1372 PERROR("kernctl_get_timestamp_end");
1373 goto error;
1374 }
1375
1376 ret = kernctl_get_events_discarded(infd, &events_discarded);
1377 if (ret < 0) {
1378 PERROR("kernctl_get_events_discarded");
1379 goto error;
1380 }
1381
1382 ret = kernctl_get_content_size(infd, &content_size);
1383 if (ret < 0) {
1384 PERROR("kernctl_get_content_size");
1385 goto error;
1386 }
1387
1388 ret = kernctl_get_packet_size(infd, &packet_size);
1389 if (ret < 0) {
1390 PERROR("kernctl_get_packet_size");
1391 goto error;
1392 }
1393
1394 ret = kernctl_get_stream_id(infd, &stream_id);
1395 if (ret < 0) {
1396 PERROR("kernctl_get_stream_id");
1397 goto error;
1398 }
1399
1400 ret = kernctl_get_instance_id(infd, &stream_instance_id);
1401 if (ret < 0) {
1402 if (ret == -ENOTTY) {
1403 /* Command not implemented by lttng-modules. */
1404 stream_instance_id = -1ULL;
1405 } else {
1406 PERROR("kernctl_get_instance_id");
1407 goto error;
1408 }
1409 }
1410
1411 ret = kernctl_get_sequence_number(infd, &packet_seq_num);
1412 if (ret < 0) {
1413 if (ret == -ENOTTY) {
1414 /* Command not implemented by lttng-modules. */
1415 packet_seq_num = -1ULL;
1416 ret = 0;
1417 } else {
1418 PERROR("kernctl_get_sequence_number");
1419 goto error;
1420 }
1421 }
1422 index->packet_seq_num = htobe64(index->packet_seq_num);
1423
1424 *index = (typeof(*index)) {
1425 .offset = index->offset,
1426 .packet_size = htobe64(packet_size),
1427 .content_size = htobe64(content_size),
1428 .timestamp_begin = htobe64(timestamp_begin),
1429 .timestamp_end = htobe64(timestamp_end),
1430 .events_discarded = htobe64(events_discarded),
1431 .stream_id = htobe64(stream_id),
1432 .stream_instance_id = htobe64(stream_instance_id),
1433 .packet_seq_num = htobe64(packet_seq_num),
1434 };
1435
1436 error:
1437 return ret;
1438 }
1439 /*
1440 * Sync metadata meaning request them to the session daemon and snapshot to the
1441 * metadata thread can consumer them.
1442 *
1443 * Metadata stream lock MUST be acquired.
1444 *
1445 * Return 0 if new metadatda is available, EAGAIN if the metadata stream
1446 * is empty or a negative value on error.
1447 */
1448 int lttng_kconsumer_sync_metadata(struct lttng_consumer_stream *metadata)
1449 {
1450 int ret;
1451
1452 assert(metadata);
1453
1454 ret = kernctl_buffer_flush(metadata->wait_fd);
1455 if (ret < 0) {
1456 ERR("Failed to flush kernel stream");
1457 goto end;
1458 }
1459
1460 ret = kernctl_snapshot(metadata->wait_fd);
1461 if (ret < 0) {
1462 if (ret != -EAGAIN) {
1463 ERR("Sync metadata, taking kernel snapshot failed.");
1464 goto end;
1465 }
1466 DBG("Sync metadata, no new kernel metadata");
1467 /* No new metadata, exit. */
1468 ret = ENODATA;
1469 goto end;
1470 }
1471
1472 end:
1473 return ret;
1474 }
1475
1476 static
1477 int update_stream_stats(struct lttng_consumer_stream *stream)
1478 {
1479 int ret;
1480 uint64_t seq, discarded;
1481
1482 ret = kernctl_get_sequence_number(stream->wait_fd, &seq);
1483 if (ret < 0) {
1484 if (ret == -ENOTTY) {
1485 /* Command not implemented by lttng-modules. */
1486 seq = -1ULL;
1487 stream->sequence_number_unavailable = true;
1488 } else {
1489 PERROR("kernctl_get_sequence_number");
1490 goto end;
1491 }
1492 }
1493
1494 /*
1495 * Start the sequence when we extract the first packet in case we don't
1496 * start at 0 (for example if a consumer is not connected to the
1497 * session immediately after the beginning).
1498 */
1499 if (stream->last_sequence_number == -1ULL) {
1500 stream->last_sequence_number = seq;
1501 } else if (seq > stream->last_sequence_number) {
1502 stream->chan->lost_packets += seq -
1503 stream->last_sequence_number - 1;
1504 } else {
1505 /* seq <= last_sequence_number */
1506 ERR("Sequence number inconsistent : prev = %" PRIu64
1507 ", current = %" PRIu64,
1508 stream->last_sequence_number, seq);
1509 ret = -1;
1510 goto end;
1511 }
1512 stream->last_sequence_number = seq;
1513
1514 ret = kernctl_get_events_discarded(stream->wait_fd, &discarded);
1515 if (ret < 0) {
1516 PERROR("kernctl_get_events_discarded");
1517 goto end;
1518 }
1519 if (discarded < stream->last_discarded_events) {
1520 /*
1521 * Overflow has occurred. We assume only one wrap-around
1522 * has occurred.
1523 */
1524 stream->chan->discarded_events += (1ULL << (CAA_BITS_PER_LONG - 1)) -
1525 stream->last_discarded_events + discarded;
1526 } else {
1527 stream->chan->discarded_events += discarded -
1528 stream->last_discarded_events;
1529 }
1530 stream->last_discarded_events = discarded;
1531 ret = 0;
1532
1533 end:
1534 return ret;
1535 }
1536
1537 /*
1538 * Check if the local version of the metadata stream matches with the version
1539 * of the metadata stream in the kernel. If it was updated, set the reset flag
1540 * on the stream.
1541 */
1542 static
1543 int metadata_stream_check_version(int infd, struct lttng_consumer_stream *stream)
1544 {
1545 int ret;
1546 uint64_t cur_version;
1547
1548 ret = kernctl_get_metadata_version(infd, &cur_version);
1549 if (ret < 0) {
1550 if (ret == -ENOTTY) {
1551 /*
1552 * LTTng-modules does not implement this
1553 * command.
1554 */
1555 ret = 0;
1556 goto end;
1557 }
1558 ERR("Failed to get the metadata version");
1559 goto end;
1560 }
1561
1562 if (stream->metadata_version == cur_version) {
1563 ret = 0;
1564 goto end;
1565 }
1566
1567 DBG("New metadata version detected");
1568 stream->metadata_version = cur_version;
1569 stream->reset_metadata_flag = 1;
1570 ret = 0;
1571
1572 end:
1573 return ret;
1574 }
1575
1576 /*
1577 * Consume data on a file descriptor and write it on a trace file.
1578 * The stream and channel locks must be held by the caller.
1579 */
1580 ssize_t lttng_kconsumer_read_subbuffer(struct lttng_consumer_stream *stream,
1581 struct lttng_consumer_local_data *ctx)
1582 {
1583 unsigned long len, subbuf_size, padding;
1584 int err, write_index = 1, rotation_ret;
1585 ssize_t ret = 0;
1586 int infd = stream->wait_fd;
1587 struct ctf_packet_index index = {};
1588
1589 DBG("In read_subbuffer (infd : %d)", infd);
1590
1591 /*
1592 * If the stream was flagged to be ready for rotation before we extract the
1593 * next packet, rotate it now.
1594 */
1595 if (stream->rotate_ready) {
1596 DBG("Rotate stream before extracting data");
1597 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1598 if (rotation_ret < 0) {
1599 ERR("Stream rotation error");
1600 ret = -1;
1601 goto error;
1602 }
1603 }
1604
1605 /* Get the next subbuffer */
1606 err = kernctl_get_next_subbuf(infd);
1607 if (err != 0) {
1608 /*
1609 * This is a debug message even for single-threaded consumer,
1610 * because poll() have more relaxed criterions than get subbuf,
1611 * so get_subbuf may fail for short race windows where poll()
1612 * would issue wakeups.
1613 */
1614 DBG("Reserving sub buffer failed (everything is normal, "
1615 "it is due to concurrency)");
1616 ret = err;
1617 goto error;
1618 }
1619
1620 /* Get the full subbuffer size including padding */
1621 err = kernctl_get_padded_subbuf_size(infd, &len);
1622 if (err != 0) {
1623 PERROR("Getting sub-buffer len failed.");
1624 err = kernctl_put_subbuf(infd);
1625 if (err != 0) {
1626 if (err == -EFAULT) {
1627 PERROR("Error in unreserving sub buffer\n");
1628 } else if (err == -EIO) {
1629 /* Should never happen with newer LTTng versions */
1630 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1631 }
1632 ret = err;
1633 goto error;
1634 }
1635 ret = err;
1636 goto error;
1637 }
1638
1639 if (!stream->metadata_flag) {
1640 ret = get_index_values(&index, infd);
1641 if (ret < 0) {
1642 err = kernctl_put_subbuf(infd);
1643 if (err != 0) {
1644 if (err == -EFAULT) {
1645 PERROR("Error in unreserving sub buffer\n");
1646 } else if (err == -EIO) {
1647 /* Should never happen with newer LTTng versions */
1648 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1649 }
1650 ret = err;
1651 goto error;
1652 }
1653 goto error;
1654 }
1655 ret = update_stream_stats(stream);
1656 if (ret < 0) {
1657 err = kernctl_put_subbuf(infd);
1658 if (err != 0) {
1659 if (err == -EFAULT) {
1660 PERROR("Error in unreserving sub buffer\n");
1661 } else if (err == -EIO) {
1662 /* Should never happen with newer LTTng versions */
1663 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1664 }
1665 ret = err;
1666 goto error;
1667 }
1668 goto error;
1669 }
1670 } else {
1671 write_index = 0;
1672 ret = metadata_stream_check_version(infd, stream);
1673 if (ret < 0) {
1674 err = kernctl_put_subbuf(infd);
1675 if (err != 0) {
1676 if (err == -EFAULT) {
1677 PERROR("Error in unreserving sub buffer\n");
1678 } else if (err == -EIO) {
1679 /* Should never happen with newer LTTng versions */
1680 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1681 }
1682 ret = err;
1683 goto error;
1684 }
1685 goto error;
1686 }
1687 }
1688
1689 switch (stream->chan->output) {
1690 case CONSUMER_CHANNEL_SPLICE:
1691 /*
1692 * XXX: The lttng-modules splice "actor" does not handle copying
1693 * partial pages hence only using the subbuffer size without the
1694 * padding makes the splice fail.
1695 */
1696 subbuf_size = len;
1697 padding = 0;
1698
1699 /* splice the subbuffer to the tracefile */
1700 ret = lttng_consumer_on_read_subbuffer_splice(ctx, stream, subbuf_size,
1701 padding, &index);
1702 /*
1703 * XXX: Splice does not support network streaming so the return value
1704 * is simply checked against subbuf_size and not like the mmap() op.
1705 */
1706 if (ret != subbuf_size) {
1707 /*
1708 * display the error but continue processing to try
1709 * to release the subbuffer
1710 */
1711 ERR("Error splicing to tracefile (ret: %zd != len: %lu)",
1712 ret, subbuf_size);
1713 write_index = 0;
1714 }
1715 break;
1716 case CONSUMER_CHANNEL_MMAP:
1717 {
1718 const char *subbuf_addr;
1719 struct lttng_buffer_view subbuf_view;
1720
1721 /* Get subbuffer size without padding */
1722 err = kernctl_get_subbuf_size(infd, &subbuf_size);
1723 if (err != 0) {
1724 PERROR("Getting sub-buffer len failed.");
1725 err = kernctl_put_subbuf(infd);
1726 if (err != 0) {
1727 if (err == -EFAULT) {
1728 PERROR("Error in unreserving sub buffer\n");
1729 } else if (err == -EIO) {
1730 /* Should never happen with newer LTTng versions */
1731 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1732 }
1733 ret = err;
1734 goto error;
1735 }
1736 ret = err;
1737 goto error;
1738 }
1739
1740 ret = get_current_subbuf_addr(stream, &subbuf_addr);
1741 if (ret) {
1742 goto error_put_subbuf;
1743 }
1744
1745 /* Make sure the tracer is not gone mad on us! */
1746 assert(len >= subbuf_size);
1747
1748 padding = len - subbuf_size;
1749
1750 subbuf_view = lttng_buffer_view_init(subbuf_addr, 0, len);
1751
1752 /* write the subbuffer to the tracefile */
1753 ret = lttng_consumer_on_read_subbuffer_mmap(
1754 ctx, stream, &subbuf_view, padding, &index);
1755 /*
1756 * The mmap operation should write subbuf_size amount of data
1757 * when network streaming or the full padding (len) size when we
1758 * are _not_ streaming.
1759 */
1760 if ((ret != subbuf_size && stream->net_seq_idx != (uint64_t) -1ULL) ||
1761 (ret != len && stream->net_seq_idx == (uint64_t) -1ULL)) {
1762 /*
1763 * Display the error but continue processing to try to release the
1764 * subbuffer. This is a DBG statement since this is possible to
1765 * happen without being a critical error.
1766 */
1767 DBG("Error writing to tracefile "
1768 "(ret: %zd != len: %lu != subbuf_size: %lu)",
1769 ret, len, subbuf_size);
1770 write_index = 0;
1771 }
1772 break;
1773 }
1774 default:
1775 ERR("Unknown output method");
1776 ret = -EPERM;
1777 }
1778 error_put_subbuf:
1779 err = kernctl_put_next_subbuf(infd);
1780 if (err != 0) {
1781 if (err == -EFAULT) {
1782 PERROR("Error in unreserving sub buffer\n");
1783 } else if (err == -EIO) {
1784 /* Should never happen with newer LTTng versions */
1785 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1786 }
1787 ret = err;
1788 goto error;
1789 }
1790
1791 /* Write index if needed. */
1792 if (!write_index) {
1793 goto rotate;
1794 }
1795
1796 if (stream->chan->live_timer_interval && !stream->metadata_flag) {
1797 /*
1798 * In live, block until all the metadata is sent.
1799 */
1800 pthread_mutex_lock(&stream->metadata_timer_lock);
1801 assert(!stream->missed_metadata_flush);
1802 stream->waiting_on_metadata = true;
1803 pthread_mutex_unlock(&stream->metadata_timer_lock);
1804
1805 err = consumer_stream_sync_metadata(ctx, stream->session_id);
1806
1807 pthread_mutex_lock(&stream->metadata_timer_lock);
1808 stream->waiting_on_metadata = false;
1809 if (stream->missed_metadata_flush) {
1810 stream->missed_metadata_flush = false;
1811 pthread_mutex_unlock(&stream->metadata_timer_lock);
1812 (void) consumer_flush_kernel_index(stream);
1813 } else {
1814 pthread_mutex_unlock(&stream->metadata_timer_lock);
1815 }
1816 if (err < 0) {
1817 goto error;
1818 }
1819 }
1820
1821 err = consumer_stream_write_index(stream, &index);
1822 if (err < 0) {
1823 goto error;
1824 }
1825
1826 rotate:
1827 /*
1828 * After extracting the packet, we check if the stream is now ready to be
1829 * rotated and perform the action immediately.
1830 */
1831 rotation_ret = lttng_consumer_stream_is_rotate_ready(stream);
1832 if (rotation_ret == 1) {
1833 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1834 if (rotation_ret < 0) {
1835 ERR("Stream rotation error");
1836 ret = -1;
1837 goto error;
1838 }
1839 } else if (rotation_ret < 0) {
1840 ERR("Checking if stream is ready to rotate");
1841 ret = -1;
1842 goto error;
1843 }
1844
1845 error:
1846 return ret;
1847 }
1848
1849 int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream *stream)
1850 {
1851 int ret;
1852
1853 assert(stream);
1854
1855 /*
1856 * Don't create anything if this is set for streaming or if there is
1857 * no current trace chunk on the parent channel.
1858 */
1859 if (stream->net_seq_idx == (uint64_t) -1ULL && stream->chan->monitor &&
1860 stream->chan->trace_chunk) {
1861 ret = consumer_stream_create_output_files(stream, true);
1862 if (ret) {
1863 goto error;
1864 }
1865 }
1866
1867 if (stream->output == LTTNG_EVENT_MMAP) {
1868 /* get the len of the mmap region */
1869 unsigned long mmap_len;
1870
1871 ret = kernctl_get_mmap_len(stream->wait_fd, &mmap_len);
1872 if (ret != 0) {
1873 PERROR("kernctl_get_mmap_len");
1874 goto error_close_fd;
1875 }
1876 stream->mmap_len = (size_t) mmap_len;
1877
1878 stream->mmap_base = mmap(NULL, stream->mmap_len, PROT_READ,
1879 MAP_PRIVATE, stream->wait_fd, 0);
1880 if (stream->mmap_base == MAP_FAILED) {
1881 PERROR("Error mmaping");
1882 ret = -1;
1883 goto error_close_fd;
1884 }
1885 }
1886
1887 /* we return 0 to let the library handle the FD internally */
1888 return 0;
1889
1890 error_close_fd:
1891 if (stream->out_fd >= 0) {
1892 int err;
1893
1894 err = close(stream->out_fd);
1895 assert(!err);
1896 stream->out_fd = -1;
1897 }
1898 error:
1899 return ret;
1900 }
1901
1902 /*
1903 * Check if data is still being extracted from the buffers for a specific
1904 * stream. Consumer data lock MUST be acquired before calling this function
1905 * and the stream lock.
1906 *
1907 * Return 1 if the traced data are still getting read else 0 meaning that the
1908 * data is available for trace viewer reading.
1909 */
1910 int lttng_kconsumer_data_pending(struct lttng_consumer_stream *stream)
1911 {
1912 int ret;
1913
1914 assert(stream);
1915
1916 if (stream->endpoint_status != CONSUMER_ENDPOINT_ACTIVE) {
1917 ret = 0;
1918 goto end;
1919 }
1920
1921 ret = kernctl_get_next_subbuf(stream->wait_fd);
1922 if (ret == 0) {
1923 /* There is still data so let's put back this subbuffer. */
1924 ret = kernctl_put_subbuf(stream->wait_fd);
1925 assert(ret == 0);
1926 ret = 1; /* Data is pending */
1927 goto end;
1928 }
1929
1930 /* Data is NOT pending and ready to be read. */
1931 ret = 0;
1932
1933 end:
1934 return ret;
1935 }
This page took 0.070105 seconds and 3 git commands to generate.