Fix: race with the viewer and readiness of streams
[lttng-tools.git] / src / bin / lttng-relayd / live.c
... / ...
CommitLineData
1/*
2 * Copyright (C) 2013 - Julien Desfossez <jdesfossez@efficios.com>
3 * David Goulet <dgoulet@efficios.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2 only,
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 */
18
19#define _GNU_SOURCE
20#include <getopt.h>
21#include <grp.h>
22#include <limits.h>
23#include <pthread.h>
24#include <signal.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <sys/mman.h>
29#include <sys/mount.h>
30#include <sys/resource.h>
31#include <sys/socket.h>
32#include <sys/stat.h>
33#include <sys/types.h>
34#include <sys/wait.h>
35#include <inttypes.h>
36#include <urcu/futex.h>
37#include <urcu/uatomic.h>
38#include <unistd.h>
39#include <fcntl.h>
40#include <config.h>
41
42#include <lttng/lttng.h>
43#include <common/common.h>
44#include <common/compat/poll.h>
45#include <common/compat/socket.h>
46#include <common/defaults.h>
47#include <common/futex.h>
48#include <common/sessiond-comm/sessiond-comm.h>
49#include <common/sessiond-comm/inet.h>
50#include <common/sessiond-comm/relayd.h>
51#include <common/uri.h>
52#include <common/utils.h>
53
54#include "cmd.h"
55#include "live.h"
56#include "lttng-relayd.h"
57#include "lttng-viewer.h"
58#include "utils.h"
59#include "health-relayd.h"
60
/* URI of the live listening socket; used by thread_listener, freed by cleanup(). */
61static struct lttng_uri *live_uri;
62
63/*
64 * Quit pipe for all threads. This permits a single cancellation point
65 * for all threads when receiving an event on the pipe.
66 */
67static int live_thread_quit_pipe[2] = { -1, -1 };
68
69/*
70 * This pipe is used to inform the worker thread that a command is queued and
71 * ready to be processed.
72 */
73static int live_relay_cmd_pipe[2] = { -1, -1 };
74
75/* Shared between threads */
/* Set to 1 by stop_threads(); polled by thread_dispatcher as its exit condition. */
76static int live_dispatch_thread_exit;
77
/* Thread handles for the listener, dispatcher and worker threads. */
78static pthread_t live_listener_thread;
79static pthread_t live_dispatcher_thread;
80static pthread_t live_worker_thread;
81
82/*
83 * Relay command queue.
84 *
85 * The live_thread_listener and live_thread_dispatcher communicate with this
86 * queue.
87 */
/* thread_listener enqueues accepted connections; thread_dispatcher dequeues them. */
88static struct relay_cmd_queue viewer_cmd_queue;
89
/* Incremented by viewer_connect() for each new viewer command connection. */
90static uint64_t last_relay_viewer_session_id;
91
92/*
93 * Cleanup the daemon
94 */
95static
96void cleanup(void)
97{
98 DBG("Cleaning up");
99
100 free(live_uri);
101}
102
/*
 * Wake up a thread by writing a single byte on the write end of its pipe.
 *
 * A write of less than one byte is logged. The value returned by
 * lttng_write() is handed back to the caller, cast to int.
 */
static
int notify_thread_pipe(int wpipe)
{
	const ssize_t written = lttng_write(wpipe, "!", 1);

	if (written < 1) {
		PERROR("write poll pipe");
	}

	return (int) written;
}
118
119/*
120 * Stop all threads by closing the thread quit pipe.
121 */
122static
123void stop_threads(void)
124{
125 int ret;
126
127 /* Stopping all threads */
128 DBG("Terminating all live threads");
129 ret = notify_thread_pipe(live_thread_quit_pipe[1]);
130 if (ret < 0) {
131 ERR("write error on thread quit pipe");
132 }
133
134 /* Dispatch thread */
135 CMM_STORE_SHARED(live_dispatch_thread_exit, 1);
136 futex_nto1_wake(&viewer_cmd_queue.futex);
137}
138
139/*
140 * Create a poll set with O_CLOEXEC and add the thread quit pipe to the set.
141 */
142static
143int create_thread_poll_set(struct lttng_poll_event *events, int size)
144{
145 int ret;
146
147 if (events == NULL || size == 0) {
148 ret = -1;
149 goto error;
150 }
151
152 ret = lttng_poll_create(events, size, LTTNG_CLOEXEC);
153 if (ret < 0) {
154 goto error;
155 }
156
157 /* Add quit pipe */
158 ret = lttng_poll_add(events, live_thread_quit_pipe[0], LPOLLIN);
159 if (ret < 0) {
160 goto error;
161 }
162
163 return 0;
164
165error:
166 return ret;
167}
168
169/*
170 * Check if the thread quit pipe was triggered.
171 *
172 * Return 1 if it was triggered else 0;
173 */
174static
175int check_thread_quit_pipe(int fd, uint32_t events)
176{
177 if (fd == live_thread_quit_pipe[0] && (events & LPOLLIN)) {
178 return 1;
179 }
180
181 return 0;
182}
183
184/*
185 * Create and init socket from uri.
186 */
187static
188struct lttcomm_sock *init_socket(struct lttng_uri *uri)
189{
190 int ret;
191 struct lttcomm_sock *sock = NULL;
192
193 sock = lttcomm_alloc_sock_from_uri(uri);
194 if (sock == NULL) {
195 ERR("Allocating socket");
196 goto error;
197 }
198
199 ret = lttcomm_create_sock(sock);
200 if (ret < 0) {
201 goto error;
202 }
203 DBG("Listening on sock %d for live", sock->fd);
204
205 ret = sock->ops->bind(sock);
206 if (ret < 0) {
207 goto error;
208 }
209
210 ret = sock->ops->listen(sock, -1);
211 if (ret < 0) {
212 goto error;
213
214 }
215
216 return sock;
217
218error:
219 if (sock) {
220 lttcomm_destroy_sock(sock);
221 }
222 return NULL;
223}
224
225/*
226 * This thread manages the listening for new connections on the network
227 */
228static
229void *thread_listener(void *data)
230{
231 int i, ret, pollfd, err = -1;
232 int val = 1;
233 uint32_t revents, nb_fd;
234 struct lttng_poll_event events;
235 struct lttcomm_sock *live_control_sock;
236
237 DBG("[thread] Relay live listener started");
238
239 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_LISTENER);
240
241 health_code_update();
242
243 live_control_sock = init_socket(live_uri);
244 if (!live_control_sock) {
245 goto error_sock_control;
246 }
247
248 /*
249 * Pass 3 as size here for the thread quit pipe, control and data socket.
250 */
251 ret = create_thread_poll_set(&events, 2);
252 if (ret < 0) {
253 goto error_create_poll;
254 }
255
256 /* Add the control socket */
257 ret = lttng_poll_add(&events, live_control_sock->fd, LPOLLIN | LPOLLRDHUP);
258 if (ret < 0) {
259 goto error_poll_add;
260 }
261
262 while (1) {
263 health_code_update();
264
265 DBG("Listener accepting live viewers connections");
266
267restart:
268 health_poll_entry();
269 ret = lttng_poll_wait(&events, -1);
270 health_poll_exit();
271 if (ret < 0) {
272 /*
273 * Restart interrupted system call.
274 */
275 if (errno == EINTR) {
276 goto restart;
277 }
278 goto error;
279 }
280 nb_fd = ret;
281
282 DBG("Relay new viewer connection received");
283 for (i = 0; i < nb_fd; i++) {
284 health_code_update();
285
286 /* Fetch once the poll data */
287 revents = LTTNG_POLL_GETEV(&events, i);
288 pollfd = LTTNG_POLL_GETFD(&events, i);
289
290 /* Thread quit pipe has been closed. Killing thread. */
291 ret = check_thread_quit_pipe(pollfd, revents);
292 if (ret) {
293 err = 0;
294 goto exit;
295 }
296
297 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
298 ERR("socket poll error");
299 goto error;
300 } else if (revents & LPOLLIN) {
301 /*
302 * Get allocated in this thread, enqueued to a global queue,
303 * dequeued and freed in the worker thread.
304 */
305 struct relay_command *relay_cmd;
306 struct lttcomm_sock *newsock;
307
308 relay_cmd = zmalloc(sizeof(*relay_cmd));
309 if (!relay_cmd) {
310 PERROR("relay command zmalloc");
311 goto error;
312 }
313
314 assert(pollfd == live_control_sock->fd);
315 newsock = live_control_sock->ops->accept(live_control_sock);
316 if (!newsock) {
317 PERROR("accepting control sock");
318 free(relay_cmd);
319 goto error;
320 }
321 DBG("Relay viewer connection accepted socket %d", newsock->fd);
322 ret = setsockopt(newsock->fd, SOL_SOCKET, SO_REUSEADDR, &val,
323 sizeof(int));
324 if (ret < 0) {
325 PERROR("setsockopt inet");
326 lttcomm_destroy_sock(newsock);
327 free(relay_cmd);
328 goto error;
329 }
330 relay_cmd->sock = newsock;
331
332 /*
333 * Lock free enqueue the request.
334 */
335 cds_wfq_enqueue(&viewer_cmd_queue.queue, &relay_cmd->node);
336
337 /*
338 * Wake the dispatch queue futex. Implicit memory
339 * barrier with the exchange in cds_wfq_enqueue.
340 */
341 futex_nto1_wake(&viewer_cmd_queue.futex);
342 }
343 }
344 }
345
346exit:
347error:
348error_poll_add:
349 lttng_poll_clean(&events);
350error_create_poll:
351 if (live_control_sock->fd >= 0) {
352 ret = live_control_sock->ops->close(live_control_sock);
353 if (ret) {
354 PERROR("close");
355 }
356 }
357 lttcomm_destroy_sock(live_control_sock);
358error_sock_control:
359 if (err) {
360 health_error();
361 DBG("Live viewer listener thread exited with error");
362 }
363 health_unregister(health_relayd);
364 DBG("Live viewer listener thread cleanup complete");
365 stop_threads();
366 return NULL;
367}
368
369/*
370 * This thread manages the dispatching of the requests to worker threads
371 */
372static
373void *thread_dispatcher(void *data)
374{
375 int err = -1;
376 ssize_t ret;
377 struct cds_wfq_node *node;
378 struct relay_command *relay_cmd = NULL;
379
380 DBG("[thread] Live viewer relay dispatcher started");
381
382 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_DISPATCHER);
383
384 health_code_update();
385
386 while (!CMM_LOAD_SHARED(live_dispatch_thread_exit)) {
387 health_code_update();
388
389 /* Atomically prepare the queue futex */
390 futex_nto1_prepare(&viewer_cmd_queue.futex);
391
392 do {
393 health_code_update();
394
395 /* Dequeue commands */
396 node = cds_wfq_dequeue_blocking(&viewer_cmd_queue.queue);
397 if (node == NULL) {
398 DBG("Woken up but nothing in the live-viewer "
399 "relay command queue");
400 /* Continue thread execution */
401 break;
402 }
403
404 relay_cmd = caa_container_of(node, struct relay_command, node);
405 DBG("Dispatching viewer request waiting on sock %d",
406 relay_cmd->sock->fd);
407
408 /*
409 * Inform worker thread of the new request. This call is blocking
410 * so we can be assured that the data will be read at some point in
411 * time or wait to the end of the world :)
412 */
413 ret = lttng_write(live_relay_cmd_pipe[1], relay_cmd,
414 sizeof(*relay_cmd));
415 free(relay_cmd);
416 if (ret < sizeof(struct relay_command)) {
417 PERROR("write cmd pipe");
418 goto error;
419 }
420 } while (node != NULL);
421
422 /* Futex wait on queue. Blocking call on futex() */
423 health_poll_entry();
424 futex_nto1_wait(&viewer_cmd_queue.futex);
425 health_poll_exit();
426 }
427
428 /* Normal exit, no error */
429 err = 0;
430
431error:
432 if (err) {
433 health_error();
434 ERR("Health error occurred in %s", __func__);
435 }
436 health_unregister(health_relayd);
437 DBG("Live viewer dispatch thread dying");
438 stop_threads();
439 return NULL;
440}
441
442/*
443 * Establish connection with the viewer and check the versions.
444 *
445 * Return 0 on success or else negative value.
446 */
447static
448int viewer_connect(struct relay_command *cmd)
449{
450 int ret;
451 struct lttng_viewer_connect reply, msg;
452
453 assert(cmd);
454
455 cmd->version_check_done = 1;
456
457 health_code_update();
458
459 /* Get version from the other side. */
460 ret = cmd->sock->ops->recvmsg(cmd->sock, &msg, sizeof(msg), 0);
461 if (ret < 0 || ret != sizeof(msg)) {
462 if (ret == 0) {
463 /* Orderly shutdown. Not necessary to print an error. */
464 DBG("Socket %d did an orderly shutdown", cmd->sock->fd);
465 } else {
466 ERR("Relay failed to receive the version values.");
467 }
468 ret = -1;
469 goto end;
470 }
471
472 health_code_update();
473
474 reply.major = RELAYD_VERSION_COMM_MAJOR;
475 reply.minor = RELAYD_VERSION_COMM_MINOR;
476
477 /* Major versions must be the same */
478 if (reply.major != be32toh(msg.major)) {
479 DBG("Incompatible major versions (%u vs %u)", reply.major,
480 be32toh(msg.major));
481 ret = -1;
482 goto end;
483 }
484
485 cmd->major = reply.major;
486 /* We adapt to the lowest compatible version */
487 if (reply.minor <= be32toh(msg.minor)) {
488 cmd->minor = reply.minor;
489 } else {
490 cmd->minor = be32toh(msg.minor);
491 }
492
493 if (be32toh(msg.type) == VIEWER_CLIENT_COMMAND) {
494 cmd->type = RELAY_VIEWER_COMMAND;
495 } else if (be32toh(msg.type) == VIEWER_CLIENT_NOTIFICATION) {
496 cmd->type = RELAY_VIEWER_NOTIFICATION;
497 } else {
498 ERR("Unknown connection type : %u", be32toh(msg.type));
499 ret = -1;
500 goto end;
501 }
502
503 reply.major = htobe32(reply.major);
504 reply.minor = htobe32(reply.minor);
505 if (cmd->type == RELAY_VIEWER_COMMAND) {
506 reply.viewer_session_id = htobe64(++last_relay_viewer_session_id);
507 }
508
509 health_code_update();
510
511 ret = cmd->sock->ops->sendmsg(cmd->sock, &reply,
512 sizeof(struct lttng_viewer_connect), 0);
513 if (ret < 0) {
514 ERR("Relay sending version");
515 }
516
517 health_code_update();
518
519 DBG("Version check done using protocol %u.%u", cmd->major, cmd->minor);
520 ret = 0;
521
522end:
523 return ret;
524}
525
526/*
527 * Send the viewer the list of current sessions.
528 *
529 * Return 0 on success or else a negative value.
530 */
531static
532int viewer_list_sessions(struct relay_command *cmd,
533 struct lttng_ht *sessions_ht)
534{
535 int ret;
536 struct lttng_viewer_list_sessions session_list;
537 unsigned long count;
538 long approx_before, approx_after;
539 struct lttng_ht_node_ulong *node;
540 struct lttng_ht_iter iter;
541 struct lttng_viewer_session send_session;
542 struct relay_session *session;
543
544 DBG("List sessions received");
545
546 if (cmd->version_check_done == 0) {
547 ERR("Trying to list sessions before version check");
548 ret = -1;
549 goto end_no_session;
550 }
551
552 rcu_read_lock();
553 cds_lfht_count_nodes(sessions_ht->ht, &approx_before, &count, &approx_after);
554 session_list.sessions_count = htobe32(count);
555
556 health_code_update();
557
558 ret = cmd->sock->ops->sendmsg(cmd->sock, &session_list,
559 sizeof(session_list), 0);
560 if (ret < 0) {
561 ERR("Relay sending sessions list");
562 goto end_unlock;
563 }
564
565 health_code_update();
566
567 cds_lfht_for_each_entry(sessions_ht->ht, &iter.iter, node, node) {
568 health_code_update();
569
570 node = lttng_ht_iter_get_node_ulong(&iter);
571 if (!node) {
572 goto end_unlock;
573 }
574 session = caa_container_of(node, struct relay_session, session_n);
575
576 strncpy(send_session.session_name, session->session_name,
577 sizeof(send_session.session_name));
578 strncpy(send_session.hostname, session->hostname,
579 sizeof(send_session.hostname));
580 send_session.id = htobe64(session->id);
581 send_session.live_timer = htobe32(session->live_timer);
582 send_session.clients = htobe32(session->viewer_attached);
583 send_session.streams = htobe32(session->stream_count);
584
585 health_code_update();
586
587 ret = cmd->sock->ops->sendmsg(cmd->sock, &send_session,
588 sizeof(send_session), 0);
589 if (ret < 0) {
590 ERR("Relay sending session info");
591 goto end_unlock;
592 }
593 }
594 health_code_update();
595
596 rcu_read_unlock();
597 ret = 0;
598 goto end;
599
600end_unlock:
601 rcu_read_unlock();
602
603end:
604end_no_session:
605 return ret;
606}
607
608/*
609 * Open index file using a given viewer stream.
610 *
611 * Return 0 on success or else a negative value.
612 */
613static int open_index(struct relay_viewer_stream *stream)
614{
615 int ret;
616 char fullpath[PATH_MAX];
617 struct ctf_packet_index_file_hdr hdr;
618
619 if (stream->tracefile_count > 0) {
620 ret = snprintf(fullpath, sizeof(fullpath), "%s/" DEFAULT_INDEX_DIR "/%s_%"
621 PRIu64 DEFAULT_INDEX_FILE_SUFFIX, stream->path_name,
622 stream->channel_name, stream->tracefile_count_current);
623 } else {
624 ret = snprintf(fullpath, sizeof(fullpath), "%s/" DEFAULT_INDEX_DIR "/%s"
625 DEFAULT_INDEX_FILE_SUFFIX, stream->path_name,
626 stream->channel_name);
627 }
628 if (ret < 0) {
629 PERROR("snprintf index path");
630 goto error;
631 }
632
633 DBG("Opening index file %s in read only", fullpath);
634 ret = open(fullpath, O_RDONLY);
635 if (ret < 0) {
636 if (errno == ENOENT) {
637 ret = -ENOENT;
638 goto error;
639 } else {
640 PERROR("opening index in read-only");
641 }
642 goto error;
643 }
644 stream->index_read_fd = ret;
645 DBG("Opening index file %s in read only, (fd: %d)", fullpath, ret);
646
647 ret = lttng_read(stream->index_read_fd, &hdr, sizeof(hdr));
648 if (ret < sizeof(hdr)) {
649 PERROR("Reading index header");
650 goto error;
651 }
652 if (be32toh(hdr.magic) != CTF_INDEX_MAGIC) {
653 ERR("Invalid header magic");
654 ret = -1;
655 goto error;
656 }
657 if (be32toh(hdr.index_major) != CTF_INDEX_MAJOR ||
658 be32toh(hdr.index_minor) != CTF_INDEX_MINOR) {
659 ERR("Invalid header version");
660 ret = -1;
661 goto error;
662 }
663 ret = 0;
664
665error:
666 return ret;
667}
668
669/*
670 * Allocate and init a new viewer_stream.
671 *
672 * Copies the values from the stream passed in parameter and insert the new
673 * stream in the viewer_streams_ht.
674 *
675 * MUST be called with rcu_read_lock held.
676 *
677 * Returns 0 on success or a negative value on error.
678 */
679static
680int init_viewer_stream(struct relay_stream *stream, int seek_last)
681{
682 int ret;
683 struct relay_viewer_stream *viewer_stream;
684
685 assert(stream);
686
687 viewer_stream = zmalloc(sizeof(*viewer_stream));
688 if (!viewer_stream) {
689 PERROR("relay viewer stream zmalloc");
690 ret = -1;
691 goto error;
692 }
693 viewer_stream->session_id = stream->session->id;
694 viewer_stream->stream_handle = stream->stream_handle;
695 viewer_stream->path_name = strndup(stream->path_name,
696 LTTNG_VIEWER_PATH_MAX);
697 viewer_stream->channel_name = strndup(stream->channel_name,
698 LTTNG_VIEWER_NAME_MAX);
699 viewer_stream->tracefile_count = stream->tracefile_count;
700 viewer_stream->metadata_flag = stream->metadata_flag;
701 viewer_stream->tracefile_count_last = -1ULL;
702 if (seek_last) {
703 viewer_stream->tracefile_count_current =
704 stream->tracefile_count_current;
705 } else {
706 viewer_stream->tracefile_count_current =
707 stream->oldest_tracefile_id;
708 }
709
710 viewer_stream->ctf_trace = stream->ctf_trace;
711 if (viewer_stream->metadata_flag) {
712 viewer_stream->ctf_trace->viewer_metadata_stream =
713 viewer_stream;
714 }
715 uatomic_inc(&viewer_stream->ctf_trace->refcount);
716
717 lttng_ht_node_init_u64(&viewer_stream->stream_n, stream->stream_handle);
718 lttng_ht_add_unique_u64(viewer_streams_ht, &viewer_stream->stream_n);
719
720 viewer_stream->index_read_fd = -1;
721 viewer_stream->read_fd = -1;
722
723 /*
724 * This is to avoid a race between the initialization of this object and
725 * the close of the given stream. If the stream is unable to find this
726 * viewer stream when closing, this copy will at least take the latest
727 * value.
728 * We also need that for the seek_last.
729 */
730 viewer_stream->total_index_received = stream->total_index_received;
731
732 /*
733 * If we never received an index for the current stream, delay
734 * the opening of the index, otherwise open it right now.
735 */
736 if (viewer_stream->tracefile_count_current ==
737 stream->tracefile_count_current &&
738 viewer_stream->total_index_received == 0) {
739 viewer_stream->index_read_fd = -1;
740 } else {
741 ret = open_index(viewer_stream);
742 if (ret < 0) {
743 goto error;
744 }
745 }
746
747 if (seek_last && viewer_stream->index_read_fd > 0) {
748 ret = lseek(viewer_stream->index_read_fd,
749 viewer_stream->total_index_received *
750 sizeof(struct ctf_packet_index),
751 SEEK_CUR);
752 if (ret < 0) {
753 goto error;
754 }
755 viewer_stream->last_sent_index =
756 viewer_stream->total_index_received;
757 }
758
759 ret = 0;
760
761error:
762 return ret;
763}
764
765/*
766 * Rotate a stream to the next tracefile.
767 *
768 * Returns 0 on success, 1 on EOF, a negative value on error.
769 */
770static
771int rotate_viewer_stream(struct relay_viewer_stream *viewer_stream,
772 struct relay_stream *stream)
773{
774 int ret;
775 uint64_t tracefile_id;
776
777 assert(viewer_stream);
778
779 tracefile_id = (viewer_stream->tracefile_count_current + 1) %
780 viewer_stream->tracefile_count;
781 /*
782 * Detect the last tracefile to open.
783 */
784 if (viewer_stream->tracefile_count_last != -1ULL &&
785 viewer_stream->tracefile_count_last ==
786 viewer_stream->tracefile_count_current) {
787 ret = 1;
788 goto end;
789 }
790
791 if (stream) {
792 pthread_mutex_lock(&stream->viewer_stream_rotation_lock);
793 }
794 /*
795 * The writer and the reader are not working in the same
796 * tracefile, we can read up to EOF, we don't care about the
797 * total_index_received.
798 */
799 if (!stream || (stream->tracefile_count_current != tracefile_id)) {
800 viewer_stream->close_write_flag = 1;
801 } else {
802 /*
803 * We are opening a file that is still open in write, make
804 * sure we limit our reading to the number of indexes
805 * received.
806 */
807 viewer_stream->close_write_flag = 0;
808 if (stream) {
809 viewer_stream->total_index_received =
810 stream->total_index_received;
811 }
812 }
813 viewer_stream->tracefile_count_current = tracefile_id;
814
815 ret = close(viewer_stream->index_read_fd);
816 if (ret < 0) {
817 PERROR("close index file %d",
818 viewer_stream->index_read_fd);
819 }
820 viewer_stream->index_read_fd = -1;
821 ret = close(viewer_stream->read_fd);
822 if (ret < 0) {
823 PERROR("close tracefile %d",
824 viewer_stream->read_fd);
825 }
826 viewer_stream->read_fd = -1;
827
828 pthread_mutex_lock(&viewer_stream->overwrite_lock);
829 viewer_stream->abort_flag = 0;
830 pthread_mutex_unlock(&viewer_stream->overwrite_lock);
831
832 viewer_stream->index_read_fd = -1;
833 viewer_stream->read_fd = -1;
834
835 if (stream) {
836 pthread_mutex_unlock(&stream->viewer_stream_rotation_lock);
837 }
838 ret = open_index(viewer_stream);
839 if (ret < 0) {
840 goto error;
841 }
842
843 ret = 0;
844
845end:
846error:
847 return ret;
848}
849
850/*
851 * Send the viewer the list of current sessions.
852 */
853static
854int viewer_attach_session(struct relay_command *cmd,
855 struct lttng_ht *sessions_ht)
856{
857 int ret, send_streams = 0;
858 uint32_t nb_streams = 0, nb_streams_ready = 0;
859 struct lttng_viewer_attach_session_request request;
860 struct lttng_viewer_attach_session_response response;
861 struct lttng_viewer_stream send_stream;
862 struct relay_stream *stream;
863 struct relay_viewer_stream *viewer_stream;
864 struct lttng_ht_node_ulong *node;
865 struct lttng_ht_node_u64 *node64;
866 struct lttng_ht_iter iter;
867 struct relay_session *session;
868 int seek_last = 0;
869
870 assert(cmd);
871 assert(sessions_ht);
872
873 DBG("Attach session received");
874
875 if (cmd->version_check_done == 0) {
876 ERR("Trying to attach session before version check");
877 ret = -1;
878 goto end_no_session;
879 }
880
881 health_code_update();
882
883 ret = cmd->sock->ops->recvmsg(cmd->sock, &request, sizeof(request), 0);
884 if (ret < 0 || ret != sizeof(request)) {
885 if (ret == 0) {
886 /* Orderly shutdown. Not necessary to print an error. */
887 DBG("Socket %d did an orderly shutdown", cmd->sock->fd);
888 } else {
889 ERR("Relay failed to receive the attach parameters.");
890 }
891 ret = -1;
892 goto error;
893 }
894
895 health_code_update();
896
897 rcu_read_lock();
898 lttng_ht_lookup(sessions_ht,
899 (void *)((unsigned long) be64toh(request.session_id)), &iter);
900 node = lttng_ht_iter_get_node_ulong(&iter);
901 if (node == NULL) {
902 DBG("Relay session %" PRIu64 " not found",
903 be64toh(request.session_id));
904 response.status = htobe32(VIEWER_ATTACH_UNK);
905 goto send_reply;
906 }
907
908 session = caa_container_of(node, struct relay_session, session_n);
909 if (cmd->session_id == session->id) {
910 /* Same viewer already attached, just send the stream list. */
911 send_streams = 1;
912 response.status = htobe32(VIEWER_ATTACH_OK);
913 } else if (session->viewer_attached != 0) {
914 DBG("Already a viewer attached");
915 response.status = htobe32(VIEWER_ATTACH_ALREADY);
916 goto send_reply;
917 } else if (session->live_timer == 0) {
918 DBG("Not live session");
919 response.status = htobe32(VIEWER_ATTACH_NOT_LIVE);
920 goto send_reply;
921 } else {
922 session->viewer_attached++;
923 send_streams = 1;
924 response.status = htobe32(VIEWER_ATTACH_OK);
925 cmd->session_id = session->id;
926 cmd->session = session;
927 }
928
929 switch (be32toh(request.seek)) {
930 case VIEWER_SEEK_BEGINNING:
931 /* Default behaviour. */
932 break;
933 case VIEWER_SEEK_LAST:
934 seek_last = 1;
935 break;
936 default:
937 ERR("Wrong seek parameter");
938 response.status = htobe32(VIEWER_ATTACH_SEEK_ERR);
939 send_streams = 0;
940 goto send_reply;
941 }
942
943 if (send_streams) {
944 /* We should only be there if we have a session to attach to. */
945 assert(session);
946
947 /*
948 * Fill the viewer_streams_ht to count the number of streams
949 * ready to be sent and avoid concurrency issues on the
950 * relay_streams_ht and don't rely on a total session stream count.
951 */
952 cds_lfht_for_each_entry(relay_streams_ht->ht, &iter.iter, node, node) {
953 struct relay_viewer_stream *vstream;
954
955 health_code_update();
956
957 node = lttng_ht_iter_get_node_ulong(&iter);
958 if (!node) {
959 continue;
960 }
961 stream = caa_container_of(node, struct relay_stream, stream_n);
962 if (stream->session != cmd->session) {
963 continue;
964 }
965 nb_streams++;
966
967 /*
968 * Don't send streams with no ctf_trace, they are not
969 * ready to be read.
970 */
971 if (!stream->ctf_trace || !stream->viewer_ready) {
972 continue;
973 }
974 nb_streams_ready++;
975
976 vstream = live_find_viewer_stream_by_id(stream->stream_handle);
977 if (!vstream) {
978 ret = init_viewer_stream(stream, seek_last);
979 if (ret < 0) {
980 goto end_unlock;
981 }
982 }
983 }
984
985 /* We must have the same amount of existing stream and ready stream. */
986 if (nb_streams != nb_streams_ready) {
987 nb_streams = 0;
988 }
989 response.streams_count = htobe32(nb_streams);
990 }
991
992send_reply:
993 health_code_update();
994 ret = cmd->sock->ops->sendmsg(cmd->sock, &response, sizeof(response), 0);
995 if (ret < 0) {
996 ERR("Relay sending viewer attach response");
997 goto end_unlock;
998 }
999 health_code_update();
1000
1001 /*
1002 * Unknown or empty session, just return gracefully, the viewer knows what
1003 * is happening.
1004 */
1005 if (!send_streams || !nb_streams) {
1006 ret = 0;
1007 goto end_unlock;
1008 }
1009
1010 /* We should only be there if we have a session to attach to. */
1011 assert(session);
1012 cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, node, node) {
1013 health_code_update();
1014
1015 node64 = lttng_ht_iter_get_node_u64(&iter);
1016 if (!node64) {
1017 continue;
1018 }
1019 viewer_stream = caa_container_of(node64, struct relay_viewer_stream,
1020 stream_n);
1021 if (viewer_stream->session_id != cmd->session->id) {
1022 continue;
1023 }
1024
1025 send_stream.id = htobe64(viewer_stream->stream_handle);
1026 send_stream.ctf_trace_id = htobe64(viewer_stream->ctf_trace->id);
1027 send_stream.metadata_flag = htobe32(viewer_stream->metadata_flag);
1028 strncpy(send_stream.path_name, viewer_stream->path_name,
1029 sizeof(send_stream.path_name));
1030 strncpy(send_stream.channel_name, viewer_stream->channel_name,
1031 sizeof(send_stream.channel_name));
1032
1033 ret = cmd->sock->ops->sendmsg(cmd->sock, &send_stream,
1034 sizeof(send_stream), 0);
1035 if (ret < 0) {
1036 ERR("Relay sending stream %" PRIu64, viewer_stream->stream_handle);
1037 goto end_unlock;
1038 }
1039 DBG("Sent stream %" PRIu64 " to viewer", viewer_stream->stream_handle);
1040 }
1041 ret = 0;
1042
1043end_unlock:
1044 rcu_read_unlock();
1045end_no_session:
1046error:
1047 return ret;
1048}
1049
1050/*
1051 * Get viewer stream from stream id.
1052 *
1053 * RCU read side lock MUST be acquired.
1054 */
1055struct relay_viewer_stream *live_find_viewer_stream_by_id(uint64_t stream_id)
1056{
1057 struct lttng_ht_node_u64 *node;
1058 struct lttng_ht_iter iter;
1059 struct relay_viewer_stream *stream = NULL;
1060
1061 lttng_ht_lookup(viewer_streams_ht, &stream_id, &iter);
1062 node = lttng_ht_iter_get_node_u64(&iter);
1063 if (node == NULL) {
1064 DBG("Relay viewer stream %" PRIu64 " not found", stream_id);
1065 goto end;
1066 }
1067 stream = caa_container_of(node, struct relay_viewer_stream, stream_n);
1068
1069end:
1070 return stream;
1071}
1072
1073static
1074void deferred_free_viewer_stream(struct rcu_head *head)
1075{
1076 struct relay_viewer_stream *stream =
1077 caa_container_of(head, struct relay_viewer_stream, rcu_node);
1078
1079 free(stream->path_name);
1080 free(stream->channel_name);
1081 free(stream);
1082}
1083
1084static
1085void delete_viewer_stream(struct relay_viewer_stream *vstream)
1086{
1087 int delret;
1088 struct lttng_ht_iter iter;
1089
1090 iter.iter.node = &vstream->stream_n.node;
1091 delret = lttng_ht_del(viewer_streams_ht, &iter);
1092 assert(!delret);
1093}
1094
1095static
1096void destroy_viewer_stream(struct relay_viewer_stream *vstream)
1097{
1098 unsigned long ret_ref;
1099 int ret;
1100
1101 assert(vstream);
1102 ret_ref = uatomic_add_return(&vstream->ctf_trace->refcount, -1);
1103 assert(ret_ref >= 0);
1104
1105 if (vstream->read_fd >= 0) {
1106 ret = close(vstream->read_fd);
1107 if (ret < 0) {
1108 PERROR("close read_fd");
1109 }
1110 }
1111 if (vstream->index_read_fd >= 0) {
1112 ret = close(vstream->index_read_fd);
1113 if (ret < 0) {
1114 PERROR("close index_read_fd");
1115 }
1116 }
1117
1118 /*
1119 * If the only stream left in the HT is the metadata stream,
1120 * we need to remove it because we won't detect a EOF for this
1121 * stream.
1122 */
1123 if (ret_ref == 1 && vstream->ctf_trace->metadata_stream) {
1124 destroy_viewer_stream(vstream->ctf_trace->viewer_metadata_stream);
1125 vstream->ctf_trace->metadata_stream = NULL;
1126 DBG("Freeing ctf_trace %" PRIu64, vstream->ctf_trace->id);
1127 /*
1128 * The streaming-side is already closed and we can't receive a new
1129 * stream concurrently at this point (since the session is being
1130 * destroyed), so when we detect the refcount equals 0, we are the
1131 * only owners of the ctf_trace and we can free it ourself.
1132 */
1133 free(vstream->ctf_trace);
1134 }
1135
1136 call_rcu(&vstream->rcu_node, deferred_free_viewer_stream);
1137}
1138
1139/*
1140 * Send the next index for a stream.
1141 *
1142 * Return 0 on success or else a negative value.
1143 */
1144static
1145int viewer_get_next_index(struct relay_command *cmd,
1146 struct lttng_ht *sessions_ht)
1147{
1148 int ret;
1149 struct lttng_viewer_get_next_index request_index;
1150 struct lttng_viewer_index viewer_index;
1151 struct ctf_packet_index packet_index;
1152 struct relay_viewer_stream *vstream;
1153 struct relay_stream *rstream;
1154
1155 assert(cmd);
1156 assert(sessions_ht);
1157
1158 DBG("Viewer get next index");
1159
1160 if (cmd->version_check_done == 0) {
1161 ERR("Trying to request index before version check");
1162 ret = -1;
1163 goto end_no_session;
1164 }
1165
1166 health_code_update();
1167 ret = cmd->sock->ops->recvmsg(cmd->sock, &request_index,
1168 sizeof(request_index), 0);
1169 if (ret < 0 || ret != sizeof(request_index)) {
1170 ret = -1;
1171 ERR("Relay didn't receive the whole packet");
1172 goto end;
1173 }
1174 health_code_update();
1175
1176 rcu_read_lock();
1177 vstream = live_find_viewer_stream_by_id(be64toh(request_index.stream_id));
1178 if (!vstream) {
1179 ret = -1;
1180 goto end_unlock;
1181 }
1182
1183 memset(&viewer_index, 0, sizeof(viewer_index));
1184
1185 /*
1186 * The viewer should not ask for index on metadata stream.
1187 */
1188 if (vstream->metadata_flag) {
1189 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1190 goto send_reply;
1191 }
1192
1193 /* First time, we open the index file */
1194 if (vstream->index_read_fd < 0) {
1195 ret = open_index(vstream);
1196 if (ret == -ENOENT) {
1197 /*
1198 * The index is created only when the first data packet arrives, it
1199 * might not be ready at the beginning of the session
1200 */
1201 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1202 goto send_reply;
1203 } else if (ret < 0) {
1204 viewer_index.status = htobe32(VIEWER_INDEX_ERR);
1205 goto send_reply;
1206 }
1207 }
1208
1209 rstream = relay_stream_find_by_id(vstream->stream_handle);
1210 if (rstream) {
1211 if (vstream->abort_flag) {
1212 /* Rotate on abort (overwrite). */
1213 DBG("Viewer rotate because of overwrite");
1214 ret = rotate_viewer_stream(vstream, rstream);
1215 if (ret < 0) {
1216 goto end_unlock;
1217 } else if (ret == 1) {
1218 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1219 delete_viewer_stream(vstream);
1220 destroy_viewer_stream(vstream);
1221 goto send_reply;
1222 }
1223 }
1224 pthread_mutex_lock(&rstream->viewer_stream_rotation_lock);
1225 if (rstream->tracefile_count_current == vstream->tracefile_count_current) {
1226 if (rstream->beacon_ts_end != -1ULL &&
1227 vstream->last_sent_index == rstream->total_index_received) {
1228 viewer_index.status = htobe32(VIEWER_INDEX_INACTIVE);
1229 viewer_index.timestamp_end = htobe64(rstream->beacon_ts_end);
1230 pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
1231 goto send_reply;
1232 /*
1233 * Reader and writer are working in the same tracefile, so we care
1234 * about the number of index received and sent. Otherwise, we read
1235 * up to EOF.
1236 */
1237 } else if (rstream->total_index_received <= vstream->last_sent_index
1238 && !vstream->close_write_flag) {
1239 pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
1240 /* No new index to send, retry later. */
1241 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1242 goto send_reply;
1243 }
1244 }
1245 pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
1246 } else if (!rstream && vstream->close_write_flag &&
1247 vstream->total_index_received == vstream->last_sent_index) {
1248 /* Last index sent and current tracefile closed in write */
1249 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1250 delete_viewer_stream(vstream);
1251 destroy_viewer_stream(vstream);
1252 goto send_reply;
1253 } else {
1254 vstream->close_write_flag = 1;
1255 }
1256
1257 if (!vstream->ctf_trace->metadata_received ||
1258 vstream->ctf_trace->metadata_received >
1259 vstream->ctf_trace->metadata_sent) {
1260 viewer_index.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA;
1261 }
1262
1263 pthread_mutex_lock(&vstream->overwrite_lock);
1264 if (vstream->abort_flag) {
1265 /*
1266 * The file is being overwritten by the writer, we cannot
1267 * use it.
1268 */
1269 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1270 pthread_mutex_unlock(&vstream->overwrite_lock);
1271 ret = rotate_viewer_stream(vstream, rstream);
1272 if (ret < 0) {
1273 goto end_unlock;
1274 } else if (ret == 1) {
1275 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1276 delete_viewer_stream(vstream);
1277 destroy_viewer_stream(vstream);
1278 goto send_reply;
1279 }
1280 goto send_reply;
1281 }
1282 ret = lttng_read(vstream->index_read_fd, &packet_index,
1283 sizeof(packet_index));
1284 pthread_mutex_unlock(&vstream->overwrite_lock);
1285 if (ret < sizeof(packet_index)) {
1286 /*
1287 * The tracefile is closed in write, so we read up to EOF.
1288 */
1289 if (vstream->close_write_flag == 1) {
1290 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1291 /* Rotate on normal EOF */
1292 ret = rotate_viewer_stream(vstream, rstream);
1293 if (ret < 0) {
1294 goto end_unlock;
1295 } else if (ret == 1) {
1296 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1297 delete_viewer_stream(vstream);
1298 destroy_viewer_stream(vstream);
1299 goto send_reply;
1300 }
1301 } else {
1302 PERROR("Relay reading index file %d",
1303 vstream->index_read_fd);
1304 viewer_index.status = htobe32(VIEWER_INDEX_ERR);
1305 }
1306 goto send_reply;
1307 } else {
1308 viewer_index.status = htobe32(VIEWER_INDEX_OK);
1309 vstream->last_sent_index++;
1310 }
1311
1312 /*
1313 * Indexes are stored in big endian, no need to switch before sending.
1314 */
1315 viewer_index.offset = packet_index.offset;
1316 viewer_index.packet_size = packet_index.packet_size;
1317 viewer_index.content_size = packet_index.content_size;
1318 viewer_index.timestamp_begin = packet_index.timestamp_begin;
1319 viewer_index.timestamp_end = packet_index.timestamp_end;
1320 viewer_index.events_discarded = packet_index.events_discarded;
1321 viewer_index.stream_id = packet_index.stream_id;
1322
1323send_reply:
1324 viewer_index.flags = htobe32(viewer_index.flags);
1325 health_code_update();
1326 ret = cmd->sock->ops->sendmsg(cmd->sock, &viewer_index,
1327 sizeof(viewer_index), 0);
1328 if (ret < 0) {
1329 ERR("Relay index to viewer");
1330 goto end_unlock;
1331 }
1332 health_code_update();
1333
1334 DBG("Index %" PRIu64 "for stream %" PRIu64 "sent",
1335 vstream->last_sent_index, vstream->stream_handle);
1336
1337end_unlock:
1338 rcu_read_unlock();
1339
1340end_no_session:
1341end:
1342 return ret;
1343}
1344
1345/*
1346 * Send the next index for a stream
1347 *
1348 * Return 0 on success or else a negative value.
1349 */
1350static
1351int viewer_get_packet(struct relay_command *cmd)
1352{
1353 int ret, send_data = 0;
1354 char *data = NULL;
1355 uint32_t len = 0;
1356 ssize_t read_len;
1357 struct lttng_viewer_get_packet get_packet_info;
1358 struct lttng_viewer_trace_packet reply;
1359 struct relay_viewer_stream *stream;
1360
1361 assert(cmd);
1362
1363 DBG2("Relay get data packet");
1364
1365 if (cmd->version_check_done == 0) {
1366 ERR("Trying to get packet before version check");
1367 ret = -1;
1368 goto end;
1369 }
1370
1371 health_code_update();
1372 ret = cmd->sock->ops->recvmsg(cmd->sock, &get_packet_info,
1373 sizeof(get_packet_info), 0);
1374 if (ret < 0 || ret != sizeof(get_packet_info)) {
1375 ret = -1;
1376 ERR("Relay didn't receive the whole packet");
1377 goto end;
1378 }
1379 health_code_update();
1380
1381 /* From this point on, the error label can be reached. */
1382 memset(&reply, 0, sizeof(reply));
1383
1384 rcu_read_lock();
1385 stream = live_find_viewer_stream_by_id(be64toh(get_packet_info.stream_id));
1386 if (!stream) {
1387 goto error;
1388 }
1389 assert(stream->ctf_trace);
1390
1391 /*
1392 * First time we read this stream, we need open the tracefile, we should
1393 * only arrive here if an index has already been sent to the viewer, so the
1394 * tracefile must exist, if it does not it is a fatal error.
1395 */
1396 if (stream->read_fd < 0) {
1397 char fullpath[PATH_MAX];
1398
1399 if (stream->tracefile_count > 0) {
1400 ret = snprintf(fullpath, PATH_MAX, "%s/%s_%" PRIu64, stream->path_name,
1401 stream->channel_name,
1402 stream->tracefile_count_current);
1403 } else {
1404 ret = snprintf(fullpath, PATH_MAX, "%s/%s", stream->path_name,
1405 stream->channel_name);
1406 }
1407 if (ret < 0) {
1408 goto error;
1409 }
1410 ret = open(fullpath, O_RDONLY);
1411 if (ret < 0) {
1412 PERROR("Relay opening trace file");
1413 goto error;
1414 }
1415 stream->read_fd = ret;
1416 }
1417
1418 if (!stream->ctf_trace->metadata_received ||
1419 stream->ctf_trace->metadata_received >
1420 stream->ctf_trace->metadata_sent) {
1421 reply.status = htobe32(VIEWER_GET_PACKET_ERR);
1422 reply.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA;
1423 goto send_reply;
1424 }
1425
1426 len = be32toh(get_packet_info.len);
1427 data = zmalloc(len);
1428 if (!data) {
1429 PERROR("relay data zmalloc");
1430 goto error;
1431 }
1432
1433 ret = lseek(stream->read_fd, be64toh(get_packet_info.offset), SEEK_SET);
1434 if (ret < 0) {
1435 /*
1436 * If the read fd was closed by the streaming side, the
1437 * abort_flag will be set to 1, otherwise it is an error.
1438 */
1439 if (stream->abort_flag == 0) {
1440 PERROR("lseek");
1441 goto error;
1442 }
1443 reply.status = htobe32(VIEWER_GET_PACKET_EOF);
1444 goto send_reply;
1445 }
1446 read_len = lttng_read(stream->read_fd, data, len);
1447 if (read_len < len) {
1448 /*
1449 * If the read fd was closed by the streaming side, the
1450 * abort_flag will be set to 1, otherwise it is an error.
1451 */
1452 if (stream->abort_flag == 0) {
1453 PERROR("Relay reading trace file, fd: %d, offset: %" PRIu64,
1454 stream->read_fd,
1455 be64toh(get_packet_info.offset));
1456 goto error;
1457 } else {
1458 reply.status = htobe32(VIEWER_GET_PACKET_EOF);
1459 goto send_reply;
1460 }
1461 }
1462 reply.status = htobe32(VIEWER_GET_PACKET_OK);
1463 reply.len = htobe32(len);
1464 send_data = 1;
1465 goto send_reply;
1466
1467error:
1468 reply.status = htobe32(VIEWER_GET_PACKET_ERR);
1469
1470send_reply:
1471 reply.flags = htobe32(reply.flags);
1472
1473 health_code_update();
1474 ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0);
1475 if (ret < 0) {
1476 ERR("Relay data header to viewer");
1477 goto end_unlock;
1478 }
1479 health_code_update();
1480
1481 if (send_data) {
1482 health_code_update();
1483 ret = cmd->sock->ops->sendmsg(cmd->sock, data, len, 0);
1484 if (ret < 0) {
1485 ERR("Relay send data to viewer");
1486 goto end_unlock;
1487 }
1488 health_code_update();
1489 }
1490
1491 DBG("Sent %u bytes for stream %" PRIu64, len,
1492 be64toh(get_packet_info.stream_id));
1493
1494end_unlock:
1495 free(data);
1496 rcu_read_unlock();
1497
1498end:
1499 return ret;
1500}
1501
1502/*
1503 * Send the session's metadata
1504 *
1505 * Return 0 on success else a negative value.
1506 */
1507static
1508int viewer_get_metadata(struct relay_command *cmd)
1509{
1510 int ret = 0;
1511 ssize_t read_len;
1512 uint64_t len = 0;
1513 char *data = NULL;
1514 struct lttng_viewer_get_metadata request;
1515 struct lttng_viewer_metadata_packet reply;
1516 struct relay_viewer_stream *stream;
1517
1518 assert(cmd);
1519
1520 DBG("Relay get metadata");
1521
1522 if (cmd->version_check_done == 0) {
1523 ERR("Trying to get metadata before version check");
1524 ret = -1;
1525 goto end;
1526 }
1527
1528 health_code_update();
1529 ret = cmd->sock->ops->recvmsg(cmd->sock, &request,
1530 sizeof(request), 0);
1531 if (ret < 0 || ret != sizeof(request)) {
1532 ret = -1;
1533 ERR("Relay didn't receive the whole packet");
1534 goto end;
1535 }
1536 health_code_update();
1537
1538 rcu_read_lock();
1539 stream = live_find_viewer_stream_by_id(be64toh(request.stream_id));
1540 if (!stream || !stream->metadata_flag) {
1541 ERR("Invalid metadata stream");
1542 goto error;
1543 }
1544 assert(stream->ctf_trace);
1545 assert(stream->ctf_trace->metadata_sent <=
1546 stream->ctf_trace->metadata_received);
1547
1548 len = stream->ctf_trace->metadata_received -
1549 stream->ctf_trace->metadata_sent;
1550 if (len == 0) {
1551 reply.status = htobe32(VIEWER_NO_NEW_METADATA);
1552 goto send_reply;
1553 }
1554
1555 /* first time, we open the metadata file */
1556 if (stream->read_fd < 0) {
1557 char fullpath[PATH_MAX];
1558
1559 ret = snprintf(fullpath, PATH_MAX, "%s/%s", stream->path_name,
1560 stream->channel_name);
1561 if (ret < 0) {
1562 goto error;
1563 }
1564 ret = open(fullpath, O_RDONLY);
1565 if (ret < 0) {
1566 PERROR("Relay opening metadata file");
1567 goto error;
1568 }
1569 stream->read_fd = ret;
1570 }
1571
1572 reply.len = htobe64(len);
1573 data = zmalloc(len);
1574 if (!data) {
1575 PERROR("viewer metadata zmalloc");
1576 goto error;
1577 }
1578
1579 read_len = lttng_read(stream->read_fd, data, len);
1580 if (read_len < len) {
1581 PERROR("Relay reading metadata file");
1582 goto error;
1583 }
1584 stream->ctf_trace->metadata_sent += read_len;
1585 reply.status = htobe32(VIEWER_METADATA_OK);
1586 goto send_reply;
1587
1588error:
1589 reply.status = htobe32(VIEWER_METADATA_ERR);
1590
1591send_reply:
1592 health_code_update();
1593 ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0);
1594 if (ret < 0) {
1595 ERR("Relay data header to viewer");
1596 goto end_unlock;
1597 }
1598 health_code_update();
1599
1600 if (len > 0) {
1601 ret = cmd->sock->ops->sendmsg(cmd->sock, data, len, 0);
1602 if (ret < 0) {
1603 ERR("Relay send data to viewer");
1604 goto end_unlock;
1605 }
1606 }
1607
1608 DBG("Sent %" PRIu64 " bytes of metadata for stream %" PRIu64, len,
1609 be64toh(request.stream_id));
1610
1611 DBG("Metadata sent");
1612
1613end_unlock:
1614 free(data);
1615 rcu_read_unlock();
1616end:
1617 return ret;
1618}
1619
1620/*
1621 * live_relay_unknown_command: send -1 if received unknown command
1622 */
1623static
1624void live_relay_unknown_command(struct relay_command *cmd)
1625{
1626 struct lttcomm_relayd_generic_reply reply;
1627 int ret;
1628
1629 reply.ret_code = htobe32(LTTNG_ERR_UNK);
1630 ret = cmd->sock->ops->sendmsg(cmd->sock, &reply,
1631 sizeof(struct lttcomm_relayd_generic_reply), 0);
1632 if (ret < 0) {
1633 ERR("Relay sending unknown command");
1634 }
1635}
1636
1637/*
1638 * Process the commands received on the control socket
1639 */
1640static
1641int process_control(struct lttng_viewer_cmd *recv_hdr,
1642 struct relay_command *cmd, struct lttng_ht *sessions_ht)
1643{
1644 int ret = 0;
1645
1646 switch (be32toh(recv_hdr->cmd)) {
1647 case VIEWER_CONNECT:
1648 ret = viewer_connect(cmd);
1649 break;
1650 case VIEWER_LIST_SESSIONS:
1651 ret = viewer_list_sessions(cmd, sessions_ht);
1652 break;
1653 case VIEWER_ATTACH_SESSION:
1654 ret = viewer_attach_session(cmd, sessions_ht);
1655 break;
1656 case VIEWER_GET_NEXT_INDEX:
1657 ret = viewer_get_next_index(cmd, sessions_ht);
1658 break;
1659 case VIEWER_GET_PACKET:
1660 ret = viewer_get_packet(cmd);
1661 break;
1662 case VIEWER_GET_METADATA:
1663 ret = viewer_get_metadata(cmd);
1664 break;
1665 default:
1666 ERR("Received unknown viewer command (%u)", be32toh(recv_hdr->cmd));
1667 live_relay_unknown_command(cmd);
1668 ret = -1;
1669 goto end;
1670 }
1671
1672end:
1673 return ret;
1674}
1675
/*
 * Remove a file descriptor from the poll set, then close it.
 *
 * A failure to close the descriptor is only logged.
 */
static
void cleanup_poll_connection(struct lttng_poll_event *events, int pollfd)
{
	assert(events);

	lttng_poll_del(events, pollfd);

	if (close(pollfd) < 0) {
		ERR("Closing pollfd %d", pollfd);
	}
}
1690
1691/*
1692 * Create and add connection to the given hash table.
1693 *
1694 * Return poll add value or else -1 on error.
1695 */
1696static
1697int add_connection(int fd, struct lttng_poll_event *events,
1698 struct lttng_ht *relay_connections_ht)
1699{
1700 int ret;
1701 struct relay_command *relay_connection;
1702
1703 assert(events);
1704 assert(relay_connections_ht);
1705
1706 relay_connection = zmalloc(sizeof(struct relay_command));
1707 if (relay_connection == NULL) {
1708 PERROR("Relay command zmalloc");
1709 goto error;
1710 }
1711
1712 ret = lttng_read(fd, relay_connection, sizeof(*relay_connection));
1713 if (ret < sizeof(*relay_connection)) {
1714 PERROR("read relay cmd pipe");
1715 goto error_read;
1716 }
1717
1718 lttng_ht_node_init_ulong(&relay_connection->sock_n,
1719 (unsigned long) relay_connection->sock->fd);
1720 rcu_read_lock();
1721 lttng_ht_add_unique_ulong(relay_connections_ht,
1722 &relay_connection->sock_n);
1723 rcu_read_unlock();
1724
1725 return lttng_poll_add(events, relay_connection->sock->fd,
1726 LPOLLIN | LPOLLRDHUP);
1727
1728error_read:
1729 free(relay_connection);
1730error:
1731 return -1;
1732}
1733
1734static
1735void deferred_free_connection(struct rcu_head *head)
1736{
1737 struct relay_command *relay_connection =
1738 caa_container_of(head, struct relay_command, rcu_node);
1739
1740 if (relay_connection->session &&
1741 relay_connection->session->viewer_attached > 0) {
1742 relay_connection->session->viewer_attached--;
1743 }
1744 lttcomm_destroy_sock(relay_connection->sock);
1745 free(relay_connection);
1746}
1747
1748/*
1749 * Delete all streams for a specific session ID.
1750 */
1751static
1752void viewer_del_streams(uint64_t session_id)
1753{
1754 struct relay_viewer_stream *stream;
1755 struct lttng_ht_iter iter;
1756
1757 rcu_read_lock();
1758 cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, stream,
1759 stream_n.node) {
1760 health_code_update();
1761
1762 if (stream->session_id != session_id) {
1763 continue;
1764 }
1765
1766 delete_viewer_stream(stream);
1767 assert(stream->ctf_trace);
1768
1769 if (stream->metadata_flag) {
1770 /*
1771 * The metadata viewer stream is destroyed once the refcount on the
1772 * ctf trace goes to 0 in the destroy stream function thus there is
1773 * no explicit call to that function here.
1774 */
1775 stream->ctf_trace->metadata_sent = 0;
1776 stream->ctf_trace->viewer_metadata_stream = NULL;
1777 } else {
1778 destroy_viewer_stream(stream);
1779 }
1780 }
1781 rcu_read_unlock();
1782}
1783
1784/*
1785 * Delete and free a connection.
1786 *
1787 * RCU read side lock MUST be acquired.
1788 */
1789static
1790void del_connection(struct lttng_ht *relay_connections_ht,
1791 struct lttng_ht_iter *iter, struct relay_command *relay_connection)
1792{
1793 int ret;
1794
1795 assert(relay_connections_ht);
1796 assert(iter);
1797 assert(relay_connection);
1798
1799 DBG("Cleaning connection of session ID %" PRIu64,
1800 relay_connection->session_id);
1801
1802 ret = lttng_ht_del(relay_connections_ht, iter);
1803 assert(!ret);
1804
1805 viewer_del_streams(relay_connection->session_id);
1806
1807 call_rcu(&relay_connection->rcu_node, deferred_free_connection);
1808}
1809
1810/*
1811 * This thread does the actual work
1812 */
1813static
1814void *thread_worker(void *data)
1815{
1816 int ret, err = -1;
1817 uint32_t nb_fd;
1818 struct relay_command *relay_connection;
1819 struct lttng_poll_event events;
1820 struct lttng_ht *relay_connections_ht;
1821 struct lttng_ht_node_ulong *node;
1822 struct lttng_ht_iter iter;
1823 struct lttng_viewer_cmd recv_hdr;
1824 struct relay_local_data *relay_ctx = (struct relay_local_data *) data;
1825 struct lttng_ht *sessions_ht = relay_ctx->sessions_ht;
1826
1827 DBG("[thread] Live viewer relay worker started");
1828
1829 rcu_register_thread();
1830
1831 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_WORKER);
1832
1833 /* table of connections indexed on socket */
1834 relay_connections_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
1835 if (!relay_connections_ht) {
1836 goto relay_connections_ht_error;
1837 }
1838
1839 ret = create_thread_poll_set(&events, 2);
1840 if (ret < 0) {
1841 goto error_poll_create;
1842 }
1843
1844 ret = lttng_poll_add(&events, live_relay_cmd_pipe[0], LPOLLIN | LPOLLRDHUP);
1845 if (ret < 0) {
1846 goto error;
1847 }
1848
1849restart:
1850 while (1) {
1851 int i;
1852
1853 health_code_update();
1854
1855 /* Infinite blocking call, waiting for transmission */
1856 DBG3("Relayd live viewer worker thread polling...");
1857 health_poll_entry();
1858 ret = lttng_poll_wait(&events, -1);
1859 health_poll_exit();
1860 if (ret < 0) {
1861 /*
1862 * Restart interrupted system call.
1863 */
1864 if (errno == EINTR) {
1865 goto restart;
1866 }
1867 goto error;
1868 }
1869
1870 nb_fd = ret;
1871
1872 /*
1873 * Process control. The control connection is prioritised so we don't
1874 * starve it with high throughput tracing data on the data
1875 * connection.
1876 */
1877 for (i = 0; i < nb_fd; i++) {
1878 /* Fetch once the poll data */
1879 uint32_t revents = LTTNG_POLL_GETEV(&events, i);
1880 int pollfd = LTTNG_POLL_GETFD(&events, i);
1881
1882 health_code_update();
1883
1884 /* Thread quit pipe has been closed. Killing thread. */
1885 ret = check_thread_quit_pipe(pollfd, revents);
1886 if (ret) {
1887 err = 0;
1888 goto exit;
1889 }
1890
1891 /* Inspect the relay cmd pipe for new connection */
1892 if (pollfd == live_relay_cmd_pipe[0]) {
1893 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1894 ERR("Relay live pipe error");
1895 goto error;
1896 } else if (revents & LPOLLIN) {
1897 DBG("Relay live viewer command received");
1898 ret = add_connection(live_relay_cmd_pipe[0],
1899 &events, relay_connections_ht);
1900 if (ret < 0) {
1901 goto error;
1902 }
1903 }
1904 } else if (revents) {
1905 rcu_read_lock();
1906 lttng_ht_lookup(relay_connections_ht,
1907 (void *)((unsigned long) pollfd), &iter);
1908 node = lttng_ht_iter_get_node_ulong(&iter);
1909 if (node == NULL) {
1910 DBG2("Relay viewer sock %d not found", pollfd);
1911 rcu_read_unlock();
1912 goto error;
1913 }
1914 relay_connection = caa_container_of(node, struct relay_command,
1915 sock_n);
1916
1917 if (revents & (LPOLLERR)) {
1918 cleanup_poll_connection(&events, pollfd);
1919 del_connection(relay_connections_ht, &iter,
1920 relay_connection);
1921 } else if (revents & (LPOLLHUP | LPOLLRDHUP)) {
1922 DBG("Viewer socket %d hung up", pollfd);
1923 cleanup_poll_connection(&events, pollfd);
1924 del_connection(relay_connections_ht, &iter,
1925 relay_connection);
1926 } else if (revents & LPOLLIN) {
1927 ret = relay_connection->sock->ops->recvmsg(
1928 relay_connection->sock, &recv_hdr,
1929 sizeof(struct lttng_viewer_cmd),
1930 0);
1931 /* connection closed */
1932 if (ret <= 0) {
1933 cleanup_poll_connection(&events, pollfd);
1934 del_connection(relay_connections_ht, &iter,
1935 relay_connection);
1936 DBG("Viewer control connection closed with %d",
1937 pollfd);
1938 } else {
1939 if (relay_connection->session) {
1940 DBG2("Relay viewer worker receiving data for "
1941 "session: %" PRIu64,
1942 relay_connection->session->id);
1943 }
1944 ret = process_control(&recv_hdr, relay_connection,
1945 sessions_ht);
1946 if (ret < 0) {
1947 /* Clear the session on error. */
1948 cleanup_poll_connection(&events, pollfd);
1949 del_connection(relay_connections_ht, &iter,
1950 relay_connection);
1951 DBG("Viewer connection closed with %d", pollfd);
1952 }
1953 }
1954 }
1955 rcu_read_unlock();
1956 }
1957 }
1958 }
1959
1960exit:
1961error:
1962 lttng_poll_clean(&events);
1963
1964 /* empty the hash table and free the memory */
1965 rcu_read_lock();
1966 cds_lfht_for_each_entry(relay_connections_ht->ht, &iter.iter, node, node) {
1967 health_code_update();
1968
1969 node = lttng_ht_iter_get_node_ulong(&iter);
1970 if (!node) {
1971 continue;
1972 }
1973
1974 relay_connection = caa_container_of(node, struct relay_command,
1975 sock_n);
1976 del_connection(relay_connections_ht, &iter, relay_connection);
1977 }
1978 rcu_read_unlock();
1979error_poll_create:
1980 lttng_ht_destroy(relay_connections_ht);
1981relay_connections_ht_error:
1982 /* Close relay cmd pipes */
1983 utils_close_pipe(live_relay_cmd_pipe);
1984 if (err) {
1985 DBG("Viewer worker thread exited with error");
1986 }
1987 DBG("Viewer worker thread cleanup complete");
1988 if (err) {
1989 health_error();
1990 ERR("Health error occurred in %s", __func__);
1991 }
1992 health_unregister(health_relayd);
1993 stop_threads();
1994 rcu_unregister_thread();
1995 return NULL;
1996}
1997
1998/*
1999 * Create the relay command pipe to wake thread_manage_apps.
2000 * Closed in cleanup().
2001 */
2002static int create_relay_cmd_pipe(void)
2003{
2004 int ret;
2005
2006 ret = utils_create_pipe_cloexec(live_relay_cmd_pipe);
2007
2008 return ret;
2009}
2010
2011void live_stop_threads(void)
2012{
2013 int ret;
2014 void *status;
2015
2016 stop_threads();
2017
2018 ret = pthread_join(live_listener_thread, &status);
2019 if (ret != 0) {
2020 PERROR("pthread_join live listener");
2021 goto error; /* join error, exit without cleanup */
2022 }
2023
2024 ret = pthread_join(live_worker_thread, &status);
2025 if (ret != 0) {
2026 PERROR("pthread_join live worker");
2027 goto error; /* join error, exit without cleanup */
2028 }
2029
2030 ret = pthread_join(live_dispatcher_thread, &status);
2031 if (ret != 0) {
2032 PERROR("pthread_join live dispatcher");
2033 goto error; /* join error, exit without cleanup */
2034 }
2035
2036 cleanup();
2037
2038error:
2039 return;
2040}
2041
2042/*
2043 * main
2044 */
2045int live_start_threads(struct lttng_uri *uri,
2046 struct relay_local_data *relay_ctx, int quit_pipe[2])
2047{
2048 int ret = 0;
2049 void *status;
2050 int is_root;
2051
2052 assert(uri);
2053 live_uri = uri;
2054
2055 live_thread_quit_pipe[0] = quit_pipe[0];
2056 live_thread_quit_pipe[1] = quit_pipe[1];
2057
2058 /* Check if daemon is UID = 0 */
2059 is_root = !getuid();
2060
2061 if (!is_root) {
2062 if (live_uri->port < 1024) {
2063 ERR("Need to be root to use ports < 1024");
2064 ret = -1;
2065 goto exit;
2066 }
2067 }
2068
2069 /* Setup the thread apps communication pipe. */
2070 if ((ret = create_relay_cmd_pipe()) < 0) {
2071 goto exit;
2072 }
2073
2074 /* Init relay command queue. */
2075 cds_wfq_init(&viewer_cmd_queue.queue);
2076
2077 /* Set up max poll set size */
2078 lttng_poll_set_max_size();
2079
2080 /* Setup the dispatcher thread */
2081 ret = pthread_create(&live_dispatcher_thread, NULL,
2082 thread_dispatcher, (void *) NULL);
2083 if (ret != 0) {
2084 PERROR("pthread_create viewer dispatcher");
2085 goto exit_dispatcher;
2086 }
2087
2088 /* Setup the worker thread */
2089 ret = pthread_create(&live_worker_thread, NULL,
2090 thread_worker, relay_ctx);
2091 if (ret != 0) {
2092 PERROR("pthread_create viewer worker");
2093 goto exit_worker;
2094 }
2095
2096 /* Setup the listener thread */
2097 ret = pthread_create(&live_listener_thread, NULL,
2098 thread_listener, (void *) NULL);
2099 if (ret != 0) {
2100 PERROR("pthread_create viewer listener");
2101 goto exit_listener;
2102 }
2103
2104 ret = 0;
2105 goto end;
2106
2107exit_listener:
2108 ret = pthread_join(live_listener_thread, &status);
2109 if (ret != 0) {
2110 PERROR("pthread_join live listener");
2111 goto error; /* join error, exit without cleanup */
2112 }
2113
2114exit_worker:
2115 ret = pthread_join(live_worker_thread, &status);
2116 if (ret != 0) {
2117 PERROR("pthread_join live worker");
2118 goto error; /* join error, exit without cleanup */
2119 }
2120
2121exit_dispatcher:
2122 ret = pthread_join(live_dispatcher_thread, &status);
2123 if (ret != 0) {
2124 PERROR("pthread_join live dispatcher");
2125 goto error; /* join error, exit without cleanup */
2126 }
2127
2128exit:
2129 cleanup();
2130
2131end:
2132error:
2133 return ret;
2134}
This page took 0.03092 seconds and 4 git commands to generate.