1 /*
2 * lttng-ust-comm.c
3 *
4 * Copyright (C) 2011 David Goulet <david.goulet@polymtl.ca>
5 * Copyright (C) 2011 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; only
10 * version 2.1 of the License.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 #define _LGPL_SOURCE
23 #define _GNU_SOURCE
24 #include <sys/types.h>
25 #include <sys/socket.h>
26 #include <sys/mman.h>
27 #include <sys/stat.h>
28 #include <sys/types.h>
29 #include <sys/wait.h>
30 #include <fcntl.h>
31 #include <unistd.h>
32 #include <errno.h>
33 #include <pthread.h>
34 #include <semaphore.h>
35 #include <time.h>
36 #include <assert.h>
37 #include <signal.h>
38 #include <urcu/uatomic.h>
39 #include <urcu/futex.h>
40 #include <urcu/compiler.h>
41
42 #include <lttng/ust-events.h>
43 #include <lttng/ust-abi.h>
44 #include <lttng/ust.h>
45 #include <lttng/ust-error.h>
46 #include <lttng/ust-ctl.h>
47 #include <urcu/tls-compat.h>
48 #include <ust-comm.h>
49 #include <ust-fd.h>
50 #include <usterr-signal-safe.h>
51 #include <helper.h>
52 #include "tracepoint-internal.h"
53 #include "lttng-tracer-core.h"
54 #include "compat.h"
55 #include "../libringbuffer/tlsfixup.h"
56 #include "lttng-ust-statedump.h"
57 #include "clock.h"
58 #include "../libringbuffer/getcpu.h"
59 #include "getenv.h"
60
61 /*
62 * Has the lttng-ust comm constructor been called?
63 */
64 static int initialized;
65
66 /*
67 * The lock behind ust_lock()/ust_unlock() is used as the communication thread mutex.
68 * It is held when handling a command, also held by the fork() handlers to deal with
69 * removal of threads, and by the exit path.
70 *
71 * The UST lock is the centralized mutex across UST tracing control and
72 * probe registration.
73 *
74 * ust_exit_mutex must never nest in ust_mutex.
75 *
76 * ust_fork_mutex must never nest in ust_mutex.
77 *
78 * ust_mutex_nest is a per-thread nesting counter, allowing the perf
79 * counter lazy initialization called by events within the statedump,
80 * which traces while the ust_mutex is held.
81 *
82 * ust_lock nests within the dynamic loader lock (within glibc) because
83 * it is taken within the library constructor.
84 *
85 * The ust fd tracker lock nests within the ust_mutex.
86 */
87 static pthread_mutex_t ust_mutex = PTHREAD_MUTEX_INITIALIZER;
88
89 /* Allow nesting the ust_mutex within the same thread. */
90 static DEFINE_URCU_TLS(int, ust_mutex_nest);
91
92 /*
93 * ust_exit_mutex protects thread_active variable wrt thread exit. It
94 * cannot be done by ust_mutex because pthread_cancel(), which takes an
95 * internal libc lock, cannot nest within ust_mutex.
96 *
97 * It never nests within a ust_mutex.
98 */
99 static pthread_mutex_t ust_exit_mutex = PTHREAD_MUTEX_INITIALIZER;
100
101 /*
102 * ust_fork_mutex protects base address statedump tracing against forks. It
103 * prevents the dynamic loader lock from being taken (by base address statedump
104 * tracing) while a fork is happening, thus preventing deadlock issues with
105 * the dynamic loader lock.
106 */
107 static pthread_mutex_t ust_fork_mutex = PTHREAD_MUTEX_INITIALIZER;
108
109 /* Should the ust comm thread quit? */
110 static int lttng_ust_comm_should_quit;
111
112 /*
113 * This variable can be tested by applications to check whether
114 * lttng-ust is loaded. They simply have to define their own
115 * "lttng_ust_loaded" weak symbol, and test it. It is set to 1 by the
116 * library constructor.
117 */
118 int lttng_ust_loaded __attribute__((weak));
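/*
 * Application-side sketch (illustrative only, following the comment
 * above; the printf check itself is hypothetical):
 *
 *	int lttng_ust_loaded __attribute__((weak));
 *
 *	if (lttng_ust_loaded)
 *		printf("lttng-ust tracing support is present\n");
 */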
119
120 /*
121 * Return 0 on success, -1 if should quit.
122 * The lock is taken in both cases.
123 * Signal-safe.
124 */
125 int ust_lock(void)
126 {
127 sigset_t sig_all_blocked, orig_mask;
128 int ret, oldstate;
129
130 ret = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate);
131 if (ret) {
132 ERR("pthread_setcancelstate: %s", strerror(ret));
133 }
134 if (oldstate != PTHREAD_CANCEL_ENABLE) {
135 ERR("pthread_setcancelstate: unexpected oldstate");
136 }
137 sigfillset(&sig_all_blocked);
138 ret = pthread_sigmask(SIG_SETMASK, &sig_all_blocked, &orig_mask);
139 if (ret) {
140 ERR("pthread_sigmask: %s", strerror(ret));
141 }
142 if (!URCU_TLS(ust_mutex_nest)++)
143 pthread_mutex_lock(&ust_mutex);
144 ret = pthread_sigmask(SIG_SETMASK, &orig_mask, NULL);
145 if (ret) {
146 ERR("pthread_sigmask: %s", strerror(ret));
147 }
148 if (lttng_ust_comm_should_quit) {
149 return -1;
150 } else {
151 return 0;
152 }
153 }
154
155 /*
156 * ust_lock_nocheck() can be used in constructors/destructors, because
157 * they are already nested within the dynamic loader lock, and therefore
158 * cannot race with execution of the liblttng-ust destructor.
159 * Signal-safe.
160 */
161 void ust_lock_nocheck(void)
162 {
163 sigset_t sig_all_blocked, orig_mask;
164 int ret, oldstate;
165
166 ret = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate);
167 if (ret) {
168 ERR("pthread_setcancelstate: %s", strerror(ret));
169 }
170 if (oldstate != PTHREAD_CANCEL_ENABLE) {
171 ERR("pthread_setcancelstate: unexpected oldstate");
172 }
173 sigfillset(&sig_all_blocked);
174 ret = pthread_sigmask(SIG_SETMASK, &sig_all_blocked, &orig_mask);
175 if (ret) {
176 ERR("pthread_sigmask: %s", strerror(ret));
177 }
178 if (!URCU_TLS(ust_mutex_nest)++)
179 pthread_mutex_lock(&ust_mutex);
180 ret = pthread_sigmask(SIG_SETMASK, &orig_mask, NULL);
181 if (ret) {
182 ERR("pthread_sigmask: %s", strerror(ret));
183 }
184 }
185
186 /*
187 * Signal-safe.
188 */
189 void ust_unlock(void)
190 {
191 sigset_t sig_all_blocked, orig_mask;
192 int ret, oldstate;
193
194 sigfillset(&sig_all_blocked);
195 ret = pthread_sigmask(SIG_SETMASK, &sig_all_blocked, &orig_mask);
196 if (ret) {
197 ERR("pthread_sigmask: %s", strerror(ret));
198 }
199 if (!--URCU_TLS(ust_mutex_nest))
200 pthread_mutex_unlock(&ust_mutex);
201 ret = pthread_sigmask(SIG_SETMASK, &orig_mask, NULL);
202 if (ret) {
203 ERR("pthread_sigmask: %s", strerror(ret));
204 }
205 ret = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
206 if (ret) {
207 ERR("pthread_setcancelstate: %s", strerror(ret));
208 }
209 if (oldstate != PTHREAD_CANCEL_DISABLE) {
210 ERR("pthread_setcancelstate: unexpected oldstate");
211 }
212 }
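/*
 * Typical calling pattern for the helpers above (illustrative sketch,
 * mirroring handle_message() further down in this file):
 *
 *	if (ust_lock()) {
 *		ret = -LTTNG_UST_ERR_EXITING;
 *		goto error;
 *	}
 *	...critical section...
 * error:
 *	ust_unlock();
 *
 * Note that ust_unlock() must be called on the "should quit" path too,
 * since ust_lock() takes the mutex in both cases.
 */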
213
214 /*
215 * Wait for either of these before continuing to the main
216 * program:
217 * - the register_done message from the sessiond daemon
218 * (will let the sessiond daemon enable sessions before main
219 * starts.)
220 * - the sessiond daemon is not reachable.
221 * - timeout (ensuring applications are resilient to session
222 * daemon problems).
223 */
224 static sem_t constructor_wait;
225 /*
226 * Two counts per sessiond (registration done + initial statedump), for both the global and per-user (local) sessiond, hence the initial value of 4.
227 */
228 enum {
229 sem_count_initial_value = 4,
230 };
231
232 static int sem_count = sem_count_initial_value;
233
234 /*
235 * Counting nesting within lttng-ust. Used to ensure that calling fork()
236 * from liblttng-ust does not execute the pre/post fork handlers.
237 */
238 static DEFINE_URCU_TLS(int, lttng_ust_nest_count);
239
240 /*
241 * Info about socket and associated listener thread.
242 */
243 struct sock_info {
244 const char *name;
245 pthread_t ust_listener; /* listener thread */
246 int root_handle;
247 int registration_done;
248 int allowed;
249 int global;
250 int thread_active;
251
252 char sock_path[PATH_MAX];
253 int socket;
254 int notify_socket;
255
256 char wait_shm_path[PATH_MAX];
257 char *wait_shm_mmap;
258 /* Keep track of lazy state dump not performed yet. */
259 int statedump_pending;
260 int initial_statedump_done;
261 };
262
263 /* Socket from app (connect) to session daemon (listen) for communication */
264 struct sock_info global_apps = {
265 .name = "global",
266 .global = 1,
267
268 .root_handle = -1,
269 .registration_done = 0,
270 .allowed = 0,
271 .thread_active = 0,
272
273 .sock_path = LTTNG_DEFAULT_RUNDIR "/" LTTNG_UST_SOCK_FILENAME,
274 .socket = -1,
275 .notify_socket = -1,
276
277 .wait_shm_path = "/" LTTNG_UST_WAIT_FILENAME,
278
279 .statedump_pending = 0,
280 .initial_statedump_done = 0,
281 };
282
283 /* TODO: allow global_apps_sock_path override */
284
285 struct sock_info local_apps = {
286 .name = "local",
287 .global = 0,
288 .root_handle = -1,
289 .registration_done = 0,
290 .allowed = 0, /* Check setuid bit first */
291 .thread_active = 0,
292
293 .socket = -1,
294 .notify_socket = -1,
295
296 .statedump_pending = 0,
297 .initial_statedump_done = 0,
298 };
299
300 static int wait_poll_fallback;
301
302 static const char *cmd_name_mapping[] = {
303 [ LTTNG_UST_RELEASE ] = "Release",
304 [ LTTNG_UST_SESSION ] = "Create Session",
305 [ LTTNG_UST_TRACER_VERSION ] = "Get Tracer Version",
306
307 [ LTTNG_UST_TRACEPOINT_LIST ] = "Create Tracepoint List",
308 [ LTTNG_UST_WAIT_QUIESCENT ] = "Wait for Quiescent State",
309 [ LTTNG_UST_REGISTER_DONE ] = "Registration Done",
310 [ LTTNG_UST_TRACEPOINT_FIELD_LIST ] = "Create Tracepoint Field List",
311
312 /* Session FD commands */
313 [ LTTNG_UST_CHANNEL ] = "Create Channel",
314 [ LTTNG_UST_SESSION_START ] = "Start Session",
315 [ LTTNG_UST_SESSION_STOP ] = "Stop Session",
316
317 /* Channel FD commands */
318 [ LTTNG_UST_STREAM ] = "Create Stream",
319 [ LTTNG_UST_EVENT ] = "Create Event",
320
321 /* Event and Channel FD commands */
322 [ LTTNG_UST_CONTEXT ] = "Create Context",
323 [ LTTNG_UST_FLUSH_BUFFER ] = "Flush Buffer",
324
325 /* Event, Channel and Session commands */
326 [ LTTNG_UST_ENABLE ] = "Enable",
327 [ LTTNG_UST_DISABLE ] = "Disable",
328
329 /* Tracepoint list commands */
330 [ LTTNG_UST_TRACEPOINT_LIST_GET ] = "List Next Tracepoint",
331 [ LTTNG_UST_TRACEPOINT_FIELD_LIST_GET ] = "List Next Tracepoint Field",
332
333 /* Event FD commands */
334 [ LTTNG_UST_FILTER ] = "Create Filter",
335 [ LTTNG_UST_EXCLUSION ] = "Add exclusions to event",
336 };
337
338 static const char *str_timeout;
339 static int got_timeout_env;
340
341 extern void lttng_ring_buffer_client_overwrite_init(void);
342 extern void lttng_ring_buffer_client_overwrite_rt_init(void);
343 extern void lttng_ring_buffer_client_discard_init(void);
344 extern void lttng_ring_buffer_client_discard_rt_init(void);
345 extern void lttng_ring_buffer_metadata_client_init(void);
346 extern void lttng_ring_buffer_client_overwrite_exit(void);
347 extern void lttng_ring_buffer_client_overwrite_rt_exit(void);
348 extern void lttng_ring_buffer_client_discard_exit(void);
349 extern void lttng_ring_buffer_client_discard_rt_exit(void);
350 extern void lttng_ring_buffer_metadata_client_exit(void);
351
352 static char *get_map_shm(struct sock_info *sock_info);
353
354 ssize_t lttng_ust_read(int fd, void *buf, size_t len)
355 {
356 ssize_t ret;
357 size_t copied = 0, to_copy = len;
358
359 do {
360 ret = read(fd, buf + copied, to_copy);
361 if (ret > 0) {
362 copied += ret;
363 to_copy -= ret;
364 }
365 } while ((ret > 0 && to_copy > 0)
366 || (ret < 0 && errno == EINTR));
367 if (ret > 0) {
368 ret = copied;
369 }
370 return ret;
371 }
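/*
 * Illustrative use of lttng_ust_read() (sketch): read a fixed-size
 * message, transparently retrying on EINTR and short reads. It returns
 * the full length on success, 0 on early end-of-file, or -1 with errno
 * set on error.
 *
 *	struct ustcomm_ust_msg lum;
 *	ssize_t len;
 *
 *	len = lttng_ust_read(fd, &lum, sizeof(lum));
 *	if (len != sizeof(lum))
 *		return -1;
 */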
372 /*
373 * Returns the HOME directory path. Caller MUST NOT free(3) the returned
374 * pointer.
375 */
376 static
377 const char *get_lttng_home_dir(void)
378 {
379 const char *val;
380
381 val = (const char *) lttng_getenv("LTTNG_HOME");
382 if (val != NULL) {
383 return val;
384 }
385 return (const char *) lttng_getenv("HOME");
386 }
387
388 /*
389 * Force a read (implying TLS fixup for dlopen) of TLS variables.
390 */
391 static
392 void lttng_fixup_nest_count_tls(void)
393 {
394 asm volatile ("" : : "m" (URCU_TLS(lttng_ust_nest_count)));
395 }
396
397 static
398 void lttng_fixup_ust_mutex_nest_tls(void)
399 {
400 asm volatile ("" : : "m" (URCU_TLS(ust_mutex_nest)));
401 }
402
403 /*
404 * Fixup urcu bp TLS.
405 */
406 static
407 void lttng_fixup_urcu_bp_tls(void)
408 {
409 rcu_read_lock();
410 rcu_read_unlock();
411 }
412
413 void lttng_ust_fixup_tls(void)
414 {
415 lttng_fixup_urcu_bp_tls();
416 lttng_fixup_ringbuffer_tls();
417 lttng_fixup_vtid_tls();
418 lttng_fixup_nest_count_tls();
419 lttng_fixup_procname_tls();
420 lttng_fixup_ust_mutex_nest_tls();
421 lttng_ust_fixup_fd_tracker_tls();
422 }
423
424 int lttng_get_notify_socket(void *owner)
425 {
426 struct sock_info *info = owner;
427
428 return info->notify_socket;
429 }
430
431 static
432 void print_cmd(int cmd, int handle)
433 {
434 const char *cmd_name = "Unknown";
435
436 if (cmd >= 0 && cmd < LTTNG_ARRAY_SIZE(cmd_name_mapping)
437 && cmd_name_mapping[cmd]) {
438 cmd_name = cmd_name_mapping[cmd];
439 }
440 DBG("Message Received \"%s\" (%d), Handle \"%s\" (%d)",
441 cmd_name, cmd,
442 lttng_ust_obj_get_name(handle), handle);
443 }
444
445 static
446 int setup_global_apps(void)
447 {
448 int ret = 0;
449 assert(!global_apps.wait_shm_mmap);
450
451 global_apps.wait_shm_mmap = get_map_shm(&global_apps);
452 if (!global_apps.wait_shm_mmap) {
453 WARN("Unable to get map shm for global apps. Disabling LTTng-UST global tracing.");
454 global_apps.allowed = 0;
455 ret = -EIO;
456 goto error;
457 }
458
459 global_apps.allowed = 1;
460 error:
461 return ret;
462 }
463 static
464 int setup_local_apps(void)
465 {
466 int ret = 0;
467 const char *home_dir;
468 uid_t uid;
469
470 assert(!local_apps.wait_shm_mmap);
471
472 uid = getuid();
473 /*
474 * Disallow per-user tracing for setuid binaries.
475 */
476 if (uid != geteuid()) {
477 assert(local_apps.allowed == 0);
478 ret = 0;
479 goto end;
480 }
481 home_dir = get_lttng_home_dir();
482 if (!home_dir) {
483 WARN("HOME environment variable not set. Disabling LTTng-UST per-user tracing.");
484 assert(local_apps.allowed == 0);
485 ret = -ENOENT;
486 goto end;
487 }
488 local_apps.allowed = 1;
489 snprintf(local_apps.sock_path, PATH_MAX, "%s/%s/%s",
490 home_dir,
491 LTTNG_DEFAULT_HOME_RUNDIR,
492 LTTNG_UST_SOCK_FILENAME);
493 snprintf(local_apps.wait_shm_path, PATH_MAX, "/%s-%u",
494 LTTNG_UST_WAIT_FILENAME,
495 uid);
496
497 local_apps.wait_shm_mmap = get_map_shm(&local_apps);
498 if (!local_apps.wait_shm_mmap) {
499 WARN("Unable to get map shm for local apps. Disabling LTTng-UST per-user tracing.");
500 local_apps.allowed = 0;
501 ret = -EIO;
502 goto end;
503 }
504 end:
505 return ret;
506 }
507
508 /*
509 * Get socket timeout, in ms.
510 * -1: wait forever. 0: don't wait. >0: timeout, in ms.
511 */
512 static
513 long get_timeout(void)
514 {
515 long constructor_delay_ms = LTTNG_UST_DEFAULT_CONSTRUCTOR_TIMEOUT_MS;
516
517 if (!got_timeout_env) {
518 str_timeout = lttng_getenv("LTTNG_UST_REGISTER_TIMEOUT");
519 got_timeout_env = 1;
520 }
521 if (str_timeout)
522 constructor_delay_ms = strtol(str_timeout, NULL, 10);
523 /* All negative values are considered as "-1". */
524 if (constructor_delay_ms < -1)
525 constructor_delay_ms = -1;
526 return constructor_delay_ms;
527 }
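/*
 * Illustrative use of the environment variable read above (shell
 * commands, sketch only):
 *
 *	LTTNG_UST_REGISTER_TIMEOUT=0 ./app      # do not wait for sessiond
 *	LTTNG_UST_REGISTER_TIMEOUT=-1 ./app     # wait indefinitely
 *	LTTNG_UST_REGISTER_TIMEOUT=3000 ./app   # wait up to 3000 ms
 */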
528
529 /* Timeout for notify socket send and recv. */
530 static
531 long get_notify_sock_timeout(void)
532 {
533 return get_timeout();
534 }
535
536 /* Timeout for connecting to cmd and notify sockets. */
537 static
538 long get_connect_sock_timeout(void)
539 {
540 return get_timeout();
541 }
542
543 /*
544 * Return values: -1: wait forever. 0: don't wait. 1: timeout wait.
545 */
546 static
547 int get_constructor_timeout(struct timespec *constructor_timeout)
548 {
549 long constructor_delay_ms;
550 int ret;
551
552 constructor_delay_ms = get_timeout();
553
554 switch (constructor_delay_ms) {
555 case -1:/* fall-through */
556 case 0:
557 return constructor_delay_ms;
558 default:
559 break;
560 }
561
562 /*
563 * If we are unable to find the current time, don't wait.
564 */
565 ret = clock_gettime(CLOCK_REALTIME, constructor_timeout);
566 if (ret) {
567 /* Don't wait. */
568 return 0;
569 }
570 constructor_timeout->tv_sec += constructor_delay_ms / 1000UL;
571 constructor_timeout->tv_nsec +=
572 (constructor_delay_ms % 1000UL) * 1000000UL;
573 if (constructor_timeout->tv_nsec >= 1000000000UL) {
574 constructor_timeout->tv_sec++;
575 constructor_timeout->tv_nsec -= 1000000000UL;
576 }
577 /* Timeout wait (constructor_delay_ms). */
578 return 1;
579 }
580
581 static
582 int register_to_sessiond(int socket, enum ustctl_socket_type type)
583 {
584 return ustcomm_send_reg_msg(socket,
585 type,
586 CAA_BITS_PER_LONG,
587 lttng_alignof(uint8_t) * CHAR_BIT,
588 lttng_alignof(uint16_t) * CHAR_BIT,
589 lttng_alignof(uint32_t) * CHAR_BIT,
590 lttng_alignof(uint64_t) * CHAR_BIT,
591 lttng_alignof(unsigned long) * CHAR_BIT);
592 }
593
594 static
595 int send_reply(int sock, struct ustcomm_ust_reply *lur)
596 {
597 ssize_t len;
598
599 len = ustcomm_send_unix_sock(sock, lur, sizeof(*lur));
600 switch (len) {
601 case sizeof(*lur):
602 DBG("message successfully sent");
603 return 0;
604 default:
605 if (len == -ECONNRESET) {
606 DBG("remote end closed connection");
607 return 0;
608 }
609 if (len < 0)
610 return len;
611 DBG("incorrect message size: %zd", len);
612 return -EINVAL;
613 }
614 }
615
616 static
617 void decrement_sem_count(unsigned int count)
618 {
619 int ret;
620
621 assert(uatomic_read(&sem_count) >= count);
622
623 if (uatomic_read(&sem_count) <= 0) {
624 return;
625 }
626
627 ret = uatomic_add_return(&sem_count, -count);
628 if (ret == 0) {
629 ret = sem_post(&constructor_wait);
630 assert(!ret);
631 }
632 }
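/*
 * Accounting note: sem_count starts at sem_count_initial_value (4),
 * i.e. two counts per sessiond. handle_register_done() releases one
 * count for "registration done" and a second one when no statedump is
 * pending; handle_register_failed() releases both at once; a pending
 * statedump releases its count later from handle_pending_statedump().
 * constructor_wait is only posted once sem_count reaches zero.
 */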
633
634 static
635 int handle_register_done(struct sock_info *sock_info)
636 {
637 if (sock_info->registration_done)
638 return 0;
639 sock_info->registration_done = 1;
640
641 decrement_sem_count(1);
642 if (!sock_info->statedump_pending) {
643 sock_info->initial_statedump_done = 1;
644 decrement_sem_count(1);
645 }
646
647 return 0;
648 }
649
650 static
651 int handle_register_failed(struct sock_info *sock_info)
652 {
653 if (sock_info->registration_done)
654 return 0;
655 sock_info->registration_done = 1;
656 sock_info->initial_statedump_done = 1;
657
658 decrement_sem_count(2);
659
660 return 0;
661 }
662
663 /*
664 * Only execute pending statedump after the constructor semaphore has
665 * been posted by the current listener thread. This means statedump will
666 * only be performed after the "registration done" command is received
667 * from this thread's session daemon.
668 *
669 * This ensures we don't run into deadlock issues with the dynamic
670 * loader mutex, which is held while the constructor is called and
671 * waiting on the constructor semaphore. All operations requiring this
672 * dynamic loader lock need to be postponed using this mechanism.
673 *
674 * In a scenario with two session daemons connected to the application,
675 * it is possible that the first listener thread which receives the
676 * registration done command issues its statedump while the dynamic
677 * loader lock is still held by the application constructor waiting on
678 * the semaphore. It will however be allowed to proceed when the
679 * second session daemon sends the registration done command to the
680 * second listener thread. This situation therefore does not produce
681 * a deadlock.
682 */
683 static
684 void handle_pending_statedump(struct sock_info *sock_info)
685 {
686 if (sock_info->registration_done && sock_info->statedump_pending) {
687 sock_info->statedump_pending = 0;
688 pthread_mutex_lock(&ust_fork_mutex);
689 lttng_handle_pending_statedump(sock_info);
690 pthread_mutex_unlock(&ust_fork_mutex);
691
692 if (!sock_info->initial_statedump_done) {
693 sock_info->initial_statedump_done = 1;
694 decrement_sem_count(1);
695 }
696 }
697 }
698
699 static
700 int handle_message(struct sock_info *sock_info,
701 int sock, struct ustcomm_ust_msg *lum)
702 {
703 int ret = 0;
704 const struct lttng_ust_objd_ops *ops;
705 struct ustcomm_ust_reply lur;
706 union ust_args args;
707 char ctxstr[LTTNG_UST_SYM_NAME_LEN]; /* App context string. */
708 ssize_t len;
709
710 memset(&lur, 0, sizeof(lur));
711
712 if (ust_lock()) {
713 ret = -LTTNG_UST_ERR_EXITING;
714 goto error;
715 }
716
717 ops = objd_ops(lum->handle);
718 if (!ops) {
719 ret = -ENOENT;
720 goto error;
721 }
722
723 switch (lum->cmd) {
724 case LTTNG_UST_REGISTER_DONE:
725 if (lum->handle == LTTNG_UST_ROOT_HANDLE)
726 ret = handle_register_done(sock_info);
727 else
728 ret = -EINVAL;
729 break;
730 case LTTNG_UST_RELEASE:
731 if (lum->handle == LTTNG_UST_ROOT_HANDLE)
732 ret = -EPERM;
733 else
734 ret = lttng_ust_objd_unref(lum->handle, 1);
735 break;
736 case LTTNG_UST_FILTER:
737 {
738 /* Receive filter data */
739 struct lttng_ust_filter_bytecode_node *bytecode;
740
741 if (lum->u.filter.data_size > FILTER_BYTECODE_MAX_LEN) {
742 ERR("Filter data size is too large: %u bytes",
743 lum->u.filter.data_size);
744 ret = -EINVAL;
745 goto error;
746 }
747
748 if (lum->u.filter.reloc_offset > lum->u.filter.data_size) {
749 ERR("Filter reloc offset %u is not within data",
750 lum->u.filter.reloc_offset);
751 ret = -EINVAL;
752 goto error;
753 }
754
755 bytecode = zmalloc(sizeof(*bytecode) + lum->u.filter.data_size);
756 if (!bytecode) {
757 ret = -ENOMEM;
758 goto error;
759 }
760 len = ustcomm_recv_unix_sock(sock, bytecode->bc.data,
761 lum->u.filter.data_size);
762 switch (len) {
763 case 0: /* orderly shutdown */
764 ret = 0;
765 free(bytecode);
766 goto error;
767 default:
768 if (len == lum->u.filter.data_size) {
769 DBG("filter data received");
770 break;
771 } else if (len < 0) {
772 DBG("Receive failed from lttng-sessiond with errno %d", (int) -len);
773 if (len == -ECONNRESET) {
774 ERR("%s remote end closed connection", sock_info->name);
775 ret = len;
776 free(bytecode);
777 goto error;
778 }
779 ret = len;
780 free(bytecode);
781 goto error;
782 } else {
783 DBG("incorrect filter data message size: %zd", len);
784 ret = -EINVAL;
785 free(bytecode);
786 goto error;
787 }
788 }
789 bytecode->bc.len = lum->u.filter.data_size;
790 bytecode->bc.reloc_offset = lum->u.filter.reloc_offset;
791 bytecode->bc.seqnum = lum->u.filter.seqnum;
792 if (ops->cmd) {
793 ret = ops->cmd(lum->handle, lum->cmd,
794 (unsigned long) bytecode,
795 &args, sock_info);
796 if (ret) {
797 free(bytecode);
798 }
799 /* don't free bytecode if everything went fine. */
800 } else {
801 ret = -ENOSYS;
802 free(bytecode);
803 }
804 break;
805 }
806 case LTTNG_UST_EXCLUSION:
807 {
808 /* Receive exclusion names */
809 struct lttng_ust_excluder_node *node;
810 unsigned int count;
811
812 count = lum->u.exclusion.count;
813 if (count == 0) {
814 /* There are no names to read */
815 ret = 0;
816 goto error;
817 }
818 node = zmalloc(sizeof(*node) +
819 count * LTTNG_UST_SYM_NAME_LEN);
820 if (!node) {
821 ret = -ENOMEM;
822 goto error;
823 }
824 node->excluder.count = count;
825 len = ustcomm_recv_unix_sock(sock, node->excluder.names,
826 count * LTTNG_UST_SYM_NAME_LEN);
827 switch (len) {
828 case 0: /* orderly shutdown */
829 ret = 0;
830 free(node);
831 goto error;
832 default:
833 if (len == count * LTTNG_UST_SYM_NAME_LEN) {
834 DBG("Exclusion data received");
835 break;
836 } else if (len < 0) {
837 DBG("Receive failed from lttng-sessiond with errno %d", (int) -len);
838 if (len == -ECONNRESET) {
839 ERR("%s remote end closed connection", sock_info->name);
840 ret = len;
841 free(node);
842 goto error;
843 }
844 ret = len;
845 free(node);
846 goto error;
847 } else {
848 DBG("Incorrect exclusion data message size: %zd", len);
849 ret = -EINVAL;
850 free(node);
851 goto error;
852 }
853 }
854 if (ops->cmd) {
855 ret = ops->cmd(lum->handle, lum->cmd,
856 (unsigned long) node,
857 &args, sock_info);
858 if (ret) {
859 free(node);
860 }
861 /* Don't free exclusion data if everything went fine. */
862 } else {
863 ret = -ENOSYS;
864 free(node);
865 }
866 break;
867 }
868 case LTTNG_UST_CHANNEL:
869 {
870 void *chan_data;
871 int wakeup_fd;
872
873 len = ustcomm_recv_channel_from_sessiond(sock,
874 &chan_data, lum->u.channel.len,
875 &wakeup_fd);
876 switch (len) {
877 case 0: /* orderly shutdown */
878 ret = 0;
879 goto error;
880 default:
881 if (len == lum->u.channel.len) {
882 DBG("channel data received");
883 break;
884 } else if (len < 0) {
885 DBG("Receive failed from lttng-sessiond with errno %d", (int) -len);
886 if (len == -ECONNRESET) {
887 ERR("%s remote end closed connection", sock_info->name);
888 ret = len;
889 goto error;
890 }
891 ret = len;
892 goto error;
893 } else {
894 DBG("incorrect channel data message size: %zd", len);
895 ret = -EINVAL;
896 goto error;
897 }
898 }
899 args.channel.chan_data = chan_data;
900 args.channel.wakeup_fd = wakeup_fd;
901 if (ops->cmd)
902 ret = ops->cmd(lum->handle, lum->cmd,
903 (unsigned long) &lum->u,
904 &args, sock_info);
905 else
906 ret = -ENOSYS;
907 break;
908 }
909 case LTTNG_UST_STREAM:
910 {
911 /* Receive shm_fd, wakeup_fd */
912 ret = ustcomm_recv_stream_from_sessiond(sock,
913 NULL,
914 &args.stream.shm_fd,
915 &args.stream.wakeup_fd);
916 if (ret) {
917 goto error;
918 }
919
920 if (ops->cmd)
921 ret = ops->cmd(lum->handle, lum->cmd,
922 (unsigned long) &lum->u,
923 &args, sock_info);
924 else
925 ret = -ENOSYS;
926 break;
927 }
928 case LTTNG_UST_CONTEXT:
929 switch (lum->u.context.ctx) {
930 case LTTNG_UST_CONTEXT_APP_CONTEXT:
931 {
932 char *p;
933 size_t ctxlen, recvlen;
934
935 ctxlen = strlen("$app.") + lum->u.context.u.app_ctx.provider_name_len - 1
936 + strlen(":") + lum->u.context.u.app_ctx.ctx_name_len;
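/*
 * Illustrative layout of the resulting context string (assuming
 * both length fields include their terminating NUL, as the '-1'
 * above and the ':' patch below suggest):
 *
 *	"$app." <provider_name> ':' <ctx_name> '\0'
 */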
937 if (ctxlen >= LTTNG_UST_SYM_NAME_LEN) {
938 ERR("Application context string length size is too large: %zu bytes",
939 ctxlen);
940 ret = -EINVAL;
941 goto error;
942 }
943 strcpy(ctxstr, "$app.");
944 p = &ctxstr[strlen("$app.")];
945 recvlen = ctxlen - strlen("$app.");
946 len = ustcomm_recv_unix_sock(sock, p, recvlen);
947 switch (len) {
948 case 0: /* orderly shutdown */
949 ret = 0;
950 goto error;
951 default:
952 if (len == recvlen) {
953 DBG("app context data received");
954 break;
955 } else if (len < 0) {
956 DBG("Receive failed from lttng-sessiond with errno %d", (int) -len);
957 if (len == -ECONNRESET) {
958 ERR("%s remote end closed connection", sock_info->name);
959 ret = len;
960 goto error;
961 }
962 ret = len;
963 goto error;
964 } else {
965 DBG("incorrect app context data message size: %zd", len);
966 ret = -EINVAL;
967 goto error;
968 }
969 }
970 /* Put : between provider and ctxname. */
971 p[lum->u.context.u.app_ctx.provider_name_len - 1] = ':';
972 args.app_context.ctxname = ctxstr;
973 break;
974 }
975 default:
976 break;
977 }
978 if (ops->cmd) {
979 ret = ops->cmd(lum->handle, lum->cmd,
980 (unsigned long) &lum->u,
981 &args, sock_info);
982 } else {
983 ret = -ENOSYS;
984 }
985 break;
986 default:
987 if (ops->cmd)
988 ret = ops->cmd(lum->handle, lum->cmd,
989 (unsigned long) &lum->u,
990 &args, sock_info);
991 else
992 ret = -ENOSYS;
993 break;
994 }
995
996 lur.handle = lum->handle;
997 lur.cmd = lum->cmd;
998 lur.ret_val = ret;
999 if (ret >= 0) {
1000 lur.ret_code = LTTNG_UST_OK;
1001 } else {
1002 /*
1003 * Use -LTTNG_UST_ERR as wildcard for UST internal
1004 * errors that are not caused by the transport, except if
1005 * we already have a more precise error code to
1006 * report.
1007 */
1008 if (ret > -LTTNG_UST_ERR) {
1009 /* Translate code to UST error. */
1010 switch (ret) {
1011 case -EEXIST:
1012 lur.ret_code = -LTTNG_UST_ERR_EXIST;
1013 break;
1014 case -EINVAL:
1015 lur.ret_code = -LTTNG_UST_ERR_INVAL;
1016 break;
1017 case -ENOENT:
1018 lur.ret_code = -LTTNG_UST_ERR_NOENT;
1019 break;
1020 case -EPERM:
1021 lur.ret_code = -LTTNG_UST_ERR_PERM;
1022 break;
1023 case -ENOSYS:
1024 lur.ret_code = -LTTNG_UST_ERR_NOSYS;
1025 break;
1026 default:
1027 lur.ret_code = -LTTNG_UST_ERR;
1028 break;
1029 }
1030 } else {
1031 lur.ret_code = ret;
1032 }
1033 }
1034 if (ret >= 0) {
1035 switch (lum->cmd) {
1036 case LTTNG_UST_TRACER_VERSION:
1037 lur.u.version = lum->u.version;
1038 break;
1039 case LTTNG_UST_TRACEPOINT_LIST_GET:
1040 memcpy(&lur.u.tracepoint, &lum->u.tracepoint, sizeof(lur.u.tracepoint));
1041 break;
1042 }
1043 }
1044 DBG("Return value: %d", lur.ret_val);
1045
1046 ust_unlock();
1047
1048 /*
1049 * Perform delayed statedump operations outside of the UST
1050 * lock. We need to take the dynamic loader lock before we take
1051 * the UST lock internally within handle_pending_statedump().
1052 */
1053 handle_pending_statedump(sock_info);
1054
1055 if (ust_lock()) {
1056 ret = -LTTNG_UST_ERR_EXITING;
1057 goto error;
1058 }
1059
1060 ret = send_reply(sock, &lur);
1061 if (ret < 0) {
1062 DBG("error sending reply");
1063 goto error;
1064 }
1065
1066 /*
1067 * LTTNG_UST_TRACEPOINT_FIELD_LIST_GET needs to send the field
1068 * after the reply.
1069 */
1070 if (lur.ret_code == LTTNG_UST_OK) {
1071 switch (lum->cmd) {
1072 case LTTNG_UST_TRACEPOINT_FIELD_LIST_GET:
1073 len = ustcomm_send_unix_sock(sock,
1074 &args.field_list.entry,
1075 sizeof(args.field_list.entry));
1076 if (len < 0) {
1077 ret = len;
1078 goto error;
1079 }
1080 if (len != sizeof(args.field_list.entry)) {
1081 ret = -EINVAL;
1082 goto error;
1083 }
1084 }
1085 }
1086
1087 error:
1088 ust_unlock();
1089
1090 return ret;
1091 }
1092
1093 static
1094 void cleanup_sock_info(struct sock_info *sock_info, int exiting)
1095 {
1096 int ret;
1097
1098 if (sock_info->root_handle != -1) {
1099 ret = lttng_ust_objd_unref(sock_info->root_handle, 1);
1100 if (ret) {
1101 ERR("Error unref root handle");
1102 }
1103 sock_info->root_handle = -1;
1104 }
1105 sock_info->registration_done = 0;
1106 sock_info->initial_statedump_done = 0;
1107
1108 /*
1109 * wait_shm_mmap, socket and notify socket are used by listener
1110 * threads outside of the ust lock, so we cannot tear them down
1111 * ourselves, because we cannot join on these threads. Leave
1112 * responsibility of cleaning up these resources to the OS
1113 * process exit.
1114 */
1115 if (exiting)
1116 return;
1117
1118 if (sock_info->socket != -1) {
1119 ret = ustcomm_close_unix_sock(sock_info->socket);
1120 if (ret) {
1121 ERR("Error closing ust cmd socket");
1122 }
1123 sock_info->socket = -1;
1124 }
1125 if (sock_info->notify_socket != -1) {
1126 ret = ustcomm_close_unix_sock(sock_info->notify_socket);
1127 if (ret) {
1128 ERR("Error closing ust notify socket");
1129 }
1130 sock_info->notify_socket = -1;
1131 }
1132 if (sock_info->wait_shm_mmap) {
1133 long page_size;
1134
1135 page_size = sysconf(_SC_PAGE_SIZE);
1136 if (page_size <= 0) {
1137 if (!page_size) {
1138 errno = EINVAL;
1139 }
1140 PERROR("Error in sysconf(_SC_PAGE_SIZE)");
1141 } else {
1142 ret = munmap(sock_info->wait_shm_mmap, page_size);
1143 if (ret) {
1144 ERR("Error unmapping wait shm");
1145 }
1146 }
1147 sock_info->wait_shm_mmap = NULL;
1148 }
1149 }
1150
1151 /*
1152 * Using fork to set umask in the child process (not multi-thread safe).
1153 * We deal with the shm_open vs ftruncate race (happening when the
1154 * sessiond owns the shm and does not let everybody modify it, to ensure
1155 * safety against shm_unlink) by simply letting the mmap fail and
1156 * retrying after a few seconds.
1157 * For global shm, everybody has rw access to it until the sessiond
1158 * starts.
1159 */
1160 static
1161 int get_wait_shm(struct sock_info *sock_info, size_t mmap_size)
1162 {
1163 int wait_shm_fd, ret;
1164 pid_t pid;
1165
1166 /*
1167 * Try to open read-only.
1168 */
1169 wait_shm_fd = shm_open(sock_info->wait_shm_path, O_RDONLY, 0);
1170 if (wait_shm_fd >= 0) {
1171 int32_t tmp_read;
1172 ssize_t len;
1173 size_t bytes_read = 0;
1174
1175 /*
1176 * Try to read the fd. If unable to do so, try opening
1177 * it in write mode.
1178 */
1179 do {
1180 len = read(wait_shm_fd,
1181 &((char *) &tmp_read)[bytes_read],
1182 sizeof(tmp_read) - bytes_read);
1183 if (len > 0) {
1184 bytes_read += len;
1185 }
1186 } while ((len < 0 && errno == EINTR)
1187 || (len > 0 && bytes_read < sizeof(tmp_read)));
1188 if (bytes_read != sizeof(tmp_read)) {
1189 ret = close(wait_shm_fd);
1190 if (ret) {
1191 ERR("close wait_shm_fd");
1192 }
1193 goto open_write;
1194 }
1195 goto end;
1196 } else if (wait_shm_fd < 0 && errno != ENOENT) {
1197 /*
1198 * Read-only open did not work, and it's not because the
1199 * entry was not present. It's a failure that prohibits
1200 * using shm.
1201 */
1202 ERR("Error opening shm %s", sock_info->wait_shm_path);
1203 goto end;
1204 }
1205
1206 open_write:
1207 /*
1208 * If the open failed because the file did not exist, or because
1209 * the file was not truncated yet, try creating it ourselves.
1210 */
1211 URCU_TLS(lttng_ust_nest_count)++;
1212 pid = fork();
1213 URCU_TLS(lttng_ust_nest_count)--;
1214 if (pid > 0) {
1215 int status;
1216
1217 /*
1218 * Parent: wait for child to return, in which case the
1219 * shared memory map will have been created.
1220 */
1221 pid = wait(&status);
1222 if (pid < 0 || !WIFEXITED(status) || WEXITSTATUS(status) != 0) {
1223 wait_shm_fd = -1;
1224 goto end;
1225 }
1226 /*
1227 * Try to open read-only again after creation.
1228 */
1229 wait_shm_fd = shm_open(sock_info->wait_shm_path, O_RDONLY, 0);
1230 if (wait_shm_fd < 0) {
1231 /*
1232 * Read-only open did not work. It's a failure
1233 * that prohibits using shm.
1234 */
1235 ERR("Error opening shm %s", sock_info->wait_shm_path);
1236 goto end;
1237 }
1238 goto end;
1239 } else if (pid == 0) {
1240 int create_mode;
1241
1242 /* Child */
1243 create_mode = S_IRUSR | S_IWUSR | S_IRGRP;
1244 if (sock_info->global)
1245 create_mode |= S_IROTH | S_IWGRP | S_IWOTH;
1246 /*
1247 * We're alone in a child process, so we can modify the
1248 * process-wide umask.
1249 */
1250 umask(~create_mode);
1251 /*
1252 * Try creating shm (or get rw access).
1253 * We don't do an exclusive open, because we allow other
1254 * processes to create+ftruncate it concurrently.
1255 */
1256 wait_shm_fd = shm_open(sock_info->wait_shm_path,
1257 O_RDWR | O_CREAT, create_mode);
1258 if (wait_shm_fd >= 0) {
1259 ret = ftruncate(wait_shm_fd, mmap_size);
1260 if (ret) {
1261 PERROR("ftruncate");
1262 _exit(EXIT_FAILURE);
1263 }
1264 _exit(EXIT_SUCCESS);
1265 }
1266 /*
1267 * For local shm, we need to have rw access to accept
1268 * opening it: this means the local sessiond will be
1269 * able to wake us up. For global shm, we open it even
1270 * if rw access is not granted, because the root.root
1271 * sessiond will be able to override all rights and wake
1272 * us up.
1273 */
1274 if (!sock_info->global && errno != EACCES) {
1275 ERR("Error opening shm %s", sock_info->wait_shm_path);
1276 _exit(EXIT_FAILURE);
1277 }
1278 /*
1279 * The shm exists, but we cannot open it RW. Report
1280 * success.
1281 */
1282 _exit(EXIT_SUCCESS);
1283 } else {
1284 return -1;
1285 }
1286 end:
1287 if (wait_shm_fd >= 0 && !sock_info->global) {
1288 struct stat statbuf;
1289
1290 /*
1291 * Ensure that our user is the owner of the shm file for
1292 * local shm. If we do not own the file, it means our
1293 * sessiond will not have access to wake us up (there is
1294 * probably a rogue process trying to fake our
1295 * sessiond). Fallback to polling method in this case.
1296 */
1297 ret = fstat(wait_shm_fd, &statbuf);
1298 if (ret) {
1299 PERROR("fstat");
1300 goto error_close;
1301 }
1302 if (statbuf.st_uid != getuid())
1303 goto error_close;
1304 }
1305 return wait_shm_fd;
1306
1307 error_close:
1308 ret = close(wait_shm_fd);
1309 if (ret) {
1310 PERROR("Error closing fd");
1311 }
1312 return -1;
1313 }
1314
1315 static
1316 char *get_map_shm(struct sock_info *sock_info)
1317 {
1318 long page_size;
1319 int wait_shm_fd, ret;
1320 char *wait_shm_mmap;
1321
1322 page_size = sysconf(_SC_PAGE_SIZE);
1323 if (page_size <= 0) {
1324 if (!page_size) {
1325 errno = EINVAL;
1326 }
1327 PERROR("Error in sysconf(_SC_PAGE_SIZE)");
1328 goto error;
1329 }
1330
1331 lttng_ust_lock_fd_tracker();
1332 wait_shm_fd = get_wait_shm(sock_info, page_size);
1333 if (wait_shm_fd < 0) {
1334 lttng_ust_unlock_fd_tracker();
1335 goto error;
1336 }
1337
1338 ret = lttng_ust_add_fd_to_tracker(wait_shm_fd);
1339 if (ret < 0) {
1340 ret = close(wait_shm_fd);
1341 if (ret) {
1342 PERROR("Error closing fd");
1343 }
1344 lttng_ust_unlock_fd_tracker();
1345 goto error;
1346 }
1347
1348 wait_shm_fd = ret;
1349 lttng_ust_unlock_fd_tracker();
1350
1351 wait_shm_mmap = mmap(NULL, page_size, PROT_READ,
1352 MAP_SHARED, wait_shm_fd, 0);
1353
1354 /* close shm fd immediately after taking the mmap reference */
1355 lttng_ust_lock_fd_tracker();
1356 ret = close(wait_shm_fd);
1357 if (!ret) {
1358 lttng_ust_delete_fd_from_tracker(wait_shm_fd);
1359 } else {
1360 PERROR("Error closing fd");
1361 }
1362 lttng_ust_unlock_fd_tracker();
1363
1364 if (wait_shm_mmap == MAP_FAILED) {
1365 DBG("mmap error (can be caused by race with sessiond). Fallback to poll mode.");
1366 goto error;
1367 }
1368 return wait_shm_mmap;
1369
1370 error:
1371 return NULL;
1372 }
1373
1374 static
1375 void wait_for_sessiond(struct sock_info *sock_info)
1376 {
1377 /* Use ust_lock to check if we should quit. */
1378 if (ust_lock()) {
1379 goto quit;
1380 }
1381 if (wait_poll_fallback) {
1382 goto error;
1383 }
1384 ust_unlock();
1385
1386 assert(sock_info->wait_shm_mmap);
1387
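/*
 * Wake-up protocol (sketch): the read-only page mapped in
 * wait_shm_mmap holds a 32-bit futex word. The sessiond is expected
 * to set it to a non-zero value and issue FUTEX_WAKE; here we first
 * check the value, then block in FUTEX_WAIT with an expected value
 * of 0, retrying on EINTR.
 */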
1388 DBG("Waiting for %s apps sessiond", sock_info->name);
1389 /* Wait for futex wakeup */
1390 if (uatomic_read((int32_t *) sock_info->wait_shm_mmap))
1391 goto end_wait;
1392
1393 while (futex_async((int32_t *) sock_info->wait_shm_mmap,
1394 FUTEX_WAIT, 0, NULL, NULL, 0)) {
1395 switch (errno) {
1396 case EWOULDBLOCK:
1397 /* Value already changed. */
1398 goto end_wait;
1399 case EINTR:
1400 /* Retry if interrupted by signal. */
1401 break; /* Get out of switch. */
1402 case EFAULT:
1403 wait_poll_fallback = 1;
1404 DBG(
1405 "Linux kernels 2.6.33 to 3.0 (with the exception of stable versions) "
1406 "do not support FUTEX_WAKE on read-only memory mappings correctly. "
1407 "Please upgrade your kernel "
1408 "(fix is commit 9ea71503a8ed9184d2d0b8ccc4d269d05f7940ae in Linux kernel "
1409 "mainline). LTTng-UST will use polling mode fallback.");
1410 if (ust_debug())
1411 PERROR("futex");
1412 goto end_wait;
1413 }
1414 }
1415 end_wait:
1416 return;
1417
1418 quit:
1419 ust_unlock();
1420 return;
1421
1422 error:
1423 ust_unlock();
1424 return;
1425 }
1426
1427 /*
1428 * This thread does not allocate any resource, except within
1429 * handle_message, within mutex protection. This mutex protects against
1430 * fork and exit.
1431 * The other moment it allocates resources is at socket connection, which
1432 * is also protected by the mutex.
1433 */
1434 static
1435 void *ust_listener_thread(void *arg)
1436 {
1437 struct sock_info *sock_info = arg;
1438 int sock, ret, prev_connect_failed = 0, has_waited = 0, fd;
1439 long timeout;
1440
1441 lttng_ust_fixup_tls();
1442 lttng_ust_fd_tracker_register_thread();
1443 /*
1444 * If available, add '-ust' to the end of this thread's
1445 * process name
1446 */
1447 ret = lttng_ust_setustprocname();
1448 if (ret) {
1449 ERR("Unable to set UST process name");
1450 }
1451
1452 /* Restart trying to connect to the session daemon */
1453 restart:
1454 if (prev_connect_failed) {
1455 /* Wait for sessiond availability using the wait shm futex (or poll fallback) */
1456 wait_for_sessiond(sock_info);
1457 if (has_waited) {
1458 has_waited = 0;
1459 /*
1460 * Sleep for 5 seconds before retrying after a
1461 * sequence of failure / wait / failure. This
1462 * deals with a killed or broken session daemon.
1463 */
1464 sleep(5);
1465 } else {
1466 has_waited = 1;
1467 }
1468 prev_connect_failed = 0;
1469 }
1470
1471 if (ust_lock()) {
1472 goto quit;
1473 }
1474
1475 if (sock_info->socket != -1) {
1476 /* FD tracker is updated by ustcomm_close_unix_sock() */
1477 ret = ustcomm_close_unix_sock(sock_info->socket);
1478 if (ret) {
1479 ERR("Error closing %s ust cmd socket",
1480 sock_info->name);
1481 }
1482 sock_info->socket = -1;
1483 }
1484 if (sock_info->notify_socket != -1) {
1485 /* FD tracker is updated by ustcomm_close_unix_sock() */
1486 ret = ustcomm_close_unix_sock(sock_info->notify_socket);
1487 if (ret) {
1488 ERR("Error closing %s ust notify socket",
1489 sock_info->name);
1490 }
1491 sock_info->notify_socket = -1;
1492 }
1493
1494
1495 /*
1496 * Register. We need to perform both connect and sending
1497 * registration message before doing the next connect otherwise
1498 * we may reach unix socket connect queue max limits and block
1499 * on the 2nd connect while the session daemon is awaiting the
1500 * first connect registration message.
1501 */
1502 /* Connect cmd socket */
1503 lttng_ust_lock_fd_tracker();
1504 ret = ustcomm_connect_unix_sock(sock_info->sock_path,
1505 get_connect_sock_timeout());
1506 if (ret < 0) {
1507 lttng_ust_unlock_fd_tracker();
1508 DBG("Info: sessiond not accepting connections to %s apps socket", sock_info->name);
1509 prev_connect_failed = 1;
1510
1511 /*
1512 * If we cannot find the sessiond daemon, don't delay
1513 * constructor execution.
1514 */
1515 ret = handle_register_failed(sock_info);
1516 assert(!ret);
1517 ust_unlock();
1518 goto restart;
1519 }
1520 fd = ret;
1521 ret = lttng_ust_add_fd_to_tracker(fd);
1522 if (ret < 0) {
1523 ret = close(fd);
1524 if (ret) {
1525 PERROR("close on sock_info->socket");
1526 }
1527 ret = -1;
1528 lttng_ust_unlock_fd_tracker();
1529 ust_unlock();
1530 goto quit;
1531 }
1532
1533 sock_info->socket = ret;
1534 lttng_ust_unlock_fd_tracker();
1535
1536 ust_unlock();
1537 /*
1538 * Unlock/relock ust lock because connect is blocking (with
1539 * timeout). Don't delay constructors on the ust lock for too
1540 * long.
1541 */
1542 if (ust_lock()) {
1543 goto quit;
1544 }
1545
1546 /*
1547 * Create only one root handle per listener thread for the whole
1548 * process lifetime, so we ensure we get ID which is statically
1549 * assigned to the root handle.
1550 */
1551 if (sock_info->root_handle == -1) {
1552 ret = lttng_abi_create_root_handle();
1553 if (ret < 0) {
1554 ERR("Error creating root handle");
1555 goto quit;
1556 }
1557 sock_info->root_handle = ret;
1558 }
1559
1560 ret = register_to_sessiond(sock_info->socket, USTCTL_SOCKET_CMD);
1561 if (ret < 0) {
1562 ERR("Error registering to %s ust cmd socket",
1563 sock_info->name);
1564 prev_connect_failed = 1;
1565 /*
1566 * If we cannot register to the sessiond daemon, don't
1567 * delay constructor execution.
1568 */
1569 ret = handle_register_failed(sock_info);
1570 assert(!ret);
1571 ust_unlock();
1572 goto restart;
1573 }
1574
1575 ust_unlock();
1576 /*
1577 * Unlock/relock ust lock because connect is blocking (with
1578 * timeout). Don't delay constructors on the ust lock for too
1579 * long.
1580 */
1581 if (ust_lock()) {
1582 goto quit;
1583 }
1584
1585 /* Connect notify socket */
1586 lttng_ust_lock_fd_tracker();
1587 ret = ustcomm_connect_unix_sock(sock_info->sock_path,
1588 get_connect_sock_timeout());
1589 if (ret < 0) {
1590 lttng_ust_unlock_fd_tracker();
1591 DBG("Info: sessiond not accepting connections to %s apps socket", sock_info->name);
1592 prev_connect_failed = 1;
1593
1594 /*
1595 * If we cannot find the sessiond daemon, don't delay
1596 * constructor execution.
1597 */
1598 ret = handle_register_failed(sock_info);
1599 assert(!ret);
1600 ust_unlock();
1601 goto restart;
1602 }
1603
1604 fd = ret;
1605 ret = lttng_ust_add_fd_to_tracker(fd);
1606 if (ret < 0) {
1607 ret = close(fd);
1608 if (ret) {
1609 PERROR("close on sock_info->notify_socket");
1610 }
1611 ret = -1;
1612 lttng_ust_unlock_fd_tracker();
1613 ust_unlock();
1614 goto quit;
1615 }
1616
1617 sock_info->notify_socket = ret;
1618 lttng_ust_unlock_fd_tracker();
1619
1620 ust_unlock();
1621 /*
1622 * Unlock/relock ust lock because connect is blocking (with
1623 * timeout). Don't delay constructors on the ust lock for too
1624 * long.
1625 */
1626 if (ust_lock()) {
1627 goto quit;
1628 }
1629
1630 timeout = get_notify_sock_timeout();
1631 if (timeout >= 0) {
1632 /*
1633 * Give at least 10ms to sessiond to reply to
1634 * notifications.
1635 */
1636 if (timeout < 10)
1637 timeout = 10;
1638 ret = ustcomm_setsockopt_rcv_timeout(sock_info->notify_socket,
1639 timeout);
1640 if (ret < 0) {
1641 WARN("Error setting socket receive timeout");
1642 }
1643 ret = ustcomm_setsockopt_snd_timeout(sock_info->notify_socket,
1644 timeout);
1645 if (ret < 0) {
1646 WARN("Error setting socket send timeout");
1647 }
1648 } else if (timeout < -1) {
1649 WARN("Unsupported timeout value %ld", timeout);
1650 }
1651
1652 ret = register_to_sessiond(sock_info->notify_socket,
1653 USTCTL_SOCKET_NOTIFY);
1654 if (ret < 0) {
1655 ERR("Error registering to %s ust notify socket",
1656 sock_info->name);
1657 prev_connect_failed = 1;
1658 /*
1659 * If we cannot register to the sessiond daemon, don't
1660 * delay constructor execution.
1661 */
1662 ret = handle_register_failed(sock_info);
1663 assert(!ret);
1664 ust_unlock();
1665 goto restart;
1666 }
1667 sock = sock_info->socket;
1668
1669 ust_unlock();
1670
1671 for (;;) {
1672 ssize_t len;
1673 struct ustcomm_ust_msg lum;
1674
1675 len = ustcomm_recv_unix_sock(sock, &lum, sizeof(lum));
1676 switch (len) {
1677 case 0: /* orderly shutdown */
1678 DBG("%s lttng-sessiond has performed an orderly shutdown", sock_info->name);
1679 if (ust_lock()) {
1680 goto quit;
1681 }
1682 /*
1683 * Either sessiond has shut down or refused us by closing the socket.
1684 * In either case, we don't want to delay constructor execution,
1685 * and we need to wait before retrying.
1686 */
1687 prev_connect_failed = 1;
1688 /*
1689 * If we cannot register to the sessiond daemon, don't
1690 * delay constructor execution.
1691 */
1692 ret = handle_register_failed(sock_info);
1693 assert(!ret);
1694 ust_unlock();
1695 goto end;
1696 case sizeof(lum):
1697 print_cmd(lum.cmd, lum.handle);
1698 ret = handle_message(sock_info, sock, &lum);
1699 if (ret) {
1700 ERR("Error handling message for %s socket",
1701 sock_info->name);
1702 /*
1703 * Close socket if protocol error is
1704 * detected.
1705 */
1706 goto end;
1707 }
1708 continue;
1709 default:
1710 if (len < 0) {
1711 DBG("Receive failed from lttng-sessiond with errno %d", (int) -len);
1712 } else {
1713 DBG("incorrect message size (%s socket): %zd", sock_info->name, len);
1714 }
1715 if (len == -ECONNRESET) {
1716 DBG("%s remote end closed connection", sock_info->name);
1717 goto end;
1718 }
1719 goto end;
1720 }
1721
1722 }
1723 end:
1724 if (ust_lock()) {
1725 goto quit;
1726 }
1727 /* Cleanup socket handles before trying to reconnect */
1728 lttng_ust_objd_table_owner_cleanup(sock_info);
1729 ust_unlock();
1730 goto restart; /* try to reconnect */
1731
1732 quit:
1733 ust_unlock();
1734
1735 pthread_mutex_lock(&ust_exit_mutex);
1736 sock_info->thread_active = 0;
1737 pthread_mutex_unlock(&ust_exit_mutex);
1738 return NULL;
1739 }
1740
1741 /*
1742 * Weak symbol to call when the ust malloc wrapper is not loaded.
1743 */
1744 __attribute__((weak))
1745 void lttng_ust_malloc_wrapper_init(void)
1746 {
1747 }
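/*
 * Assumption based on the weak attribute: when the lttng-ust malloc
 * instrumentation wrapper library is loaded (e.g. preloaded), it is
 * expected to provide a strong definition of this function, overriding
 * the empty stub above.
 */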
1748
1749 /*
1750 * sessiond monitoring thread: monitor presence of global and per-user
1751 * sessiond by waiting on the application common wait shm (futex or polling fallback).
1752 */
1753 void __attribute__((constructor)) lttng_ust_init(void)
1754 {
1755 struct timespec constructor_timeout;
1756 sigset_t sig_all_blocked, orig_parent_mask;
1757 pthread_attr_t thread_attr;
1758 int timeout_mode;
1759 int ret;
1760
1761 if (uatomic_xchg(&initialized, 1) == 1)
1762 return;
1763
1764 /*
1765 * Fixup interdependency between TLS fixup mutex (which happens
1766 * to be the dynamic linker mutex) and ust_lock, taken within
1767 * the ust lock.
1768 */
1769 lttng_ust_fixup_tls();
1770
1771 lttng_ust_loaded = 1;
1772
1773 /*
1774 * We want precise control over the order in which we construct
1775 * our sub-libraries vs starting to receive commands from
1776 * sessiond (otherwise leading to errors when trying to create
1777 * sessiond before the init functions are completed).
1778 */
1779 init_usterr();
1780 lttng_ust_getenv_init(); /* Needs init_usterr() to be completed. */
1781 init_tracepoint();
1782 lttng_ust_init_fd_tracker();
1783 lttng_ust_clock_init();
1784 lttng_ust_getcpu_init();
1785 lttng_ust_statedump_init();
1786 lttng_ring_buffer_metadata_client_init();
1787 lttng_ring_buffer_client_overwrite_init();
1788 lttng_ring_buffer_client_overwrite_rt_init();
1789 lttng_ring_buffer_client_discard_init();
1790 lttng_ring_buffer_client_discard_rt_init();
1791 lttng_perf_counter_init();
1792 /*
1793 * Invoke ust malloc wrapper init before starting other threads.
1794 */
1795 lttng_ust_malloc_wrapper_init();
1796
1797 timeout_mode = get_constructor_timeout(&constructor_timeout);
1798
1799 ret = sem_init(&constructor_wait, 0, 0);
1800 if (ret) {
1801 PERROR("sem_init");
1802 }
1803
1804 ret = setup_global_apps();
1805 if (ret) {
1806 assert(global_apps.allowed == 0);
1807 DBG("global apps setup returned %d", ret);
1808 }
1809
1810 ret = setup_local_apps();
1811 if (ret) {
1812 assert(local_apps.allowed == 0);
1813 DBG("local apps setup returned %d", ret);
1814 }
1815
1816 /* A new thread created by pthread_create inherits the signal mask
1817 * from the parent. To avoid any signal being received by the
1818 * listener thread, we block all signals temporarily in the parent,
1819 * while we create the listener thread.
1820 */
1821 sigfillset(&sig_all_blocked);
1822 ret = pthread_sigmask(SIG_SETMASK, &sig_all_blocked, &orig_parent_mask);
1823 if (ret) {
1824 ERR("pthread_sigmask: %s", strerror(ret));
1825 }
1826
1827 ret = pthread_attr_init(&thread_attr);
1828 if (ret) {
1829 ERR("pthread_attr_init: %s", strerror(ret));
1830 }
1831 ret = pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_DETACHED);
1832 if (ret) {
1833 ERR("pthread_attr_setdetachstate: %s", strerror(ret));
1834 }
1835
1836 if (global_apps.allowed) {
1837 pthread_mutex_lock(&ust_exit_mutex);
1838 ret = pthread_create(&global_apps.ust_listener, &thread_attr,
1839 ust_listener_thread, &global_apps);
1840 if (ret) {
1841 ERR("pthread_create global: %s", strerror(ret));
1842 }
1843 global_apps.thread_active = 1;
1844 pthread_mutex_unlock(&ust_exit_mutex);
1845 } else {
1846 handle_register_done(&global_apps);
1847 }
1848
1849 if (local_apps.allowed) {
1850 pthread_mutex_lock(&ust_exit_mutex);
1851 ret = pthread_create(&local_apps.ust_listener, &thread_attr,
1852 ust_listener_thread, &local_apps);
1853 if (ret) {
1854 ERR("pthread_create local: %s", strerror(ret));
1855 }
1856 local_apps.thread_active = 1;
1857 pthread_mutex_unlock(&ust_exit_mutex);
1858 } else {
1859 handle_register_done(&local_apps);
1860 }
1861 ret = pthread_attr_destroy(&thread_attr);
1862 if (ret) {
1863 ERR("pthread_attr_destroy: %s", strerror(ret));
1864 }
1865
1866 /* Restore original signal mask in parent */
1867 ret = pthread_sigmask(SIG_SETMASK, &orig_parent_mask, NULL);
1868 if (ret) {
1869 ERR("pthread_sigmask: %s", strerror(ret));
1870 }
1871
1872 switch (timeout_mode) {
1873 case 1: /* timeout wait */
1874 do {
1875 ret = sem_timedwait(&constructor_wait,
1876 &constructor_timeout);
1877 } while (ret < 0 && errno == EINTR);
1878 if (ret < 0) {
1879 switch (errno) {
1880 case ETIMEDOUT:
1881 ERR("Timed out waiting for lttng-sessiond");
1882 break;
1883 case EINVAL:
1884 PERROR("sem_timedwait");
1885 break;
1886 default:
1887 ERR("Unexpected error \"%s\" returned by sem_timedwait",
1888 strerror(errno));
1889 }
1890 }
1891 break;
1892 case -1:/* wait forever */
1893 do {
1894 ret = sem_wait(&constructor_wait);
1895 } while (ret < 0 && errno == EINTR);
1896 if (ret < 0) {
1897 switch (errno) {
1898 case EINVAL:
1899 PERROR("sem_wait");
1900 break;
1901 default:
1902 ERR("Unexpected error \"%s\" returned by sem_wait",
1903 strerror(errno));
1904 }
1905 }
1906 break;
1907 case 0: /* no timeout */
1908 break;
1909 }
1910 }
1911
1912 static
1913 void lttng_ust_cleanup(int exiting)
1914 {
1915 cleanup_sock_info(&global_apps, exiting);
1916 cleanup_sock_info(&local_apps, exiting);
1917 local_apps.allowed = 0;
1918 global_apps.allowed = 0;
1919 /*
1920 * The teardown steps in this function all affect data structures
1921 * accessed under the UST lock by the listener threads. This
1922 * lock, along with the lttng_ust_comm_should_quit flag, ensures
1923 * that none of these threads are accessing this data at this
1924 * point.
1925 */
1926 lttng_ust_abi_exit();
1927 lttng_ust_events_exit();
1928 lttng_perf_counter_exit();
1929 lttng_ring_buffer_client_discard_rt_exit();
1930 lttng_ring_buffer_client_discard_exit();
1931 lttng_ring_buffer_client_overwrite_rt_exit();
1932 lttng_ring_buffer_client_overwrite_exit();
1933 lttng_ring_buffer_metadata_client_exit();
1934 lttng_ust_statedump_destroy();
1935 exit_tracepoint();
1936 if (!exiting) {
1937 /* Reinitialize values for fork */
1938 sem_count = sem_count_initial_value;
1939 lttng_ust_comm_should_quit = 0;
1940 initialized = 0;
1941 }
1942 }
1943
1944 void __attribute__((destructor)) lttng_ust_exit(void)
1945 {
1946 int ret;
1947
1948 /*
1949 * Using pthread_cancel here because:
1950 * A) we don't want to hang application teardown.
1951 * B) the thread is not allocating any resource.
1952 */
1953
1954 /*
1955 * Require the communication thread to quit. Synchronize with
1956 * mutexes to ensure it is not in a mutex critical section when
1957 * pthread_cancel is later called.
1958 */
1959 ust_lock_nocheck();
1960 lttng_ust_comm_should_quit = 1;
1961 ust_unlock();
1962
1963 pthread_mutex_lock(&ust_exit_mutex);
1964 /* cancel threads */
1965 if (global_apps.thread_active) {
1966 ret = pthread_cancel(global_apps.ust_listener);
1967 if (ret) {
1968 ERR("Error cancelling global ust listener thread: %s",
1969 strerror(ret));
1970 } else {
1971 global_apps.thread_active = 0;
1972 }
1973 }
1974 if (local_apps.thread_active) {
1975 ret = pthread_cancel(local_apps.ust_listener);
1976 if (ret) {
1977 ERR("Error cancelling local ust listener thread: %s",
1978 strerror(ret));
1979 } else {
1980 local_apps.thread_active = 0;
1981 }
1982 }
1983 pthread_mutex_unlock(&ust_exit_mutex);
1984
1985 /*
1986 * Do NOT join threads: use of sys_futex makes it impossible to
1987 * join the threads without using async-cancel, but async-cancel
1988 * is delivered by a signal, which could hit the target thread
1989 * anywhere in its code path, including while the ust_lock() is
1990 * held, causing a deadlock for the other thread. Let the OS
1991 * clean up the threads if they are stalled in a syscall.
1992 */
1993 lttng_ust_cleanup(1);
1994 }
1995
1996 /*
1997 * We exclude the worker threads across fork and clone (except
1998 * CLONE_VM), because these system calls only keep the forking thread
1999 * running in the child. Therefore, we don't want to call fork or clone
2000 * in the middle of a tracepoint or ust tracing state modification.
2001 * Holding this mutex protects these structures across fork and clone.
2002 */
2003 void ust_before_fork(sigset_t *save_sigset)
2004 {
2005 /*
2006 * Disable signals. This avoids having the child intervene
2007 * before it is properly set up for tracing. It is safer to
2008 * disable all signals, because then we know we are not breaking
2009 * anything by restoring the original mask.
2010 */
2011 sigset_t all_sigs;
2012 int ret;
2013
2014 /* Fixup lttng-ust TLS. */
2015 lttng_ust_fixup_tls();
2016
2017 if (URCU_TLS(lttng_ust_nest_count))
2018 return;
2019 /* Disable signals */
2020 sigfillset(&all_sigs);
2021 ret = sigprocmask(SIG_BLOCK, &all_sigs, save_sigset);
2022 if (ret == -1) {
2023 PERROR("sigprocmask");
2024 }
2025
2026 pthread_mutex_lock(&ust_fork_mutex);
2027
2028 ust_lock_nocheck();
2029 rcu_bp_before_fork();
2030 lttng_ust_lock_fd_tracker();
2031 }
2032
2033 static void ust_after_fork_common(sigset_t *restore_sigset)
2034 {
2035 int ret;
2036
2037 DBG("process %d", getpid());
2038 lttng_ust_unlock_fd_tracker();
2039 ust_unlock();
2040
2041 pthread_mutex_unlock(&ust_fork_mutex);
2042
2043 /* Restore signals */
2044 ret = sigprocmask(SIG_SETMASK, restore_sigset, NULL);
2045 if (ret == -1) {
2046 PERROR("sigprocmask");
2047 }
2048 }
2049
2050 void ust_after_fork_parent(sigset_t *restore_sigset)
2051 {
2052 if (URCU_TLS(lttng_ust_nest_count))
2053 return;
2054 DBG("process %d", getpid());
2055 rcu_bp_after_fork_parent();
2056 /* Release mutexes and reenable signals */
2057 ust_after_fork_common(restore_sigset);
2058 }
2059
2060 /*
2061 * After fork, in the child, we need to cleanup all the leftover state,
2062 * except the worker thread which already magically disappeared thanks
2063 * to the weird Linux fork semantics. After tidying up, we call
2064 * lttng_ust_init() again to start over as a new PID.
2065 *
2066 * This is meant for fork() calls that trace in the child between the
2067 * fork and the following exec call (if there is any).
2068 */
2069 void ust_after_fork_child(sigset_t *restore_sigset)
2070 {
2071 if (URCU_TLS(lttng_ust_nest_count))
2072 return;
2073 lttng_context_vpid_reset();
2074 lttng_context_vtid_reset();
2075 lttng_context_procname_reset();
2076 DBG("process %d", getpid());
2077 /* Release urcu mutexes */
2078 rcu_bp_after_fork_child();
2079 lttng_ust_cleanup(0);
2080 /* Release mutexes and reenable signals */
2081 ust_after_fork_common(restore_sigset);
2082 lttng_ust_init();
2083 }
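/*
 * Illustrative pairing of the fork hooks above (sketch of how a fork()
 * wrapper is expected to call them, not a definitive API contract):
 *
 *	sigset_t sigset;
 *	pid_t pid;
 *
 *	ust_before_fork(&sigset);
 *	pid = fork();
 *	if (pid == 0)
 *		ust_after_fork_child(&sigset);
 *	else
 *		ust_after_fork_parent(&sigset);
 */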
2084
2085 void lttng_ust_sockinfo_session_enabled(void *owner)
2086 {
2087 struct sock_info *sock_info = owner;
2088 sock_info->statedump_pending = 1;
2089 }