Fix: lttng-ust-comm: wait on wrong child process
[lttng-ust.git] / liblttng-ust / lttng-ust-comm.c
index a299ba84c6ba583b1d2afff4d8ff0576a0991e36..716795e0e818028d1a6fc6fa63374abaaf0ebafa 100644
@@ -46,6 +46,7 @@
 #include <lttng/ust.h>
 #include <lttng/ust-error.h>
 #include <lttng/ust-ctl.h>
+#include <lttng/ust-cancelstate.h>
 #include <urcu/tls-compat.h>
 #include <ust-comm.h>
 #include <ust-fd.h>
@@ -86,6 +87,8 @@ static int initialized;
  *
  * ust_lock nests within the dynamic loader lock (within glibc) because
  * it is taken within the library constructor.
+ *
+ * The ust fd tracker lock nests within the ust_mutex.
  */
 static pthread_mutex_t ust_mutex = PTHREAD_MUTEX_INITIALIZER;
 
@@ -120,6 +123,28 @@ static int lttng_ust_comm_should_quit;
  */
 int lttng_ust_loaded __attribute__((weak));
 
+/*
+ * Notes on async-signal-safety of ust lock: a few libc functions are used
+ * which are not strictly async-signal-safe:
+ *
+ * - pthread_setcancelstate
+ * - pthread_mutex_lock
+ * - pthread_mutex_unlock
+ *
+ * As of glibc 2.35, the implementation of pthread_setcancelstate only
+ * touches TLS data, and it appears to be safe to use from signal
+ * handlers. If the libc implementation changes, this will need to be
+ * revisited, and we may ask glibc to provide an async-signal-safe
+ * pthread_setcancelstate.
+ *
+ * As of glibc 2.35, the implementation of pthread_mutex_lock/unlock
+ * for fast mutexes only relies on the pthread_mutex_t structure.
+ * Disabling signals around all uses of this mutex ensures
+ * signal-safety. If the libc implementation changes and eventually uses
+ * other global resources, this will need to be revisited and we may
+ * need to implement our own mutex.
+ */
+
 /*
  * Return 0 on success, -1 if should quit.
  * The lock is taken in both cases.
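The lttng_ust_cancelstate_disable_push()/lttng_ust_cancelstate_disable_pop() pair used below comes from the <lttng/ust-cancelstate.h> header added above. A minimal sketch of the nesting-counter scheme such an API implies is shown here; the TLS variables and error handling are illustrative assumptions, not the library's actual implementation:

	#include <pthread.h>

	/* Per-thread nesting counter (illustrative; not the real layout). */
	static __thread int cancelstate_nesting;
	static __thread int cancelstate_old;

	int lttng_ust_cancelstate_disable_push(void)
	{
		int ret, oldstate;

		if (cancelstate_nesting++)
			return 0;	/* Already disabled by an outer caller. */
		ret = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate);
		if (ret)
			return -1;
		cancelstate_old = oldstate;
		return 0;
	}

	int lttng_ust_cancelstate_disable_pop(void)
	{
		int ret, oldstate;

		if (--cancelstate_nesting)
			return 0;	/* Still held by an outer caller. */
		ret = pthread_setcancelstate(cancelstate_old, &oldstate);
		if (ret || oldstate != PTHREAD_CANCEL_DISABLE)
			return -1;
		return 0;
	}

Unlike the bare pthread_setcancelstate() pairs this patch removes, a counted push/pop stays balanced when ust_lock()/ust_unlock() nest, which is why the old "unexpected oldstate" checks can go away.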
@@ -128,25 +153,21 @@ int lttng_ust_loaded __attribute__((weak));
 int ust_lock(void)
 {
        sigset_t sig_all_blocked, orig_mask;
-       int ret, oldstate;
+       int ret;
 
-       ret = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate);
-       if (ret) {
-               ERR("pthread_setcancelstate: %s", strerror(ret));
-       }
-       if (oldstate != PTHREAD_CANCEL_ENABLE) {
-               ERR("pthread_setcancelstate: unexpected oldstate");
+       if (lttng_ust_cancelstate_disable_push()) {
+               ERR("lttng_ust_cancelstate_disable_push");
        }
        sigfillset(&sig_all_blocked);
        ret = pthread_sigmask(SIG_SETMASK, &sig_all_blocked, &orig_mask);
        if (ret) {
-               ERR("pthread_sigmask: %s", strerror(ret));
+               ERR("pthread_sigmask: ret=%d", ret);
        }
        if (!URCU_TLS(ust_mutex_nest)++)
                pthread_mutex_lock(&ust_mutex);
        ret = pthread_sigmask(SIG_SETMASK, &orig_mask, NULL);
        if (ret) {
-               ERR("pthread_sigmask: %s", strerror(ret));
+               ERR("pthread_sigmask: ret=%d", ret);
        }
        if (lttng_ust_comm_should_quit) {
                return -1;
@@ -164,25 +185,21 @@ int ust_lock(void)
 void ust_lock_nocheck(void)
 {
        sigset_t sig_all_blocked, orig_mask;
-       int ret, oldstate;
+       int ret;
 
-       ret = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate);
-       if (ret) {
-               ERR("pthread_setcancelstate: %s", strerror(ret));
-       }
-       if (oldstate != PTHREAD_CANCEL_ENABLE) {
-               ERR("pthread_setcancelstate: unexpected oldstate");
+       if (lttng_ust_cancelstate_disable_push()) {
+               ERR("lttng_ust_cancelstate_disable_push");
        }
        sigfillset(&sig_all_blocked);
        ret = pthread_sigmask(SIG_SETMASK, &sig_all_blocked, &orig_mask);
        if (ret) {
-               ERR("pthread_sigmask: %s", strerror(ret));
+               ERR("pthread_sigmask: ret=%d", ret);
        }
        if (!URCU_TLS(ust_mutex_nest)++)
                pthread_mutex_lock(&ust_mutex);
        ret = pthread_sigmask(SIG_SETMASK, &orig_mask, NULL);
        if (ret) {
-               ERR("pthread_sigmask: %s", strerror(ret));
+               ERR("pthread_sigmask: ret=%d", ret);
        }
 }
 
@@ -192,25 +209,21 @@ void ust_lock_nocheck(void)
 void ust_unlock(void)
 {
        sigset_t sig_all_blocked, orig_mask;
-       int ret, oldstate;
+       int ret;
 
        sigfillset(&sig_all_blocked);
        ret = pthread_sigmask(SIG_SETMASK, &sig_all_blocked, &orig_mask);
        if (ret) {
-               ERR("pthread_sigmask: %s", strerror(ret));
+               ERR("pthread_sigmask: ret=%d", ret);
        }
        if (!--URCU_TLS(ust_mutex_nest))
                pthread_mutex_unlock(&ust_mutex);
        ret = pthread_sigmask(SIG_SETMASK, &orig_mask, NULL);
        if (ret) {
-               ERR("pthread_sigmask: %s", strerror(ret));
-       }
-       ret = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
-       if (ret) {
-               ERR("pthread_setcancelstate: %s", strerror(ret));
+               ERR("pthread_sigmask: ret=%d", ret);
        }
-       if (oldstate != PTHREAD_CANCEL_DISABLE) {
-               ERR("pthread_setcancelstate: unexpected oldstate");
+       if (lttng_ust_cancelstate_disable_pop()) {
+               ERR("lttng_ust_cancelstate_disable_pop");
        }
 }
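A note on the calling convention all three functions preserve: ust_lock() returns -1 when lttng_ust_comm_should_quit is set, but the mutex is held in both cases, so the caller must unlock even on the quit path. Callers in this file follow this shape (labels illustrative):

	if (ust_lock()) {
		/* Should quit: the lock is nevertheless held. */
		goto quit;
	}
	/* ... critical section touching session state ... */
quit:
	ust_unlock();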
 
@@ -228,7 +241,11 @@ static sem_t constructor_wait;
 /*
  * Doing this for both the global and local sessiond.
  */
-static int sem_count = { 2 };
+enum {
+       sem_count_initial_value = 4,
+};
+
+static int sem_count = sem_count_initial_value;
 
 /*
  * Counting nesting within lttng-ust. Used to ensure that calling fork()
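On the sem_count change above: the constructor now waits for four events rather than two. Each of the two possible session daemon connections (global and local) contributes one count for its "registration done" command and one for its initial statedump completion, matching the decrement_sem_count() calls in handle_register_done() and handle_pending_statedump() below, while handle_register_failed() releases both counts at once.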
@@ -243,7 +260,7 @@ struct sock_info {
        const char *name;
        pthread_t ust_listener; /* listener thread */
        int root_handle;
-       int constructor_sem_posted;
+       int registration_done;
        int allowed;
        int global;
        int thread_active;
@@ -256,6 +273,9 @@ struct sock_info {
        char *wait_shm_mmap;
        /* Keep track of lazy state dump not performed yet. */
        int statedump_pending;
+       int initial_statedump_done;
+       /* Keep procname for statedump */
+       char procname[LTTNG_UST_PROCNAME_LEN];
 };
 
 /* Socket from app (connect) to session daemon (listen) for communication */
@@ -264,6 +284,7 @@ struct sock_info global_apps = {
        .global = 1,
 
        .root_handle = -1,
+       .registration_done = 0,
        .allowed = 0,
        .thread_active = 0,
 
@@ -274,6 +295,8 @@ struct sock_info global_apps = {
        .wait_shm_path = "/" LTTNG_UST_WAIT_FILENAME,
 
        .statedump_pending = 0,
+       .initial_statedump_done = 0,
+       .procname[0] = '\0'
 };
 
 /* TODO: allow global_apps_sock_path override */
@@ -282,6 +305,7 @@ struct sock_info local_apps = {
        .name = "local",
        .global = 0,
        .root_handle = -1,
+       .registration_done = 0,
        .allowed = 0,   /* Check setuid bit first */
        .thread_active = 0,
 
@@ -289,6 +313,8 @@ struct sock_info local_apps = {
        .notify_socket = -1,
 
        .statedump_pending = 0,
+       .initial_statedump_done = 0,
+       .procname[0] = '\0'
 };
 
 static int wait_poll_fallback;
@@ -412,7 +438,12 @@ void lttng_ust_fixup_tls(void)
        lttng_fixup_nest_count_tls();
        lttng_fixup_procname_tls();
        lttng_fixup_ust_mutex_nest_tls();
+       lttng_ust_fixup_perf_counter_tls();
        lttng_ust_fixup_fd_tracker_tls();
+       lttng_fixup_cgroup_ns_tls();
+       lttng_fixup_ipc_ns_tls();
+       lttng_fixup_net_ns_tls();
+       lttng_fixup_uts_ns_tls();
 }
 
 int lttng_get_notify_socket(void *owner)
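The fixup functions gathered above all serve one purpose: touch each TLS variable once, early, so that its allocation (which for dynamic TLS may call malloc inside glibc) happens before the variable is ever accessed from a context where allocation would be unsafe, such as a signal handler or a constructor holding the dynamic loader lock. The usual shape of such a fixup, as found in liburcu and elsewhere in lttng-ust, is a forced read; the variable here is illustrative:

	static DEFINE_URCU_TLS(int, foo);	/* illustrative TLS variable */

	void lttng_fixup_foo_tls(void)
	{
		/* Force early TLS allocation by touching the variable. */
		asm volatile ("" : : "m" (URCU_TLS(foo)));
	}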
@@ -422,6 +453,15 @@ int lttng_get_notify_socket(void *owner)
        return info->notify_socket;
 }
 
+
+LTTNG_HIDDEN
+char *lttng_ust_sockinfo_get_procname(void *owner)
+{
+       struct sock_info *info = owner;
+
+       return info->procname;
+}
+
 static
 void print_cmd(int cmd, int handle)
 {
@@ -451,6 +491,7 @@ int setup_global_apps(void)
        }
 
        global_apps.allowed = 1;
+       lttng_ust_getprocname(global_apps.procname);
 error:
        return ret;
 }
@@ -495,6 +536,8 @@ int setup_local_apps(void)
                ret = -EIO;
                goto end;
        }
+
+       lttng_ust_getprocname(local_apps.procname);
 end:
        return ret;
 }
@@ -621,45 +664,85 @@ int send_reply(int sock, struct ustcomm_ust_reply *lur)
 }
 
 static
-int handle_register_done(struct sock_info *sock_info)
+void decrement_sem_count(unsigned int count)
 {
        int ret;
 
-       if (sock_info->constructor_sem_posted)
-               return 0;
-       sock_info->constructor_sem_posted = 1;
+       assert(uatomic_read(&sem_count) >= count);
+
        if (uatomic_read(&sem_count) <= 0) {
-               return 0;
+               return;
        }
-       ret = uatomic_add_return(&sem_count, -1);
+
+       ret = uatomic_add_return(&sem_count, -count);
        if (ret == 0) {
                ret = sem_post(&constructor_wait);
                assert(!ret);
        }
+}
+
+static
+int handle_register_done(struct sock_info *sock_info)
+{
+       if (sock_info->registration_done)
+               return 0;
+       sock_info->registration_done = 1;
+
+       decrement_sem_count(1);
+       if (!sock_info->statedump_pending) {
+               sock_info->initial_statedump_done = 1;
+               decrement_sem_count(1);
+       }
+
+       return 0;
+}
+
+static
+int handle_register_failed(struct sock_info *sock_info)
+{
+       if (sock_info->registration_done)
+               return 0;
+       sock_info->registration_done = 1;
+       sock_info->initial_statedump_done = 1;
+
+       decrement_sem_count(2);
+
        return 0;
 }
 
 /*
  * Only execute pending statedump after the constructor semaphore has
- * been posted by each listener thread. This means statedump will only
- * be performed after the "registration done" command is received from
- * each session daemon the application is connected to.
+ * been posted by the current listener thread. This means statedump will
+ * only be performed after the "registration done" command is received
+ * from this thread's session daemon.
  *
  * This ensures we don't run into deadlock issues with the dynamic
  * loader mutex, which is held while the constructor is called and
  * waiting on the constructor semaphore. All operations requiring this
  * dynamic loader lock need to be postponed using this mechanism.
+ *
+ * In a scenario with two session daemons connected to the application,
+ * it is possible that the first listener thread which receives the
+ * registration done command issues its statedump while the dynamic
+ * loader lock is still held by the application constructor waiting on
+ * the semaphore. The constructor will however be allowed to proceed
+ * when the second session daemon sends the registration done command
+ * to the second listener thread. This situation therefore does not
+ * produce a deadlock.
  */
 static
 void handle_pending_statedump(struct sock_info *sock_info)
 {
-       int ctor_passed = sock_info->constructor_sem_posted;
-
-       if (ctor_passed && sock_info->statedump_pending) {
+       if (sock_info->registration_done && sock_info->statedump_pending) {
                sock_info->statedump_pending = 0;
                pthread_mutex_lock(&ust_fork_mutex);
                lttng_handle_pending_statedump(sock_info);
                pthread_mutex_unlock(&ust_fork_mutex);
+
+               if (!sock_info->initial_statedump_done) {
+                       sock_info->initial_statedump_done = 1;
+                       decrement_sem_count(1);
+               }
        }
 }
 
@@ -871,10 +954,23 @@ int handle_message(struct sock_info *sock_info,
                                        &args, sock_info);
                else
                        ret = -ENOSYS;
+               if (args.channel.wakeup_fd >= 0) {
+                       int close_ret;
+
+                       lttng_ust_lock_fd_tracker();
+                       close_ret = close(args.channel.wakeup_fd);
+                       lttng_ust_unlock_fd_tracker();
+                       args.channel.wakeup_fd = -1;
+                       if (close_ret)
+                               PERROR("close");
+               }
+               free(args.channel.chan_data);
                break;
        }
        case LTTNG_UST_STREAM:
        {
+               int close_ret;
+
                /* Receive shm_fd, wakeup_fd */
                ret = ustcomm_recv_stream_from_sessiond(sock,
                        NULL,
@@ -890,6 +986,22 @@ int handle_message(struct sock_info *sock_info,
                                        &args, sock_info);
                else
                        ret = -ENOSYS;
+               if (args.stream.shm_fd >= 0) {
+                       lttng_ust_lock_fd_tracker();
+                       close_ret = close(args.stream.shm_fd);
+                       lttng_ust_unlock_fd_tracker();
+                       args.stream.shm_fd = -1;
+                       if (close_ret)
+                               PERROR("close");
+               }
+               if (args.stream.wakeup_fd >= 0) {
+                       lttng_ust_lock_fd_tracker();
+                       close_ret = close(args.stream.wakeup_fd);
+                       lttng_ust_unlock_fd_tracker();
+                       args.stream.wakeup_fd = -1;
+                       if (close_ret)
+                               PERROR("close");
+               }
                break;
        }
        case LTTNG_UST_CONTEXT:
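The close-under-fd-tracker-lock sequence above now appears three times in handle_message(). If it grows further, it could be factored into a helper along these lines (a sketch, not part of the patch; it reuses the file's existing lttng_ust_lock_fd_tracker()/lttng_ust_unlock_fd_tracker() and PERROR facilities):

	static
	void close_tracked_fd(int *fd)
	{
		int close_ret;

		if (*fd < 0)
			return;
		/*
		 * Hold the fd tracker lock across close() so the tracker
		 * cannot hand this fd number out concurrently.
		 */
		lttng_ust_lock_fd_tracker();
		close_ret = close(*fd);
		lttng_ust_unlock_fd_tracker();
		*fd = -1;
		if (close_ret)
			PERROR("close");
	}

The LTTNG_UST_STREAM case would then reduce to close_tracked_fd(&args.stream.shm_fd); close_tracked_fd(&args.stream.wakeup_fd);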
@@ -1069,7 +1181,7 @@ void cleanup_sock_info(struct sock_info *sock_info, int exiting)
                }
                sock_info->root_handle = -1;
        }
-       sock_info->constructor_sem_posted = 0;
+
 
        /*
         * wait_shm_mmap, socket and notify socket are used by listener
@@ -1081,6 +1193,9 @@ void cleanup_sock_info(struct sock_info *sock_info, int exiting)
        if (exiting)
                return;
 
+       sock_info->registration_done = 0;
+       sock_info->initial_statedump_done = 0;
+
        if (sock_info->socket != -1) {
                ret = ustcomm_close_unix_sock(sock_info->socket);
                if (ret) {
@@ -1178,14 +1293,14 @@ open_write:
        pid = fork();
        URCU_TLS(lttng_ust_nest_count)--;
        if (pid > 0) {
-               int status;
+               int status, wait_ret;
 
                /*
                 * Parent: wait for child to return, in which case the
                 * shared memory map will have been created.
                 */
-               pid = wait(&status);
-               if (pid < 0 || !WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+               wait_ret = waitpid(pid, &status, 0);
+               if (wait_ret < 0 || !WIFEXITED(status) || WEXITSTATUS(status) != 0) {
                        wait_shm_fd = -1;
                        goto end;
                }
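This hunk is the fix named in the commit title. wait() reaps whichever child of the process terminates first; if the application (or another library) forked its own child around the same time, the parent could consume that unrelated child's exit status, misjudge the shared-memory setup, and leave the real child unreaped. waitpid() pins the wait to the child forked just above. A standalone illustration of the hazard (not from the patch):

	#include <stdio.h>
	#include <sys/types.h>
	#include <sys/wait.h>
	#include <unistd.h>

	int main(void)
	{
		int status;

		/* Stand-in for a child forked by the application; fails. */
		pid_t app_child = fork();
		if (app_child == 0)
			_exit(1);
		/* Stand-in for the shm-creating child; succeeds. */
		pid_t our_child = fork();
		if (our_child == 0)
			_exit(0);

		/*
		 * wait() may reap app_child first, so checking its exit
		 * status would wrongly report the shm setup as failed.
		 */
		pid_t reaped = wait(&status);
		printf("wait() reaped %d, wanted %d\n", (int) reaped, (int) our_child);

		/* waitpid() always targets the intended child. */
		if (reaped != our_child)
			waitpid(our_child, &status, 0);
		return 0;
	}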
@@ -1353,18 +1468,25 @@ void wait_for_sessiond(struct sock_info *sock_info)
 
        DBG("Waiting for %s apps sessiond", sock_info->name);
        /* Wait for futex wakeup */
-       if (uatomic_read((int32_t *) sock_info->wait_shm_mmap))
-               goto end_wait;
-
-       while (futex_async((int32_t *) sock_info->wait_shm_mmap,
-                       FUTEX_WAIT, 0, NULL, NULL, 0)) {
+       while (!uatomic_read((int32_t *) sock_info->wait_shm_mmap)) {
+               if (!futex_async((int32_t *) sock_info->wait_shm_mmap, FUTEX_WAIT, 0, NULL, NULL, 0)) {
+                       /*
+                        * Wakeups queued earlier by unrelated code
+                        * using the same address can cause futex wait to
+                        * return 0 even though the futex value is still
+                        * 0 (spurious wakeups). Check the value again
+                        * in user-space to validate whether it really
+                        * differs from 0.
+                        */
+                       continue;
+               }
                switch (errno) {
-               case EWOULDBLOCK:
+               case EAGAIN:
                        /* Value already changed. */
                        goto end_wait;
                case EINTR:
                        /* Retry if interrupted by signal. */
-                       break;  /* Get out of switch. */
+                       break;  /* Get out of switch. Check again. */
                case EFAULT:
                        wait_poll_fallback = 1;
                        DBG(
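The EWOULDBLOCK-to-EAGAIN change above is cosmetic on Linux, where the two constants share a value; futex(2) documents EAGAIN for a FUTEX_WAIT whose value already differs from the expected one, so the new spelling matches the man page. The loop restructuring is the substantive part: the value is re-checked in user space after every futex return, so spurious wakeups are absorbed instead of being mistaken for a real wakeup.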
@@ -1477,7 +1599,7 @@ restart:
                 * If we cannot find the sessiond daemon, don't delay
                 * constructor execution.
                 */
-               ret = handle_register_done(sock_info);
+               ret = handle_register_failed(sock_info);
                assert(!ret);
                ust_unlock();
                goto restart;
@@ -1531,7 +1653,7 @@ restart:
                 * If we cannot register to the sessiond daemon, don't
                 * delay constructor execution.
                 */
-               ret = handle_register_done(sock_info);
+               ret = handle_register_failed(sock_info);
                assert(!ret);
                ust_unlock();
                goto restart;
@@ -1560,7 +1682,7 @@ restart:
                 * If we cannot find the sessiond daemon, don't delay
                 * constructor execution.
                 */
-               ret = handle_register_done(sock_info);
+               ret = handle_register_failed(sock_info);
                assert(!ret);
                ust_unlock();
                goto restart;
@@ -1624,7 +1746,7 @@ restart:
                 * If we cannot register to the sessiond daemon, don't
                 * delay constructor execution.
                 */
-               ret = handle_register_done(sock_info);
+               ret = handle_register_failed(sock_info);
                assert(!ret);
                ust_unlock();
                goto restart;
@@ -1654,7 +1776,7 @@ restart:
                         * If we cannot register to the sessiond daemon, don't
                         * delay constructor execution.
                         */
-                       ret = handle_register_done(sock_info);
+                       ret = handle_register_failed(sock_info);
                        assert(!ret);
                        ust_unlock();
                        goto end;
@@ -1923,7 +2045,7 @@ void lttng_ust_cleanup(int exiting)
        exit_tracepoint();
        if (!exiting) {
                /* Reinitialize values for fork */
-               sem_count = 2;
+               sem_count = sem_count_initial_value;
                lttng_ust_comm_should_quit = 0;
                initialized = 0;
        }
@@ -1981,6 +2103,34 @@ void __attribute__((destructor)) lttng_ust_exit(void)
        lttng_ust_cleanup(1);
 }
 
+static
+void ust_context_ns_reset(void)
+{
+       lttng_context_pid_ns_reset();
+       lttng_context_cgroup_ns_reset();
+       lttng_context_ipc_ns_reset();
+       lttng_context_mnt_ns_reset();
+       lttng_context_net_ns_reset();
+       lttng_context_user_ns_reset();
+       lttng_context_uts_ns_reset();
+}
+
+static
+void ust_context_vuids_reset(void)
+{
+       lttng_context_vuid_reset();
+       lttng_context_veuid_reset();
+       lttng_context_vsuid_reset();
+}
+
+static
+void ust_context_vgids_reset(void)
+{
+       lttng_context_vgid_reset();
+       lttng_context_vegid_reset();
+       lttng_context_vsgid_reset();
+}
+
 /*
  * We exclude the worker threads across fork and clone (except
  * CLONE_VM), because these system calls only keep the forking thread
@@ -2015,6 +2165,8 @@ void ust_before_fork(sigset_t *save_sigset)
 
        ust_lock_nocheck();
        urcu_bp_before_fork();
+       lttng_ust_lock_fd_tracker();
+       lttng_perf_lock();
 }
 
 static void ust_after_fork_common(sigset_t *restore_sigset)
@@ -2022,6 +2174,8 @@ static void ust_after_fork_common(sigset_t *restore_sigset)
        int ret;
 
        DBG("process %d", getpid());
+       lttng_perf_unlock();
+       lttng_ust_unlock_fd_tracker();
        ust_unlock();
 
        pthread_mutex_unlock(&ust_fork_mutex);
@@ -2059,6 +2213,9 @@ void ust_after_fork_child(sigset_t *restore_sigset)
        lttng_context_vpid_reset();
        lttng_context_vtid_reset();
        lttng_context_procname_reset();
+       ust_context_ns_reset();
+       ust_context_vuids_reset();
+       ust_context_vgids_reset();
        DBG("process %d", getpid());
        /* Release urcu mutexes */
        urcu_bp_after_fork_child();
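The before/after fork hooks in this file are designed to be called in matched pairs around fork(); the liblttng-ust-fork library wraps fork() for exactly this purpose. A usage sketch (ust_after_fork_parent() belongs to the same public API but is outside this diff):

	#include <signal.h>
	#include <unistd.h>
	#include <lttng/ust.h>

	static pid_t traced_fork(void)
	{
		sigset_t sigset;
		pid_t pid;

		ust_before_fork(&sigset);	/* block signals, take ust/fd-tracker/perf locks */
		pid = fork();
		if (pid == 0)
			ust_after_fork_child(&sigset);	/* reset cached contexts, re-init tracing */
		else
			ust_after_fork_parent(&sigset);	/* release locks, restore signal mask */
		return pid;
	}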
@@ -2068,6 +2225,60 @@ void ust_after_fork_child(sigset_t *restore_sigset)
        lttng_ust_init();
 }
 
+void ust_after_setns(void)
+{
+       ust_context_ns_reset();
+       ust_context_vuids_reset();
+       ust_context_vgids_reset();
+}
+
+void ust_after_unshare(void)
+{
+       ust_context_ns_reset();
+       ust_context_vuids_reset();
+       ust_context_vgids_reset();
+}
+
+void ust_after_setuid(void)
+{
+       ust_context_vuids_reset();
+}
+
+void ust_after_seteuid(void)
+{
+       ust_context_vuids_reset();
+}
+
+void ust_after_setreuid(void)
+{
+       ust_context_vuids_reset();
+}
+
+void ust_after_setresuid(void)
+{
+       ust_context_vuids_reset();
+}
+
+void ust_after_setgid(void)
+{
+       ust_context_vgids_reset();
+}
+
+void ust_after_setegid(void)
+{
+       ust_context_vgids_reset();
+}
+
+void ust_after_setregid(void)
+{
+       ust_context_vgids_reset();
+}
+
+void ust_after_setresgid(void)
+{
+       ust_context_vgids_reset();
+}
+
 void lttng_ust_sockinfo_session_enabled(void *owner)
 {
        struct sock_info *sock_info = owner;
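The ust_after_set*id() and namespace hooks added above only invalidate cached context values; something still has to call them after the corresponding system call succeeds. In the spirit of how liblttng-ust-fork interposes fork(), an interposing wrapper could look like this (a sketch under that assumption, not code from this patch):

	#define _GNU_SOURCE
	#include <dlfcn.h>
	#include <sys/types.h>
	#include <unistd.h>

	extern void ust_after_setuid(void);

	/* Interpose setuid(): call the real libc entry, then drop cached vuid contexts. */
	int setuid(uid_t uid)
	{
		static int (*real_setuid)(uid_t);
		int ret;

		if (!real_setuid)
			real_setuid = (int (*)(uid_t)) dlsym(RTLD_NEXT, "setuid");
		ret = real_setuid(uid);
		if (!ret)
			ust_after_setuid();
		return ret;
	}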