#include <grp.h>
#include <limits.h>
#include <pthread.h>
-#include <semaphore.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include "fd-limit.h"
#include "filter.h"
#include "health.h"
+#include "testpoint.h"
#define CONSUMERD_FILE "lttng-consumerd"
static pid_t ppid; /* Parent PID for --sig-parent option */
static char *rundir;
-/* Consumer daemon specific control data */
+/*
+ * Consumer daemon specific control data. Every value not initialized here is
+ * set to 0 by the static definition.
+ */
static struct consumer_data kconsumer_data = {
.type = LTTNG_CONSUMER_KERNEL,
.err_unix_sock_path = DEFAULT_KCONSUMERD_ERR_SOCK_PATH,
.cmd_sock = -1,
.pid_mutex = PTHREAD_MUTEX_INITIALIZER,
.lock = PTHREAD_MUTEX_INITIALIZER,
+ .cond = PTHREAD_COND_INITIALIZER,
+ .cond_mutex = PTHREAD_MUTEX_INITIALIZER,
};
static struct consumer_data ustconsumer64_data = {
.type = LTTNG_CONSUMER64_UST,
.cmd_sock = -1,
.pid_mutex = PTHREAD_MUTEX_INITIALIZER,
.lock = PTHREAD_MUTEX_INITIALIZER,
+ .cond = PTHREAD_COND_INITIALIZER,
+ .cond_mutex = PTHREAD_MUTEX_INITIALIZER,
};
static struct consumer_data ustconsumer32_data = {
.type = LTTNG_CONSUMER32_UST,
.cmd_sock = -1,
.pid_mutex = PTHREAD_MUTEX_INITIALIZER,
.lock = PTHREAD_MUTEX_INITIALIZER,
+ .cond = PTHREAD_COND_INITIALIZER,
+ .cond_mutex = PTHREAD_MUTEX_INITIALIZER,
};
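/*
 * For reference, a minimal sketch of the struct consumer_data fields the
 * initializers above and the code below rely on. The authoritative
 * definition lives in the consumer header; the names and types here are
 * inferred from their uses in this file, so treat them as assumptions.
 */
struct consumer_data_sketch {
	enum lttng_consumer_type type;	/* Kernel, 32-bit or 64-bit UST. */
	char err_unix_sock_path[PATH_MAX];	/* Error socket path. */
	char cmd_unix_sock_path[PATH_MAX];	/* Command socket path. */
	int err_sock;			/* Error socket fd. */
	int cmd_sock;			/* Command socket fd, -1 until connected. */
	pid_t pid;			/* Consumer daemon PID, 0 if not started. */
	pthread_t thread;		/* thread_manage_consumer() handle. */
	pthread_mutex_t lock;		/* Protects the structure as a whole. */
	pthread_mutex_t pid_mutex;	/* Protects the pid field. */
	struct health_state health;	/* Health counter for this thread. */
	/* Readiness handshake replacing the former semaphore: */
	int consumer_thread_is_ready;	/* 0: not started, 1: ready, <0: error. */
	pthread_cond_t cond;		/* Signaled once readiness is known. */
	pthread_condattr_t condattr;	/* Carries CLOCK_MONOTONIC for cond. */
	pthread_mutex_t cond_mutex;	/* Guards consumer_thread_is_ready. */
};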
/* Shared between threads */
ERR("Unable to clean %s", rundir);
}
free(cmd);
+ free(rundir);
DBG("Cleaning up all sessions");
DBG("Thread manage kernel started");
+ testpoint(thread_manage_kernel);
+
health_code_update(&health_thread_kernel);
+ testpoint(thread_manage_kernel_before_loop);
+
ret = create_thread_poll_set(&events, 2);
if (ret < 0) {
goto error_poll_create;
return NULL;
}
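/*
 * The testpoint() calls added throughout this patch come from "testpoint.h".
 * As a sketch only (the actual lttng-tools mechanism and symbol naming may
 * differ), such a hook can be built on a run-time symbol lookup so that a
 * test harness can LD_PRELOAD a function named after the testpoint, while
 * regular runs only pay a failed symbol lookup:
 */
#include <dlfcn.h>	/* dlsym(); RTLD_DEFAULT needs _GNU_SOURCE on glibc. */

#define testpoint_sketch(name)						\
	do {								\
		void (*tp)(void) = (void (*)(void)) dlsym(RTLD_DEFAULT,	\
				"__testpoint_" #name);			\
		if (tp) {						\
			/* A test interposed this point; run its hook. */ \
			tp();						\
		}							\
	} while (0)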
+/*
+ * Signal the pthread condition of the consumer data that the consumer thread
+ * state has been set (started and ready, or failed).
+ */
+static void signal_consumer_condition(struct consumer_data *data, int state)
+{
+ pthread_mutex_lock(&data->cond_mutex);
+
+ /*
+ * The state is set before signaling. It can be any value; it is the
+ * waiter's job to correctly interpret the state paired with the
+ * consumer pthread_cond.
+ *
+ * A value of 0 means that the corresponding thread of the consumer data
+ * was not started. 1 indicates that the thread has started and is ready
+ * for action. A negative value means that there was an error during the
+ * thread bootstrap.
+ */
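+ /*
+ * Writing the state while cond_mutex is held is what makes this handshake
+ * race-free: the waiter in spawn_consumer_thread() re-checks
+ * consumer_thread_is_ready under the same mutex, so the signal below can
+ * never fall between the waiter's predicate check and its (timed) wait.
+ */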
+ data->consumer_thread_is_ready = state;
+ (void) pthread_cond_signal(&data->cond);
+
+ pthread_mutex_unlock(&data->cond_mutex);
+}
+
/*
 * This thread manages the consumer error sent back to the session daemon.
*/
DBG("[thread] Manage consumer started");
+ /*
+ * Since the consumer thread can be spawned at any moment in time, we
+ * initialize the health to a poll status (1, which is a valid health state
+ * over time). When the thread starts, the extra poll update below moves
+ * the health to a "code" state (an even value), so that the poll wait
+ * reached later in this thread lands on an odd value rather than an even
+ * one, which would read as bad health.
+ *
+ * Here is the case we avoid:
+ *
+ * +1: the first poll update during initialization (main())
+ * +2 * x: any number of code updates in this thread.
+ * +1: the poll wait in this thread (which should be a good health state).
+ * == an even number, which after the wait period shows as bad health.
+ *
+ * In a nutshell, the following poll update brings the health state back to
+ * an even value, meaning a code path.
+ */
+ health_poll_update(&consumer_data->health);
+
health_code_update(&consumer_data->health);
ret = lttcomm_listen_unix_sock(consumer_data->err_sock);
/* Infinite blocking call, waiting for transmission */
restart:
health_poll_update(&consumer_data->health);
+
+ testpoint(thread_manage_consumer);
+
ret = lttng_poll_wait(&events, -1);
health_poll_update(&consumer_data->health);
if (ret < 0) {
goto error;
}
+ /*
+ * Set the CLOEXEC flag. Return code is useless because either way, the
+ * show must go on.
+ */
+ (void) utils_set_fd_cloexec(sock);
+
health_code_update(&consumer_data->health);
DBG2("Receiving code from consumer err_sock");
consumer_data->cmd_sock =
lttcomm_connect_unix_sock(consumer_data->cmd_unix_sock_path);
if (consumer_data->cmd_sock < 0) {
- sem_post(&consumer_data->sem);
+ /* On error, signal condition and quit. */
+ signal_consumer_condition(consumer_data, -1);
PERROR("consumer connect");
goto error;
}
- /* Signal condition to tell that the kconsumerd is ready */
- sem_post(&consumer_data->sem);
- DBG("consumer command socket ready");
+ signal_consumer_condition(consumer_data, 1);
+ DBG("Consumer command socket ready");
} else {
ERR("consumer error when waiting for SOCK_READY : %s",
lttcomm_get_readable_code(-code));
DBG("[thread] Manage application started");
+ testpoint(thread_manage_apps);
+
rcu_register_thread();
rcu_thread_online();
goto error;
}
+ testpoint(thread_manage_apps_before_loop);
+
health_code_update(&health_thread_app_manage);
while (1) {
ust_app_unregister(ust_cmd.sock);
} else {
/*
- * We just need here to monitor the close of the UST
- * socket and poll set monitor those by default.
- * Listen on POLLIN (even if we never expect any
- * data) to ensure that hangup wakes us.
+ * We only monitor the error events of the socket. This
+ * thread does not handle any incoming data from UST
+ * (POLLIN).
*/
- ret = lttng_poll_add(&events, ust_cmd.sock, LPOLLIN);
+ ret = lttng_poll_add(&events, ust_cmd.sock,
+ LPOLLERR | LPOLLHUP | LPOLLRDHUP);
if (ret < 0) {
goto error;
}
DBG("[thread] Manage application registration started");
+ testpoint(thread_registration_apps);
+
ret = lttcomm_listen_unix_sock(apps_sock);
if (ret < 0) {
goto error_listen;
goto error;
}
+ /*
+ * Set the CLOEXEC flag. Return code is useless because
+ * either way, the show must go on.
+ */
+ (void) utils_set_fd_cloexec(sock);
+
/* Create UST registration command for enqueuing */
ust_cmd = zmalloc(sizeof(struct ust_command));
if (ust_cmd == NULL) {
*/
static int spawn_consumer_thread(struct consumer_data *consumer_data)
{
- int ret;
+ int ret, clock_ret;
struct timespec timeout;
- timeout.tv_sec = DEFAULT_SEM_WAIT_TIMEOUT;
- timeout.tv_nsec = 0;
+ /* Make sure we set the readiness flag to 0 because we are NOT ready */
+ consumer_data->consumer_thread_is_ready = 0;
- /* Setup semaphore */
- ret = sem_init(&consumer_data->sem, 0, 0);
- if (ret < 0) {
- PERROR("sem_init consumer semaphore");
+ /* Setup pthread condition */
+ ret = pthread_condattr_init(&consumer_data->condattr);
+ if (ret != 0) {
+ errno = ret;
+ PERROR("pthread_condattr_init consumer data");
+ goto error;
+ }
+
+ /*
+ * Set the monotonic clock in order to make sure we DO NOT jump in time
+ * between the clock_gettime() call and the timedwait call. See bug #324
+ * for more details and how we noticed it.
+ */
+ ret = pthread_condattr_setclock(&consumer_data->condattr, CLOCK_MONOTONIC);
+ if (ret != 0) {
+ errno = ret;
+ PERROR("pthread_condattr_setclock consumer data");
+ goto error;
+ }
+
+ ret = pthread_cond_init(&consumer_data->cond, &consumer_data->condattr);
+ if (ret != 0) {
+ errno = ret;
+ PERROR("pthread_cond_init consumer data");
goto error;
}
- ret = pthread_create(&consumer_data->thread, NULL,
- thread_manage_consumer, consumer_data);
+ ret = pthread_create(&consumer_data->thread, NULL, thread_manage_consumer,
+ consumer_data);
if (ret != 0) {
PERROR("pthread_create consumer");
ret = -1;
goto error;
}
+ /* We are about to wait on a pthread condition */
+ pthread_mutex_lock(&consumer_data->cond_mutex);
+
- /* Get time for sem_timedwait absolute timeout */
+ /* Get time for the pthread_cond_timedwait absolute timeout */
- ret = clock_gettime(CLOCK_REALTIME, &timeout);
- if (ret < 0) {
- PERROR("clock_gettime spawn consumer");
- /* Infinite wait for the kconsumerd thread to be ready */
- ret = sem_wait(&consumer_data->sem);
- } else {
- /* Normal timeout if the gettime was successful */
- timeout.tv_sec += DEFAULT_SEM_WAIT_TIMEOUT;
- ret = sem_timedwait(&consumer_data->sem, &timeout);
+ clock_ret = clock_gettime(CLOCK_MONOTONIC, &timeout);
+ /*
+ * Set the timeout for the condition timed wait once, even if the
+ * clock_gettime() call failed: the wait below may loop, and the absolute
+ * deadline must not be pushed back on every iteration.
+ */
+ timeout.tv_sec += DEFAULT_SEM_WAIT_TIMEOUT;
+
+ /*
+ * Reset ret to 0 before the loop: it still holds the pthread_create()
+ * return value, and a stale value must not be able to skip the loop or be
+ * mistaken for a wait result.
+ */
+ ret = 0;
+
+ /*
+ * Loop until the condition is signaled or the timeout is reached. Note
+ * that the POSIX pthread_cond_timedwait() page states that EINTR cannot be
+ * returned, while pthread_cond(3) from the glibc documentation says it
+ * can. This loop takes no chances and handles both behaviors, as well as
+ * spurious wakeups.
+ */
+ while (!consumer_data->consumer_thread_is_ready && ret != ETIMEDOUT) {
+ if (clock_ret < 0) {
+ PERROR("clock_gettime spawn consumer");
+ /* Infinite wait for the consumerd thread to be ready */
+ ret = pthread_cond_wait(&consumer_data->cond,
+ &consumer_data->cond_mutex);
+ } else {
+ ret = pthread_cond_timedwait(&consumer_data->cond,
+ &consumer_data->cond_mutex, &timeout);
+ }
}
- if (ret < 0) {
- if (errno == ETIMEDOUT) {
+ /* Release the pthread condition mutex */
+ pthread_mutex_unlock(&consumer_data->cond_mutex);
+
+ if (ret != 0) {
+ errno = ret;
+ if (ret == ETIMEDOUT) {
/*
 * Call has timed out so we kill the consumer thread and return
* an error.
*/
- ERR("The consumer thread was never ready. Killing it");
+ ERR("Condition timed out. The consumer thread was never ready."
+ " Killing it");
ret = pthread_cancel(consumer_data->thread);
- if (ret < 0) {
+ /* pthread_cancel() returns 0 or a positive error number, never -1. */
+ if (ret != 0) {
PERROR("pthread_cancel consumer thread");
}
} else {
- PERROR("semaphore wait failed consumer thread");
+ PERROR("pthread_cond_wait failed consumer thread");
}
goto error;
}
pthread_mutex_lock(&consumer_data->pid_mutex);
if (consumer_data->pid == 0) {
- ERR("Kconsumerd did not start");
+ ERR("Consumerd did not start");
pthread_mutex_unlock(&consumer_data->pid_mutex);
goto error;
}
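/*
 * The readiness wait above is the heart of this change, so here it is
 * restated as a self-contained sketch (the helper name and the bare ready
 * flag are illustrative, not from this file): create the condition variable
 * on CLOCK_MONOTONIC, compute one absolute deadline, and loop on the
 * predicate so that spurious wakeups and a possible EINTR are both absorbed.
 */
#include <errno.h>
#include <pthread.h>
#include <time.h>

static int wait_until_ready(pthread_mutex_t *mtx, pthread_cond_t *cond,
		const int *ready, time_t timeout_sec)
{
	struct timespec deadline;
	int ret = 0, is_ready;

	/* Absolute deadline on the same clock the condvar was created with. */
	(void) clock_gettime(CLOCK_MONOTONIC, &deadline);
	deadline.tv_sec += timeout_sec;

	pthread_mutex_lock(mtx);
	while (!*ready && ret != ETIMEDOUT) {
		/* Returns 0, ETIMEDOUT, or (with some libcs) EINTR. */
		ret = pthread_cond_timedwait(cond, mtx, &deadline);
	}
	is_ready = *ready;
	pthread_mutex_unlock(mtx);

	return is_ready ? 0 : ret;
}

/*
 * Note: cond must have been initialized with a condattr carrying
 * CLOCK_MONOTONIC, exactly as spawn_consumer_thread() does above; otherwise
 * pthread_cond_timedwait() interprets the deadline on CLOCK_REALTIME.
 */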
switch (domain) {
case LTTNG_DOMAIN_KERNEL:
DBG3("Copying tracing session consumer output in kernel session");
+ /*
+ * XXX: We should audit session creation and the "extra" work this
+ * function does, in order to avoid the destroy below. This function is
+ * only used during domain session creation (kernel and UST alike).
+ */
+ if (session->kernel_session->consumer) {
+ consumer_destroy_output(session->kernel_session->consumer);
+ }
session->kernel_session->consumer =
consumer_copy_output(session->consumer);
/* Ease our life a bit for the next part */
break;
case LTTNG_DOMAIN_UST:
DBG3("Copying tracing session consumer output in UST session");
+ if (session->ust_session->consumer) {
+ consumer_destroy_output(session->ust_session->consumer);
+ }
session->ust_session->consumer =
consumer_copy_output(session->consumer);
/* Ease our life a bit for the next part */
case LTTNG_LIST_DOMAINS:
case LTTNG_START_TRACE:
case LTTNG_STOP_TRACE:
+ case LTTNG_DATA_PENDING:
need_domain = 0;
break;
default:
ret = cmd_add_context(cmd_ctx->session, cmd_ctx->lsm->domain.type,
cmd_ctx->lsm->u.context.channel_name,
cmd_ctx->lsm->u.context.event_name,
- &cmd_ctx->lsm->u.context.ctx);
+ &cmd_ctx->lsm->u.context.ctx, kernel_poll_pipe[1]);
break;
}
case LTTNG_DISABLE_CHANNEL:
DBG("No URIs received from client... continuing");
*sock_error = 1;
ret = LTTNG_ERR_SESSION_FAIL;
+ free(uris);
goto error;
}
ret = cmd_set_consumer_uri(cmd_ctx->lsm->domain.type, cmd_ctx->session,
nb_uri, uris);
if (ret != LTTNG_OK) {
+ free(uris);
goto error;
}
}
}
+ free(uris);
+
break;
}
case LTTNG_START_TRACE:
DBG("No URIs received from client... continuing");
*sock_error = 1;
ret = LTTNG_ERR_SESSION_FAIL;
+ free(uris);
goto error;
}
if (nb_uri == 1 && uris[0].dtype != LTTNG_DST_PATH) {
DBG("Creating session with ONE network URI is a bad call");
ret = LTTNG_ERR_SESSION_FAIL;
+ free(uris);
goto error;
}
}
ret = cmd_create_session_uri(cmd_ctx->lsm->session.name, uris, nb_uri,
&cmd_ctx->creds);
+ free(uris);
+
break;
}
case LTTNG_DESTROY_SESSION:
{
struct lttng_filter_bytecode *bytecode;
- if (cmd_ctx->lsm->u.filter.bytecode_len > 65336) {
+ if (cmd_ctx->lsm->u.filter.bytecode_len > LTTNG_FILTER_MAX_LEN) {
ret = LTTNG_ERR_FILTER_INVAL;
goto error;
}
bytecode);
break;
}
+ case LTTNG_DATA_PENDING:
+ {
+ ret = cmd_data_pending(cmd_ctx->session);
+ break;
+ }
default:
ret = LTTNG_ERR_UND;
break;
goto error;
}
+ /*
+ * Set the CLOEXEC flag. Return code is useless because either way, the
+ * show must go on.
+ */
+ (void) utils_set_fd_cloexec(sock);
+
ret = lttcomm_listen_unix_sock(sock);
if (ret < 0) {
goto error;
goto error;
}
+ /*
+ * Set the CLOEXEC flag. Return code is useless because either way, the
+ * show must go on.
+ */
+ (void) utils_set_fd_cloexec(new_sock);
+
DBG("Receiving data from client for health...");
ret = lttcomm_recv_unix_sock(new_sock, (void *)&msg, sizeof(msg));
if (ret <= 0) {
DBG("[thread] Manage client started");
+ testpoint(thread_manage_clients);
+
rcu_register_thread();
health_code_update(&health_thread_cmd);
kill(ppid, SIGUSR1);
}
+ testpoint(thread_manage_clients_before_loop);
+
health_code_update(&health_thread_cmd);
while (1) {
goto error;
}
+ /*
+ * Set the CLOEXEC flag. Return code is useless because either way, the
+ * show must go on.
+ */
+ (void) utils_set_fd_cloexec(sock);
+
/* Set socket option for credentials retrieval */
ret = lttcomm_setsockopt_creds_unix_sock(sock);
if (ret < 0) {
goto end;
}
+ /* Set the cloexec flag */
+ ret = utils_set_fd_cloexec(client_sock);
+ if (ret < 0) {
+ ERR("Unable to set CLOEXEC flag to the client Unix socket (fd: %d). "
+ "Continuing but note that the consumer daemon will have a "
+ "reference to this socket on exec()", client_sock);
+ }
+
/* File permission MUST be 660 */
ret = chmod(client_unix_sock_path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
if (ret < 0) {
goto end;
}
+ /* Set the cloexec flag */
+ ret = utils_set_fd_cloexec(apps_sock);
+ if (ret < 0) {
+ ERR("Unable to set CLOEXEC flag to the app Unix socket (fd: %d). "
+ "Continuing but note that the consumer daemon will have a "
+ "reference to this socket on exec()", apps_sock);
+ }
+
/* File permission MUST be 666 */
ret = chmod(apps_unix_sock_path,
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
goto end;
}
+ DBG3("Session daemon client socket %d and application socket %d created",
+ client_sock, apps_sock);
+
end:
umask(old_umask);
return ret;
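/*
 * utils_set_fd_cloexec() is applied to every socket created above. Assuming
 * it wraps the usual fcntl() idiom (the real helper lives in the shared
 * utils code and may differ), a minimal sketch:
 */
#include <fcntl.h>

static int set_fd_cloexec_sketch(int fd)
{
	int flags = fcntl(fd, F_GETFD, 0);

	if (flags < 0) {
		return -1;
	}
	/* Mark the descriptor to be closed automatically across exec(). */
	return fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
}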
/*
 * Init health counters of the consumer thread. We do a quick hack here so
 * that the consumer health state reads as fine even if the thread is not
- * started. This is simply to ease our life and has no cost what so ever.
+ * started. Once the thread starts, the health state is updated with a poll
+ * value to set a health code path. This is simply to ease our life and has
+ * no cost whatsoever.
*/
health_init(&kconsumer_data.health);
health_poll_update(&kconsumer_data.health);
goto error; /* join error, exit without cleanup */
}
+ ret = join_consumer_thread(&ustconsumer32_data);
+ if (ret != 0) {
+ PERROR("join_consumer ust32");
+ goto error; /* join error, exit without cleanup */
+ }
+
+ ret = join_consumer_thread(&ustconsumer64_data);
+ if (ret != 0) {
+ PERROR("join_consumer ust64");
+ goto error; /* join error, exit without cleanup */
+ }
+
exit_client:
+ ret = pthread_join(health_thread, &status);
+ if (ret != 0) {
+ PERROR("pthread_join health thread");
+ goto error; /* join error, exit without cleanup */
+ }
+
exit_health:
exit:
/*