Fix: compat poll: add missing empty revents checks
[lttng-tools.git] / src / bin / lttng-sessiond / main.c
index bbdf141b2409825ee1b900453b2914ec03d7c9d3..c7beed55d9a519e598164d352088dc8accfc53da 100644 (file)
@@ -76,10 +76,10 @@ static int opt_sig_parent;
 static int opt_verbose_consumer;
 static int opt_daemon, opt_background;
 static int opt_no_kernel;
-static int is_root;                    /* Set to 1 if the daemon is running as root */
 static pid_t ppid;          /* Parent PID for --sig-parent option */
 static pid_t child_ppid;    /* Internal parent PID use with daemonize. */
 static char *rundir;
+static int lockfile_fd = -1;
 
 /* Set to 1 when a SIGUSR1 signal is received. */
 static int recv_child_signal;
@@ -244,6 +244,9 @@ struct health_app *health_sessiond;
 /* JUL TCP port for registration. Used by the JUL thread. */
 unsigned int jul_tcp_port = DEFAULT_JUL_TCP_PORT;
 
+/* Am I root or not. */
+int is_root;                   /* Set to 1 if the daemon is running as root */
+
 /*
  * Whether sessiond is ready for commands/health check requests.
  * NR_LTTNG_SESSIOND_READY must match the number of calls to
@@ -456,6 +459,27 @@ static void close_consumer_sockets(void)
        }
 }
 
+/*
+ * Generate the full lock file path using the rundir.
+ *
+ * Return the snprintf() return value thus a negative value is an error.
+ */
+static int generate_lock_file_path(char *path, size_t len)
+{
+       int ret;
+
+       assert(path);
+       assert(rundir);
+
+       /* Build lockfile path from rundir. */
+       ret = snprintf(path, len, "%s/" DEFAULT_LTTNG_SESSIOND_LOCKFILE, rundir);
+       if (ret < 0) {
+               PERROR("snprintf lockfile path");
+       }
+
+       return ret;
+}
+
 /*
  * Cleanup the daemon
  */
@@ -537,14 +561,6 @@ static void cleanup(void)
        DBG("Removing directory %s", path);
        (void) rmdir(path);
 
-       /*
-        * We do NOT rmdir rundir because there are other processes
-        * using it, for instance lttng-relayd, which can start in
-        * parallel with this teardown.
-        */
-
-       free(rundir);
-
        DBG("Cleaning up all sessions");
 
        /* Destroy session list mutex */
@@ -576,6 +592,35 @@ static void cleanup(void)
 
        close_consumer_sockets();
 
+
+       /*
+        * Cleanup lock file by deleting it and finaly closing it which will
+        * release the file system lock.
+        */
+       if (lockfile_fd >= 0) {
+               char lockfile_path[PATH_MAX];
+
+               ret = generate_lock_file_path(lockfile_path, sizeof(lockfile_path));
+               if (ret > 0) {
+                       ret = remove(lockfile_path);
+                       if (ret < 0) {
+                               PERROR("remove lock file");
+                       }
+                       ret = close(lockfile_fd);
+                       if (ret < 0) {
+                               PERROR("close lock file");
+                       }
+               }
+       }
+
+       /*
+        * We do NOT rmdir rundir because there are other processes
+        * using it, for instance lttng-relayd, which can start in
+        * parallel with this teardown.
+        */
+
+       free(rundir);
+
        /* <fun> */
        DBG("%c[%d;%dm*** assert failed :-) *** ==> %c[%dm%c[%d;%dm"
                        "Matthew, BEET driven development works!%c[%dm",
@@ -900,6 +945,11 @@ static void *thread_manage_kernel(void *data)
 
                        health_code_update();
 
+                       if (!revents) {
+                               /* No activity for this FD (poll implementation). */
+                               continue;
+                       }
+
                        /* Thread quit pipe has been closed. Killing thread. */
                        ret = sessiond_check_thread_quit_pipe(pollfd, revents);
                        if (ret) {
@@ -983,7 +1033,7 @@ static void signal_consumer_condition(struct consumer_data *data, int state)
  */
 static void *thread_manage_consumer(void *data)
 {
-       int sock = -1, i, ret, pollfd, err = -1;
+       int sock = -1, i, ret, pollfd, err = -1, should_quit = 0;
        uint32_t revents, nb_fd;
        enum lttcomm_return_code code;
        struct lttng_poll_event events;
@@ -1045,6 +1095,11 @@ restart:
 
                health_code_update();
 
+               if (!revents) {
+                       /* No activity for this FD (poll implementation). */
+                       continue;
+               }
+
                /* Thread quit pipe has been closed. Killing thread. */
                ret = sessiond_check_thread_quit_pipe(pollfd, revents);
                if (ret) {
@@ -1084,7 +1139,6 @@ restart:
        }
 
        health_code_update();
-
        if (code == LTTCOMM_CONSUMERD_COMMAND_SOCK_READY) {
                /* Connect both socket, command and metadata. */
                consumer_data->cmd_sock =
@@ -1142,6 +1196,15 @@ restart:
        /* Infinite blocking call, waiting for transmission */
 restart_poll:
        while (1) {
+               health_code_update();
+
+               /* Exit the thread because the thread quit pipe has been triggered. */
+               if (should_quit) {
+                       /* Not a health error. */
+                       err = 0;
+                       goto exit;
+               }
+
                health_poll_entry();
                ret = lttng_poll_wait(&events, -1);
                health_poll_exit();
@@ -1164,13 +1227,18 @@ restart_poll:
 
                        health_code_update();
 
-                       /* Thread quit pipe has been closed. Killing thread. */
-                       ret = sessiond_check_thread_quit_pipe(pollfd, revents);
-                       if (ret) {
-                               err = 0;
-                               goto exit;
+                       if (!revents) {
+                               /* No activity for this FD (poll implementation). */
+                               continue;
                        }
 
+                       /*
+                        * Thread quit pipe has been triggered, flag that we should stop
+                        * but continue the current loop to handle potential data from
+                        * consumer.
+                        */
+                       should_quit = sessiond_check_thread_quit_pipe(pollfd, revents);
+
                        if (pollfd == sock) {
                                /* Event on the consumerd socket */
                                if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
@@ -1198,11 +1266,8 @@ restart_poll:
                                        ERR("Handling metadata request");
                                        goto error;
                                }
-                               break;
-                       } else {
-                               ERR("Unknown pollfd");
-                               goto error;
                        }
+                       /* No need for an else branch all FDs are tested prior. */
                }
                health_code_update();
        }
@@ -1241,13 +1306,13 @@ error:
                }
                consumer_data->cmd_sock = -1;
        }
-       if (*consumer_data->metadata_sock.fd_ptr >= 0) {
+       if (consumer_data->metadata_sock.fd_ptr &&
+           *consumer_data->metadata_sock.fd_ptr >= 0) {
                ret = close(*consumer_data->metadata_sock.fd_ptr);
                if (ret) {
                        PERROR("close");
                }
        }
-
        if (sock >= 0) {
                ret = close(sock);
                if (ret) {
@@ -1261,9 +1326,10 @@ error:
        pthread_mutex_unlock(&consumer_data->lock);
 
        /* Cleanup metadata socket mutex. */
-       pthread_mutex_destroy(consumer_data->metadata_sock.lock);
-       free(consumer_data->metadata_sock.lock);
-
+       if (consumer_data->metadata_sock.lock) {
+               pthread_mutex_destroy(consumer_data->metadata_sock.lock);
+               free(consumer_data->metadata_sock.lock);
+       }
        lttng_poll_clean(&events);
 error_poll:
        if (err) {
@@ -1342,6 +1408,11 @@ static void *thread_manage_apps(void *data)
 
                        health_code_update();
 
+                       if (!revents) {
+                               /* No activity for this FD (poll implementation). */
+                               continue;
+                       }
+
                        /* Thread quit pipe has been closed. Killing thread. */
                        ret = sessiond_check_thread_quit_pipe(pollfd, revents);
                        if (ret) {
@@ -1520,6 +1591,11 @@ static void sanitize_wait_queue(struct ust_reg_wait_queue *wait_queue)
                uint32_t revents = LTTNG_POLL_GETEV(&events, i);
                int pollfd = LTTNG_POLL_GETFD(&events, i);
 
+               if (!revents) {
+                       /* No activity for this FD (poll implementation). */
+                       continue;
+               }
+
                cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
                                &wait_queue->head, head) {
                        if (pollfd == wait_node->app->sock &&
@@ -1859,6 +1935,11 @@ static void *thread_registration_apps(void *data)
                        revents = LTTNG_POLL_GETEV(&events, i);
                        pollfd = LTTNG_POLL_GETFD(&events, i);
 
+                       if (!revents) {
+                               /* No activity for this FD (poll implementation). */
+                               continue;
+                       }
+
                        /* Thread quit pipe has been closed. Killing thread. */
                        ret = sessiond_check_thread_quit_pipe(pollfd, revents);
                        if (ret) {
@@ -2085,19 +2166,23 @@ static int spawn_consumer_thread(struct consumer_data *consumer_data)
        if (ret != 0) {
                errno = ret;
                if (ret == ETIMEDOUT) {
+                       int pth_ret;
+
                        /*
                         * Call has timed out so we kill the kconsumerd_thread and return
                         * an error.
                         */
                        ERR("Condition timed out. The consumer thread was never ready."
                                        " Killing it");
-                       ret = pthread_cancel(consumer_data->thread);
-                       if (ret < 0) {
+                       pth_ret = pthread_cancel(consumer_data->thread);
+                       if (pth_ret < 0) {
                                PERROR("pthread_cancel consumer thread");
                        }
                } else {
                        PERROR("pthread_cond_wait failed consumer thread");
                }
+               /* Caller is expecting a negative value on failure. */
+               ret = -1;
                goto error;
        }
 
@@ -2183,10 +2268,11 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data)
                                consumer_to_use = consumerd32_bin;
                        } else {
                                DBG("Could not find any valid consumerd executable");
+                               ret = -EINVAL;
                                break;
                        }
                        DBG("Using kernel consumer at: %s",  consumer_to_use);
-                       execl(consumer_to_use,
+                       ret = execl(consumer_to_use,
                                "lttng-consumerd", verbosity, "-k",
                                "--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path,
                                "--consumerd-err-sock", consumer_data->err_unix_sock_path,
@@ -2234,9 +2320,6 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data)
                        if (consumerd64_libdir[0] != '\0') {
                                free(tmpnew);
                        }
-                       if (ret) {
-                               goto error;
-                       }
                        break;
                }
                case LTTNG_CONSUMER32_UST:
@@ -2280,9 +2363,6 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data)
                        if (consumerd32_libdir[0] != '\0') {
                                free(tmpnew);
                        }
-                       if (ret) {
-                               goto error;
-                       }
                        break;
                }
                default:
@@ -2290,8 +2370,9 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data)
                        exit(EXIT_FAILURE);
                }
                if (errno != 0) {
-                       PERROR("kernel start consumer exec");
+                       PERROR("Consumer execl()");
                }
+               /* Reaching this point, we got a failure on our execl(). */
                exit(EXIT_FAILURE);
        } else if (pid > 0) {
                ret = pid;
@@ -2849,6 +2930,7 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock,
                        }
 
                        /* 32-bit */
+                       pthread_mutex_lock(&ustconsumer32_data.pid_mutex);
                        if (consumerd32_bin[0] != '\0' &&
                                        ustconsumer32_data.pid == 0 &&
                                        cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER) {
@@ -3053,7 +3135,9 @@ skip_domain:
                struct lttng_event *events;
                ssize_t nb_events;
 
+               session_lock_list();
                nb_events = cmd_list_tracepoints(cmd_ctx->lsm->domain.type, &events);
+               session_unlock_list();
                if (nb_events < 0) {
                        /* Return value is a negative lttng_error_code. */
                        ret = -nb_events;
@@ -3084,8 +3168,10 @@ skip_domain:
                struct lttng_event_field *fields;
                ssize_t nb_fields;
 
+               session_lock_list();
                nb_fields = cmd_list_tracepoint_fields(cmd_ctx->lsm->domain.type,
                                &fields);
+               session_unlock_list();
                if (nb_fields < 0) {
                        /* Return value is a negative lttng_error_code. */
                        ret = -nb_fields;
@@ -3259,7 +3345,7 @@ skip_domain:
        case LTTNG_LIST_CHANNELS:
        {
                int nb_chan;
-               struct lttng_channel *channels;
+               struct lttng_channel *channels = NULL;
 
                nb_chan = cmd_list_channels(cmd_ctx->lsm->domain.type,
                                cmd_ctx->session, &channels);
@@ -3627,6 +3713,11 @@ restart:
                        revents = LTTNG_POLL_GETEV(&events, i);
                        pollfd = LTTNG_POLL_GETFD(&events, i);
 
+                       if (!revents) {
+                               /* No activity for this FD (poll implementation). */
+                               continue;
+                       }
+
                        /* Thread quit pipe has been closed. Killing thread. */
                        ret = sessiond_check_thread_quit_pipe(pollfd, revents);
                        if (ret) {
@@ -3668,7 +3759,7 @@ restart:
 
                rcu_thread_online();
 
-               reply.ret_code = 0;
+               memset(&reply, 0, sizeof(reply));
                for (i = 0; i < NR_HEALTH_SESSIOND_TYPES; i++) {
                        /*
                         * health_check_state returns 0 if health is
@@ -3794,6 +3885,11 @@ static void *thread_manage_clients(void *data)
 
                        health_code_update();
 
+                       if (!revents) {
+                               /* No activity for this FD (poll implementation). */
+                               continue;
+                       }
+
                        /* Thread quit pipe has been closed. Killing thread. */
                        ret = sessiond_check_thread_quit_pipe(pollfd, revents);
                        if (ret) {
@@ -4508,6 +4604,24 @@ error:
        return;
 }
 
+/*
+ * Create lockfile using the rundir and return its fd.
+ */
+static int create_lockfile(void)
+{
+       int ret;
+       char lockfile_path[PATH_MAX];
+
+       ret = generate_lock_file_path(lockfile_path, sizeof(lockfile_path));
+       if (ret < 0) {
+               goto error;
+       }
+
+       ret = utils_create_lock_file(lockfile_path);
+error:
+       return ret;
+}
+
 /*
  * Write JUL TCP port using the rundir.
  */
@@ -4598,6 +4712,10 @@ int main(int argc, char **argv)
 
        if (is_root) {
                rundir = strdup(DEFAULT_LTTNG_RUNDIR);
+               if (!rundir) {
+                       ret = -ENOMEM;
+                       goto error;
+               }
 
                /* Create global run dir with root access */
                ret = create_lttng_rundir(rundir);
@@ -4684,6 +4802,11 @@ int main(int argc, char **argv)
                }
        }
 
+       lockfile_fd = create_lockfile();
+       if (lockfile_fd < 0) {
+               goto error;
+       }
+
        /* Set consumer initial state */
        kernel_consumerd_state = CONSUMER_STOPPED;
        ust_consumerd_state = CONSUMER_STOPPED;
@@ -4907,7 +5030,7 @@ int main(int argc, char **argv)
        ret = pthread_create(&apps_notify_thread, NULL,
                        ust_thread_manage_notify, (void *) NULL);
        if (ret != 0) {
-               PERROR("pthread_create apps");
+               PERROR("pthread_create notify");
                goto exit_apps_notify;
        }
 
@@ -4915,7 +5038,7 @@ int main(int argc, char **argv)
        ret = pthread_create(&jul_reg_thread, NULL,
                        jul_thread_manage_registration, (void *) NULL);
        if (ret != 0) {
-               PERROR("pthread_create apps");
+               PERROR("pthread_create JUL");
                goto exit_jul_reg;
        }
 
This page took 0.02954 seconds and 4 git commands to generate.