Get the maximum TCP timeout in sessiond
[lttng-tools.git] / src / bin / lttng-sessiond / main.c
index 007c722921700e351cfedd6441fc31a16a9683b3..7032191bd4dbc40eef7631263a74dbb907155dc2 100644 (file)
@@ -390,6 +390,51 @@ static void stop_threads(void)
        futex_nto1_wake(&ust_cmd_queue.futex);
 }
 
+/*
+ * Close every consumer sockets.
+ */
+static void close_consumer_sockets(void)
+{
+       int ret;
+
+       if (kconsumer_data.err_sock >= 0) {
+               ret = close(kconsumer_data.err_sock);
+               if (ret < 0) {
+                       PERROR("kernel consumer err_sock close");
+               }
+       }
+       if (ustconsumer32_data.err_sock >= 0) {
+               ret = close(ustconsumer32_data.err_sock);
+               if (ret < 0) {
+                       PERROR("UST consumerd32 err_sock close");
+               }
+       }
+       if (ustconsumer64_data.err_sock >= 0) {
+               ret = close(ustconsumer64_data.err_sock);
+               if (ret < 0) {
+                       PERROR("UST consumerd64 err_sock close");
+               }
+       }
+       if (kconsumer_data.cmd_sock >= 0) {
+               ret = close(kconsumer_data.cmd_sock);
+               if (ret < 0) {
+                       PERROR("kernel consumer cmd_sock close");
+               }
+       }
+       if (ustconsumer32_data.cmd_sock >= 0) {
+               ret = close(ustconsumer32_data.cmd_sock);
+               if (ret < 0) {
+                       PERROR("UST consumerd32 cmd_sock close");
+               }
+       }
+       if (ustconsumer64_data.cmd_sock >= 0) {
+               ret = close(ustconsumer64_data.cmd_sock);
+               if (ret < 0) {
+                       PERROR("UST consumerd64 cmd_sock close");
+               }
+       }
+}
+
 /*
  * Cleanup the daemon
  */
@@ -458,6 +503,8 @@ static void cleanup(void)
                modprobe_remove_lttng_all();
        }
 
+       close_consumer_sockets();
+
        /* <fun> */
        DBG("%c[%d;%dm*** assert failed :-) *** ==> %c[%dm%c[%d;%dm"
                        "Matthew, BEET driven development works!%c[%dm",
@@ -1101,12 +1148,14 @@ error:
                if (ret) {
                        PERROR("close");
                }
+               consumer_data->err_sock = -1;
        }
        if (consumer_data->cmd_sock >= 0) {
                ret = close(consumer_data->cmd_sock);
                if (ret) {
                        PERROR("close");
                }
+               consumer_data->cmd_sock = -1;
        }
        if (consumer_data->metadata_sock.fd >= 0) {
                ret = close(consumer_data->metadata_sock.fd);
@@ -1334,6 +1383,91 @@ error:
        return ret;
 }
 
+/*
+ * Sanitize the wait queue of the dispatch registration thread meaning removing
+ * invalid nodes from it. This is to avoid memory leaks for the case the UST
+ * notify socket is never received.
+ */
+static void sanitize_wait_queue(struct ust_reg_wait_queue *wait_queue)
+{
+       int ret, nb_fd = 0, i;
+       unsigned int fd_added = 0;
+       struct lttng_poll_event events;
+       struct ust_reg_wait_node *wait_node = NULL, *tmp_wait_node;
+
+       assert(wait_queue);
+
+       lttng_poll_init(&events);
+
+       /* Just skip everything for an empty queue. */
+       if (!wait_queue->count) {
+               goto end;
+       }
+
+       ret = lttng_poll_create(&events, wait_queue->count, LTTNG_CLOEXEC);
+       if (ret < 0) {
+               goto error_create;
+       }
+
+       cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
+                       &wait_queue->head, head) {
+               assert(wait_node->app);
+               ret = lttng_poll_add(&events, wait_node->app->sock,
+                               LPOLLHUP | LPOLLERR);
+               if (ret < 0) {
+                       goto error;
+               }
+
+               fd_added = 1;
+       }
+
+       if (!fd_added) {
+               goto end;
+       }
+
+       /*
+        * Poll but don't block so we can quickly identify the faulty events and
+        * clean them afterwards from the wait queue.
+        */
+       ret = lttng_poll_wait(&events, 0);
+       if (ret < 0) {
+               goto error;
+       }
+       nb_fd = ret;
+
+       for (i = 0; i < nb_fd; i++) {
+               /* Get faulty FD. */
+               uint32_t revents = LTTNG_POLL_GETEV(&events, i);
+               int pollfd = LTTNG_POLL_GETFD(&events, i);
+
+               cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
+                               &wait_queue->head, head) {
+                       if (pollfd == wait_node->app->sock &&
+                                       (revents & (LPOLLHUP | LPOLLERR))) {
+                               cds_list_del(&wait_node->head);
+                               wait_queue->count--;
+                               ust_app_destroy(wait_node->app);
+                               free(wait_node);
+                               break;
+                       }
+               }
+       }
+
+       if (nb_fd > 0) {
+               DBG("Wait queue sanitized, %d node were cleaned up", nb_fd);
+       }
+
+end:
+       lttng_poll_clean(&events);
+       return;
+
+error:
+       lttng_poll_clean(&events);
+error_create:
+       ERR("Unable to sanitize wait queue");
+       return;
+}
+
 /*
  * Dispatch request from the registration threads to the application
  * communication thread.
@@ -1343,16 +1477,16 @@ static void *thread_dispatch_ust_registration(void *data)
        int ret, err = -1;
        struct cds_wfq_node *node;
        struct ust_command *ust_cmd = NULL;
-       struct {
-               struct ust_app *app;
-               struct cds_list_head head;
-       } *wait_node = NULL, *tmp_wait_node;
+       struct ust_reg_wait_node *wait_node = NULL, *tmp_wait_node;
+       struct ust_reg_wait_queue wait_queue = {
+               .count = 0,
+       };
 
        health_register(HEALTH_TYPE_APP_REG_DISPATCH);
 
        health_code_update();
 
-       CDS_LIST_HEAD(wait_queue);
+       CDS_INIT_LIST_HEAD(&wait_queue.head);
 
        DBG("[thread] Dispatch UST command started");
 
@@ -1366,6 +1500,13 @@ static void *thread_dispatch_ust_registration(void *data)
                        struct ust_app *app = NULL;
                        ust_cmd = NULL;
 
+                       /*
+                        * Make sure we don't have node(s) that have hung up before receiving
+                        * the notify socket. This is to clean the list in order to avoid
+                        * memory leaks from notify socket that are never seen.
+                        */
+                       sanitize_wait_queue(&wait_queue);
+
                        health_code_update();
                        /* Dequeue command for registration */
                        node = cds_wfq_dequeue_blocking(&ust_cmd_queue.queue);
@@ -1415,7 +1556,8 @@ static void *thread_dispatch_ust_registration(void *data)
                                 * Add application to the wait queue so we can set the notify
                                 * socket before putting this object in the global ht.
                                 */
-                               cds_list_add(&wait_node->head, &wait_queue);
+                               cds_list_add(&wait_node->head, &wait_queue.head);
+                               wait_queue.count++;
 
                                free(ust_cmd);
                                /*
@@ -1430,11 +1572,12 @@ static void *thread_dispatch_ust_registration(void *data)
                                 * notify socket if found.
                                 */
                                cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
-                                               &wait_queue, head) {
+                                               &wait_queue.head, head) {
                                        health_code_update();
                                        if (wait_node->app->pid == ust_cmd->reg_msg.pid) {
                                                wait_node->app->notify_sock = ust_cmd->sock;
                                                cds_list_del(&wait_node->head);
+                                               wait_queue.count--;
                                                app = wait_node->app;
                                                free(wait_node);
                                                DBG3("UST app notify socket %d is set", ust_cmd->sock);
@@ -1529,8 +1672,9 @@ static void *thread_dispatch_ust_registration(void *data)
 error:
        /* Clean up wait queue. */
        cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
-                       &wait_queue, head) {
+                       &wait_queue.head, head) {
                cds_list_del(&wait_node->head);
+               wait_queue.count--;
                free(wait_node);
        }
 
@@ -2467,12 +2611,7 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock,
                session_lock_list();
                cmd_ctx->session = session_find_by_name(cmd_ctx->lsm->session.name);
                if (cmd_ctx->session == NULL) {
-                       if (cmd_ctx->lsm->session.name != NULL) {
-                               ret = LTTNG_ERR_SESS_NOT_FOUND;
-                       } else {
-                               /* If no session name specified */
-                               ret = LTTNG_ERR_SELECT_SESS;
-                       }
+                       ret = LTTNG_ERR_SESS_NOT_FOUND;
                        goto error;
                } else {
                        /* Acquire lock for the session */
@@ -2549,6 +2688,10 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock,
                break;
        case LTTNG_DOMAIN_UST:
        {
+               if (!ust_app_supported()) {
+                       ret = LTTNG_ERR_NO_UST;
+                       goto error;
+               }
                /* Consumer is in an ERROR state. Report back to client */
                if (uatomic_read(&ust_consumerd_state) == CONSUMER_ERROR) {
                        ret = LTTNG_ERR_NO_USTCONSUMERD;
@@ -3814,12 +3957,12 @@ static int set_permissions(char *rundir)
        ret = allowed_group();
        if (ret < 0) {
                WARN("No tracing group detected");
-               ret = 0;
-               goto end;
+               /* Setting gid to 0 if no tracing group is found */
+               gid = 0;
+       } else {
+               gid = ret;
        }
 
-       gid = ret;
-
        /* Set lttng run dir */
        ret = chown(rundir, 0, gid);
        if (ret < 0) {
@@ -3827,7 +3970,7 @@ static int set_permissions(char *rundir)
                PERROR("chown");
        }
 
-       /* Ensure tracing group can search the run dir */
+       /* Ensure all applications and tracing group can search the run dir */
        ret = chmod(rundir, S_IRWXU | S_IXGRP | S_IXOTH);
        if (ret < 0) {
                ERR("Unable to set permissions on %s", rundir);
@@ -3864,7 +4007,6 @@ static int set_permissions(char *rundir)
 
        DBG("All permissions are set");
 
-end:
        return ret;
 }
 
@@ -3938,6 +4080,16 @@ static int set_consumer_sockets(struct consumer_data *consumer_data,
                goto error;
        }
 
+       /*
+        * Set the CLOEXEC flag. Return code is useless because either way, the
+        * show must go on.
+        */
+       ret = utils_set_fd_cloexec(consumer_data->err_sock);
+       if (ret < 0) {
+               PERROR("utils_set_fd_cloexec");
+               /* continue anyway */
+       }
+
        /* File permission MUST be 660 */
        ret = chmod(consumer_data->err_unix_sock_path,
                        S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
@@ -4166,7 +4318,7 @@ int main(int argc, char **argv)
                DBG2("Kernel consumer cmd path: %s",
                                kconsumer_data.cmd_unix_sock_path);
        } else {
-               home_path = get_home_dir();
+               home_path = utils_get_home_dir();
                if (home_path == NULL) {
                        /* TODO: Add --socket PATH option */
                        ERR("Can't get HOME directory for sockets creation.");
@@ -4366,6 +4518,9 @@ int main(int argc, char **argv)
 
        write_pidfile();
 
+       /* This is to get the TCP timeout value. */
+       lttcomm_inet_init();
+
        /* Create thread to manage the client socket */
        ret = pthread_create(&ht_cleanup_thread, NULL,
                        thread_ht_cleanup, (void *) NULL);
This page took 0.027052 seconds and 4 git commands to generate.