Fix: put session list lock around the app registration
[lttng-tools.git] / src / bin / lttng-sessiond / main.c
index 58333c22df73fed08c3c64df0f6dac056dfd283e..d95cfa9bbcd6b837633a0bfea5151ccf44451d53 100644 (file)
@@ -221,12 +221,6 @@ enum consumerd_state {
 static enum consumerd_state ust_consumerd_state;
 static enum consumerd_state kernel_consumerd_state;
 
-/* Used for the health monitoring of the session daemon. See health.h */
-struct health_state health_thread_cmd;
-struct health_state health_thread_app_manage;
-struct health_state health_thread_app_reg;
-struct health_state health_thread_kernel;
-
 /*
  * Socket timeout for receiving and sending in seconds.
  */
@@ -664,8 +658,6 @@ static void update_ust_app(int app_sock)
 {
        struct ltt_session *sess, *stmp;
 
-       session_lock_list();
-
        /* For all tracing session(s) */
        cds_list_for_each_entry_safe(sess, stmp, &session_list_ptr->head, list) {
                session_lock(sess);
@@ -674,8 +666,6 @@ static void update_ust_app(int app_sock)
                }
                session_unlock(sess);
        }
-
-       session_unlock_list();
 }
 
 /*
@@ -705,14 +695,14 @@ static void *thread_manage_kernel(void *data)
                goto error_testpoint;
        }
 
-       health_code_update(&health_thread_kernel);
+       health_code_update();
 
        if (testpoint(thread_manage_kernel_before_loop)) {
                goto error_testpoint;
        }
 
        while (1) {
-               health_code_update(&health_thread_kernel);
+               health_code_update();
 
                if (update_poll_flag == 1) {
                        /* Clean events object. We are about to populate it again. */
@@ -740,9 +730,9 @@ static void *thread_manage_kernel(void *data)
 
                /* Poll infinite value of time */
        restart:
-               health_poll_update(&health_thread_kernel);
+               health_poll_entry();
                ret = lttng_poll_wait(&events, -1);
-               health_poll_update(&health_thread_kernel);
+               health_poll_exit();
                if (ret < 0) {
                        /*
                         * Restart interrupted system call.
@@ -765,7 +755,7 @@ static void *thread_manage_kernel(void *data)
                        revents = LTTNG_POLL_GETEV(&events, i);
                        pollfd = LTTNG_POLL_GETFD(&events, i);
 
-                       health_code_update(&health_thread_kernel);
+                       health_code_update();
 
                        /* Thread quit pipe has been closed. Killing thread. */
                        ret = check_thread_quit_pipe(pollfd, revents);
@@ -813,7 +803,7 @@ error_testpoint:
        utils_close_pipe(kernel_poll_pipe);
        kernel_poll_pipe[0] = kernel_poll_pipe[1] = -1;
        if (err) {
-               health_error(&health_thread_kernel);
+               health_error();
                ERR("Health error occurred in %s", __func__);
                WARN("Kernel thread died unexpectedly. "
                                "Kernel tracing can continue but CPU hotplug is disabled.");
@@ -861,24 +851,7 @@ static void *thread_manage_consumer(void *data)
 
        health_register(HEALTH_TYPE_CONSUMER);
 
-       /*
-        * Since the consumer thread can be spawned at any moment in time, we init
-        * the health to a poll status (1, which is a valid health over time).
-        * When the thread starts, we update here the health to a "code" path being
-        * an even value so this thread, when reaching a poll wait, does not
-        * trigger an error with an even value.
-        *
-        * Here is the use case we avoid.
-        *
-        * +1: the first poll update during initialization (main())
-        * +2 * x: multiple code update once in this thread.
-        * +1: poll wait in this thread (being a good health state).
-        * == even number which after the wait period shows as a bad health.
-        *
-        * In a nutshell, the following poll update to the health state brings back
-        * the state to an even value meaning a code path.
-        */
-       health_poll_update(&consumer_data->health);
+       health_code_update();
 
        /*
         * Pass 2 as size here for the thread quit pipe and kconsumerd_err_sock.
@@ -899,18 +872,18 @@ static void *thread_manage_consumer(void *data)
                goto error;
        }
 
-       health_code_update(&consumer_data->health);
+       health_code_update();
 
        /* Inifinite blocking call, waiting for transmission */
 restart:
-       health_poll_update(&consumer_data->health);
+       health_poll_entry();
 
        if (testpoint(thread_manage_consumer)) {
                goto error;
        }
 
        ret = lttng_poll_wait(&events, -1);
-       health_poll_update(&consumer_data->health);
+       health_poll_exit();
        if (ret < 0) {
                /*
                 * Restart interrupted system call.
@@ -928,7 +901,7 @@ restart:
                revents = LTTNG_POLL_GETEV(&events, i);
                pollfd = LTTNG_POLL_GETFD(&events, i);
 
-               health_code_update(&consumer_data->health);
+               health_code_update();
 
                /* Thread quit pipe has been closed. Killing thread. */
                ret = check_thread_quit_pipe(pollfd, revents);
@@ -957,7 +930,7 @@ restart:
         */
        (void) utils_set_fd_cloexec(sock);
 
-       health_code_update(&consumer_data->health);
+       health_code_update();
 
        DBG2("Receiving code from consumer err_sock");
 
@@ -968,7 +941,7 @@ restart:
                goto error;
        }
 
-       health_code_update(&consumer_data->health);
+       health_code_update();
 
        if (code == LTTCOMM_CONSUMERD_COMMAND_SOCK_READY) {
                consumer_data->cmd_sock =
@@ -998,13 +971,13 @@ restart:
                goto error;
        }
 
-       health_code_update(&consumer_data->health);
+       health_code_update();
 
        /* Inifinite blocking call, waiting for transmission */
 restart_poll:
-       health_poll_update(&consumer_data->health);
+       health_poll_entry();
        ret = lttng_poll_wait(&events, -1);
-       health_poll_update(&consumer_data->health);
+       health_poll_exit();
        if (ret < 0) {
                /*
                 * Restart interrupted system call.
@@ -1022,7 +995,7 @@ restart_poll:
                revents = LTTNG_POLL_GETEV(&events, i);
                pollfd = LTTNG_POLL_GETFD(&events, i);
 
-               health_code_update(&consumer_data->health);
+               health_code_update();
 
                /* Thread quit pipe has been closed. Killing thread. */
                ret = check_thread_quit_pipe(pollfd, revents);
@@ -1040,7 +1013,7 @@ restart_poll:
                }
        }
 
-       health_code_update(&consumer_data->health);
+       health_code_update();
 
        /* Wait for any kconsumerd error */
        ret = lttcomm_recv_unix_sock(sock, &code,
@@ -1091,7 +1064,7 @@ error:
        lttng_poll_clean(&events);
 error_poll:
        if (err) {
-               health_error(&consumer_data->health);
+               health_error();
                ERR("Health error occurred in %s", __func__);
        }
        health_unregister();
@@ -1121,7 +1094,7 @@ static void *thread_manage_apps(void *data)
                goto error_testpoint;
        }
 
-       health_code_update(&health_thread_app_manage);
+       health_code_update();
 
        ret = create_thread_poll_set(&events, 2);
        if (ret < 0) {
@@ -1137,16 +1110,16 @@ static void *thread_manage_apps(void *data)
                goto error;
        }
 
-       health_code_update(&health_thread_app_manage);
+       health_code_update();
 
        while (1) {
                DBG("Apps thread polling on %d fds", LTTNG_POLL_GETNB(&events));
 
                /* Inifinite blocking call, waiting for transmission */
        restart:
-               health_poll_update(&health_thread_app_manage);
+               health_poll_entry();
                ret = lttng_poll_wait(&events, -1);
-               health_poll_update(&health_thread_app_manage);
+               health_poll_exit();
                if (ret < 0) {
                        /*
                         * Restart interrupted system call.
@@ -1164,7 +1137,7 @@ static void *thread_manage_apps(void *data)
                        revents = LTTNG_POLL_GETEV(&events, i);
                        pollfd = LTTNG_POLL_GETFD(&events, i);
 
-                       health_code_update(&health_thread_app_manage);
+                       health_code_update();
 
                        /* Thread quit pipe has been closed. Killing thread. */
                        ret = check_thread_quit_pipe(pollfd, revents);
@@ -1188,18 +1161,28 @@ static void *thread_manage_apps(void *data)
                                                goto error;
                                        }
 
-                                       health_code_update(&health_thread_app_manage);
+                                       health_code_update();
+
+                                       /*
+                                        * @session_lock
+                                        * Lock the global session list so from the register up to
+                                        * the registration done message, no thread can see the
+                                        * application and change its state.
+                                        */
+                                       session_lock_list();
 
                                        /* Register applicaton to the session daemon */
                                        ret = ust_app_register(&ust_cmd.reg_msg,
                                                        ust_cmd.sock);
                                        if (ret == -ENOMEM) {
+                                               session_unlock_list();
                                                goto error;
                                        } else if (ret < 0) {
+                                               session_unlock_list();
                                                break;
                                        }
 
-                                       health_code_update(&health_thread_app_manage);
+                                       health_code_update();
 
                                        /*
                                         * Validate UST version compatibility.
@@ -1213,7 +1196,7 @@ static void *thread_manage_apps(void *data)
                                                update_ust_app(ust_cmd.sock);
                                        }
 
-                                       health_code_update(&health_thread_app_manage);
+                                       health_code_update();
 
                                        ret = ust_app_register_done(ust_cmd.sock);
                                        if (ret < 0) {
@@ -1231,6 +1214,7 @@ static void *thread_manage_apps(void *data)
                                                ret = lttng_poll_add(&events, ust_cmd.sock,
                                                                LPOLLERR & LPOLLHUP & LPOLLRDHUP);
                                                if (ret < 0) {
+                                                       session_unlock_list();
                                                        goto error;
                                                }
 
@@ -1243,8 +1227,9 @@ static void *thread_manage_apps(void *data)
                                                DBG("Apps with sock %d added to poll set",
                                                                ust_cmd.sock);
                                        }
+                                       session_unlock_list();
 
-                                       health_code_update(&health_thread_app_manage);
+                                       health_code_update();
 
                                        break;
                                }
@@ -1266,7 +1251,7 @@ static void *thread_manage_apps(void *data)
                                }
                        }
 
-                       health_code_update(&health_thread_app_manage);
+                       health_code_update();
                }
        }
 
@@ -1285,7 +1270,7 @@ error_testpoint:
         */
 
        if (err) {
-               health_error(&health_thread_app_manage);
+               health_error();
                ERR("Health error occurred in %s", __func__);
        }
        health_unregister();
@@ -1424,9 +1409,9 @@ static void *thread_registration_apps(void *data)
 
                /* Inifinite blocking call, waiting for transmission */
        restart:
-               health_poll_update(&health_thread_app_reg);
+               health_poll_entry();
                ret = lttng_poll_wait(&events, -1);
-               health_poll_update(&health_thread_app_reg);
+               health_poll_exit();
                if (ret < 0) {
                        /*
                         * Restart interrupted system call.
@@ -1440,7 +1425,7 @@ static void *thread_registration_apps(void *data)
                nb_fd = ret;
 
                for (i = 0; i < nb_fd; i++) {
-                       health_code_update(&health_thread_app_reg);
+                       health_code_update();
 
                        /* Fetch once the poll data */
                        revents = LTTNG_POLL_GETEV(&events, i);
@@ -1492,7 +1477,7 @@ static void *thread_registration_apps(void *data)
                                                sock = -1;
                                                continue;
                                        }
-                                       health_code_update(&health_thread_app_reg);
+                                       health_code_update();
                                        ret = lttcomm_recv_unix_sock(sock, &ust_cmd->reg_msg,
                                                        sizeof(struct ust_register_msg));
                                        if (ret < 0 || ret < sizeof(struct ust_register_msg)) {
@@ -1510,7 +1495,7 @@ static void *thread_registration_apps(void *data)
                                                sock = -1;
                                                continue;
                                        }
-                                       health_code_update(&health_thread_app_reg);
+                                       health_code_update();
 
                                        ust_cmd->sock = sock;
                                        sock = -1;
@@ -1541,7 +1526,7 @@ static void *thread_registration_apps(void *data)
 exit:
 error:
        if (err) {
-               health_error(&health_thread_app_reg);
+               health_error();
                ERR("Health error occurred in %s", __func__);
        }
 
@@ -3162,7 +3147,7 @@ static void *thread_manage_clients(void *data)
                goto error_testpoint;
        }
 
-       health_code_update(&health_thread_cmd);
+       health_code_update();
 
        ret = lttcomm_listen_unix_sock(client_sock);
        if (ret < 0) {
@@ -3195,16 +3180,16 @@ static void *thread_manage_clients(void *data)
                goto error;
        }
 
-       health_code_update(&health_thread_cmd);
+       health_code_update();
 
        while (1) {
                DBG("Accepting client command ...");
 
                /* Inifinite blocking call, waiting for transmission */
        restart:
-               health_poll_update(&health_thread_cmd);
+               health_poll_entry();
                ret = lttng_poll_wait(&events, -1);
-               health_poll_update(&health_thread_cmd);
+               health_poll_exit();
                if (ret < 0) {
                        /*
                         * Restart interrupted system call.
@@ -3222,7 +3207,7 @@ static void *thread_manage_clients(void *data)
                        revents = LTTNG_POLL_GETEV(&events, i);
                        pollfd = LTTNG_POLL_GETFD(&events, i);
 
-                       health_code_update(&health_thread_cmd);
+                       health_code_update();
 
                        /* Thread quit pipe has been closed. Killing thread. */
                        ret = check_thread_quit_pipe(pollfd, revents);
@@ -3242,7 +3227,7 @@ static void *thread_manage_clients(void *data)
 
                DBG("Wait for client response");
 
-               health_code_update(&health_thread_cmd);
+               health_code_update();
 
                sock = lttcomm_accept_unix_sock(client_sock);
                if (sock < 0) {
@@ -3278,7 +3263,7 @@ static void *thread_manage_clients(void *data)
                cmd_ctx->llm = NULL;
                cmd_ctx->session = NULL;
 
-               health_code_update(&health_thread_cmd);
+               health_code_update();
 
                /*
                 * Data is received from the lttng client. The struct
@@ -3299,7 +3284,7 @@ static void *thread_manage_clients(void *data)
                        continue;
                }
 
-               health_code_update(&health_thread_cmd);
+               health_code_update();
 
                // TODO: Validate cmd_ctx including sanity check for
                // security purpose.
@@ -3332,7 +3317,7 @@ static void *thread_manage_clients(void *data)
                        continue;
                }
 
-               health_code_update(&health_thread_cmd);
+               health_code_update();
 
                DBG("Sending response (size: %d, retcode: %s)",
                                cmd_ctx->lttng_msg_size,
@@ -3351,7 +3336,7 @@ static void *thread_manage_clients(void *data)
 
                clean_command_ctx(&cmd_ctx);
 
-               health_code_update(&health_thread_cmd);
+               health_code_update();
        }
 
 exit:
@@ -3378,7 +3363,7 @@ error_testpoint:
        }
 
        if (err) {
-               health_error(&health_thread_cmd);
+               health_error();
                ERR("Health error occurred in %s", __func__);
        }
 
This page took 0.031662 seconds and 4 git commands to generate.