static enum consumerd_state ust_consumerd_state;
static enum consumerd_state kernel_consumerd_state;
-/* Used for the health monitoring of the session daemon. See health.h */
-struct health_state health_thread_cmd;
-struct health_state health_thread_app_manage;
-struct health_state health_thread_app_reg;
-struct health_state health_thread_kernel;
-
/*
* Socket timeout for receiving and sending in seconds.
*/
static int send_unix_sock(int sock, void *buf, size_t len)
{
/* Check valid length */
- if (len <= 0) {
+ if (len == 0) {
return -1;
}
struct lttng_ht_iter iter;
struct consumer_socket *socket;
-
+ rcu_read_lock();
cds_lfht_for_each_entry(ksess->consumer->socks->ht,
&iter.iter, socket, node.node) {
/* Code flow error */
channel, ksess);
pthread_mutex_unlock(socket->lock);
if (ret < 0) {
+ rcu_read_unlock();
goto error;
}
}
+ rcu_read_unlock();
}
goto error;
}
DBG("[thread] Thread manage kernel started");
- if (testpoint(thread_manage_kernel)) {
- goto error_testpoint;
- }
+ health_register(HEALTH_TYPE_KERNEL);
- health_code_update(&health_thread_kernel);
+ /*
+ * The first step of the while loop cleans this structure, which could free
+ * non-NULL pointers, so zero it before entering the loop.
+ */
+ memset(&events, 0, sizeof(events));
- ret = create_thread_poll_set(&events, 2);
- if (ret < 0) {
- goto error_poll_create;
+ if (testpoint(thread_manage_kernel)) {
+ goto error_testpoint;
}
- ret = lttng_poll_add(&events, kernel_poll_pipe[0], LPOLLIN);
- if (ret < 0) {
- goto error;
- }
+ health_code_update();
if (testpoint(thread_manage_kernel_before_loop)) {
- goto error;
+ goto error_testpoint;
}
while (1) {
- health_code_update(&health_thread_kernel);
+ health_code_update();
if (update_poll_flag == 1) {
- /*
- * Reset number of fd in the poll set. Always 2 since there is the thread
- * quit pipe and the kernel pipe.
- */
- events.nb_fd = 2;
+ /* Clean events object. We are about to populate it again. */
+ lttng_poll_clean(&events);
+
+ ret = create_thread_poll_set(&events, 2);
+ if (ret < 0) {
+ goto error_poll_create;
+ }
+ ret = lttng_poll_add(&events, kernel_poll_pipe[0], LPOLLIN);
+ if (ret < 0) {
+ goto error;
+ }
+
+ /* This will add the available kernel channel if any. */
ret = update_kernel_poll(&events);
if (ret < 0) {
goto error;
update_poll_flag = 0;
}
- nb_fd = LTTNG_POLL_GETNB(&events);
-
- DBG("Thread kernel polling on %d fds", nb_fd);
-
- /* Zeroed the poll events */
- lttng_poll_reset(&events);
+ DBG("Thread kernel polling on %d fds", LTTNG_POLL_GETNB(&events));
/* Poll infinite value of time */
restart:
- health_poll_update(&health_thread_kernel);
+ health_poll_update();
ret = lttng_poll_wait(&events, -1);
- health_poll_update(&health_thread_kernel);
+ health_poll_update();
if (ret < 0) {
/*
* Restart interrupted system call.
continue;
}
+ nb_fd = ret;
+
for (i = 0; i < nb_fd; i++) {
/* Fetch once the poll data */
revents = LTTNG_POLL_GETEV(&events, i);
pollfd = LTTNG_POLL_GETFD(&events, i);
- health_code_update(&health_thread_kernel);
+ health_code_update();
/* Thread quit pipe has been closed. Killing thread. */
ret = check_thread_quit_pipe(pollfd, revents);
/* Check for data on kernel pipe */
if (pollfd == kernel_poll_pipe[0] && (revents & LPOLLIN)) {
- ret = read(kernel_poll_pipe[0], &tmp, 1);
+ do {
+ ret = read(kernel_poll_pipe[0], &tmp, 1);
+ } while (ret < 0 && errno == EINTR);
+ /*
+ * The return value is irrelevant here: if this pipe sees any activity,
+ * an update is required anyway.
+ */
update_poll_flag = 1;
continue;
} else {
utils_close_pipe(kernel_poll_pipe);
kernel_poll_pipe[0] = kernel_poll_pipe[1] = -1;
if (err) {
- health_error(&health_thread_kernel);
+ health_error();
ERR("Health error occurred in %s", __func__);
WARN("Kernel thread died unexpectedly. "
"Kernel tracing can continue but CPU hotplug is disabled.");
}
- health_exit(&health_thread_kernel);
+ health_unregister();
DBG("Kernel thread dying");
return NULL;
}
DBG("[thread] Manage consumer started");
+ health_register(HEALTH_TYPE_CONSUMER);
+
/*
* Since the consumer thread can be spawned at any moment in time, we init
* the health to a poll status (1, which is a valid health over time).
* In a nutshell, the following poll update to the health state brings back
* the state to an even value meaning a code path.
*/
- health_poll_update(&consumer_data->health);
+ health_poll_update();
/*
* Pass 2 as size here for the thread quit pipe and kconsumerd_err_sock.
goto error;
}
- nb_fd = LTTNG_POLL_GETNB(&events);
-
- health_code_update(&consumer_data->health);
+ health_code_update();
/* Inifinite blocking call, waiting for transmission */
restart:
- health_poll_update(&consumer_data->health);
+ health_poll_update();
if (testpoint(thread_manage_consumer)) {
goto error;
}
ret = lttng_poll_wait(&events, -1);
- health_poll_update(&consumer_data->health);
+ health_poll_update();
if (ret < 0) {
/*
* Restart interrupted system call.
goto error;
}
+ nb_fd = ret;
+
for (i = 0; i < nb_fd; i++) {
/* Fetch once the poll data */
revents = LTTNG_POLL_GETEV(&events, i);
pollfd = LTTNG_POLL_GETFD(&events, i);
- health_code_update(&consumer_data->health);
+ health_code_update();
/* Thread quit pipe has been closed. Killing thread. */
ret = check_thread_quit_pipe(pollfd, revents);
*/
(void) utils_set_fd_cloexec(sock);
- health_code_update(&consumer_data->health);
+ health_code_update();
DBG2("Receiving code from consumer err_sock");
goto error;
}
- health_code_update(&consumer_data->health);
+ health_code_update();
if (code == LTTCOMM_CONSUMERD_COMMAND_SOCK_READY) {
consumer_data->cmd_sock =
goto error;
}
- health_code_update(&consumer_data->health);
-
- /* Update number of fd */
- nb_fd = LTTNG_POLL_GETNB(&events);
+ health_code_update();
/* Inifinite blocking call, waiting for transmission */
restart_poll:
- health_poll_update(&consumer_data->health);
+ health_poll_update();
ret = lttng_poll_wait(&events, -1);
- health_poll_update(&consumer_data->health);
+ health_poll_update();
if (ret < 0) {
/*
* Restart interrupted system call.
goto error;
}
+ nb_fd = ret;
+
for (i = 0; i < nb_fd; i++) {
/* Fetch once the poll data */
revents = LTTNG_POLL_GETEV(&events, i);
pollfd = LTTNG_POLL_GETFD(&events, i);
- health_code_update(&consumer_data->health);
+ health_code_update();
/* Thread quit pipe has been closed. Killing thread. */
ret = check_thread_quit_pipe(pollfd, revents);
}
}
- health_code_update(&consumer_data->health);
+ health_code_update();
/* Wait for any kconsumerd error */
ret = lttcomm_recv_unix_sock(sock, &code,
lttng_poll_clean(&events);
error_poll:
if (err) {
- health_error(&consumer_data->health);
+ health_error();
ERR("Health error occurred in %s", __func__);
}
- health_exit(&consumer_data->health);
+ health_unregister();
DBG("consumer thread cleanup completed");
return NULL;
rcu_register_thread();
rcu_thread_online();
+ health_register(HEALTH_TYPE_APP_MANAGE);
+
if (testpoint(thread_manage_apps)) {
goto error_testpoint;
}
- health_code_update(&health_thread_app_manage);
+ health_code_update();
ret = create_thread_poll_set(&events, 2);
if (ret < 0) {
goto error;
}
- health_code_update(&health_thread_app_manage);
+ health_code_update();
while (1) {
- /* Zeroed the events structure */
- lttng_poll_reset(&events);
-
- nb_fd = LTTNG_POLL_GETNB(&events);
-
- DBG("Apps thread polling on %d fds", nb_fd);
+ DBG("Apps thread polling on %d fds", LTTNG_POLL_GETNB(&events));
/* Inifinite blocking call, waiting for transmission */
restart:
- health_poll_update(&health_thread_app_manage);
+ health_poll_update();
ret = lttng_poll_wait(&events, -1);
- health_poll_update(&health_thread_app_manage);
+ health_poll_update();
if (ret < 0) {
/*
* Restart interrupted system call.
goto error;
}
+ nb_fd = ret;
+
for (i = 0; i < nb_fd; i++) {
/* Fetch once the poll data */
revents = LTTNG_POLL_GETEV(&events, i);
pollfd = LTTNG_POLL_GETFD(&events, i);
- health_code_update(&health_thread_app_manage);
+ health_code_update();
/* Thread quit pipe has been closed. Killing thread. */
ret = check_thread_quit_pipe(pollfd, revents);
goto error;
} else if (revents & LPOLLIN) {
/* Empty pipe */
- ret = read(apps_cmd_pipe[0], &ust_cmd, sizeof(ust_cmd));
+ do {
+ ret = read(apps_cmd_pipe[0], &ust_cmd, sizeof(ust_cmd));
+ } while (ret < 0 && errno == EINTR);
if (ret < 0 || ret < sizeof(ust_cmd)) {
PERROR("read apps cmd pipe");
goto error;
}
- health_code_update(&health_thread_app_manage);
+ health_code_update();
/* Register applicaton to the session daemon */
ret = ust_app_register(&ust_cmd.reg_msg,
break;
}
- health_code_update(&health_thread_app_manage);
+ health_code_update();
/*
* Validate UST version compatibility.
update_ust_app(ust_cmd.sock);
}
- health_code_update(&health_thread_app_manage);
+ health_code_update();
ret = ust_app_register_done(ust_cmd.sock);
if (ret < 0) {
ust_cmd.sock);
}
- health_code_update(&health_thread_app_manage);
+ health_code_update();
break;
}
}
}
- health_code_update(&health_thread_app_manage);
+ health_code_update();
}
}
*/
if (err) {
- health_error(&health_thread_app_manage);
+ health_error();
ERR("Health error occurred in %s", __func__);
}
- health_exit(&health_thread_app_manage);
+ health_unregister();
DBG("Application communication apps thread cleanup complete");
rcu_thread_offline();
rcu_unregister_thread();
* at some point in time or wait to the end of the world :)
*/
if (apps_cmd_pipe[1] >= 0) {
- ret = write(apps_cmd_pipe[1], ust_cmd,
- sizeof(struct ust_command));
- if (ret < 0) {
+ do {
+ ret = write(apps_cmd_pipe[1], ust_cmd,
+ sizeof(struct ust_command));
+ } while (ret < 0 && errno == EINTR);
+ if (ret < 0 || ret != sizeof(struct ust_command)) {
PERROR("write apps cmd pipe");
if (errno == EBADF) {
/*
DBG("[thread] Manage application registration started");
+ health_register(HEALTH_TYPE_APP_REG);
+
if (testpoint(thread_registration_apps)) {
goto error_testpoint;
}
while (1) {
DBG("Accepting application registration");
- nb_fd = LTTNG_POLL_GETNB(&events);
-
/* Inifinite blocking call, waiting for transmission */
restart:
- health_poll_update(&health_thread_app_reg);
+ health_poll_update();
ret = lttng_poll_wait(&events, -1);
- health_poll_update(&health_thread_app_reg);
+ health_poll_update();
if (ret < 0) {
/*
* Restart interrupted system call.
goto error;
}
+ nb_fd = ret;
+
for (i = 0; i < nb_fd; i++) {
- health_code_update(&health_thread_app_reg);
+ health_code_update();
/* Fetch once the poll data */
revents = LTTNG_POLL_GETEV(&events, i);
sock = -1;
continue;
}
- health_code_update(&health_thread_app_reg);
+ health_code_update();
ret = lttcomm_recv_unix_sock(sock, &ust_cmd->reg_msg,
sizeof(struct ust_register_msg));
if (ret < 0 || ret < sizeof(struct ust_register_msg)) {
sock = -1;
continue;
}
- health_code_update(&health_thread_app_reg);
+ health_code_update();
ust_cmd->sock = sock;
sock = -1;
exit:
error:
if (err) {
- health_error(&health_thread_app_reg);
+ health_error();
ERR("Health error occurred in %s", __func__);
}
error_create_poll:
error_testpoint:
DBG("UST Registration thread cleanup complete");
- health_exit(&health_thread_app_reg);
+ health_unregister();
return NULL;
}
static int join_consumer_thread(struct consumer_data *consumer_data)
{
void *status;
- int ret;
/* Consumer pid must be a real one. */
if (consumer_data->pid > 0) {
+ int ret;
ret = kill(consumer_data->pid, SIGTERM);
if (ret) {
ERR("Error killing consumer daemon");
*/
static int start_consumerd(struct consumer_data *consumer_data)
{
- int ret, err;
+ int ret;
/*
* Set the listen() state on the socket since there is a possible race
error:
/* Cleanup already created socket on error. */
if (consumer_data->err_sock >= 0) {
+ int err;
+
err = close(consumer_data->err_sock);
if (err < 0) {
PERROR("close consumer data error socket");
{
int ret;
- ret = health_check_state(&kconsumer_data.health) &&
- health_check_state(&ustconsumer32_data.health) &&
- health_check_state(&ustconsumer64_data.health);
+ ret = health_check_state(HEALTH_TYPE_CONSUMER);
DBG3("Health consumer check %d", ret);
while (1) {
DBG("Health check ready");
- nb_fd = LTTNG_POLL_GETNB(&events);
-
/* Inifinite blocking call, waiting for transmission */
restart:
ret = lttng_poll_wait(&events, -1);
goto error;
}
+ nb_fd = ret;
+
for (i = 0; i < nb_fd; i++) {
/* Fetch once the poll data */
revents = LTTNG_POLL_GETEV(&events, i);
switch (msg.component) {
case LTTNG_HEALTH_CMD:
- reply.ret_code = health_check_state(&health_thread_cmd);
+ reply.ret_code = health_check_state(HEALTH_TYPE_CMD);
break;
case LTTNG_HEALTH_APP_MANAGE:
- reply.ret_code = health_check_state(&health_thread_app_manage);
+ reply.ret_code = health_check_state(HEALTH_TYPE_APP_MANAGE);
break;
case LTTNG_HEALTH_APP_REG:
- reply.ret_code = health_check_state(&health_thread_app_reg);
+ reply.ret_code = health_check_state(HEALTH_TYPE_APP_REG);
break;
case LTTNG_HEALTH_KERNEL:
- reply.ret_code = health_check_state(&health_thread_kernel);
+ reply.ret_code = health_check_state(HEALTH_TYPE_KERNEL);
break;
case LTTNG_HEALTH_CONSUMER:
reply.ret_code = check_consumer_health();
break;
case LTTNG_HEALTH_ALL:
reply.ret_code =
- health_check_state(&health_thread_app_manage) &&
- health_check_state(&health_thread_app_reg) &&
- health_check_state(&health_thread_cmd) &&
- health_check_state(&health_thread_kernel) &&
+ health_check_state(HEALTH_TYPE_APP_MANAGE) &&
+ health_check_state(HEALTH_TYPE_APP_REG) &&
+ health_check_state(HEALTH_TYPE_CMD) &&
+ health_check_state(HEALTH_TYPE_KERNEL) &&
check_consumer_health();
break;
default:
rcu_register_thread();
+ health_register(HEALTH_TYPE_CMD);
+
if (testpoint(thread_manage_clients)) {
goto error_testpoint;
}
- health_code_update(&health_thread_cmd);
+ health_code_update();
ret = lttcomm_listen_unix_sock(client_sock);
if (ret < 0) {
goto error;
}
- health_code_update(&health_thread_cmd);
+ health_code_update();
while (1) {
DBG("Accepting client command ...");
- nb_fd = LTTNG_POLL_GETNB(&events);
-
/* Inifinite blocking call, waiting for transmission */
restart:
- health_poll_update(&health_thread_cmd);
+ health_poll_update();
ret = lttng_poll_wait(&events, -1);
- health_poll_update(&health_thread_cmd);
+ health_poll_update();
if (ret < 0) {
/*
* Restart interrupted system call.
goto error;
}
+ nb_fd = ret;
+
for (i = 0; i < nb_fd; i++) {
/* Fetch once the poll data */
revents = LTTNG_POLL_GETEV(&events, i);
pollfd = LTTNG_POLL_GETFD(&events, i);
- health_code_update(&health_thread_cmd);
+ health_code_update();
/* Thread quit pipe has been closed. Killing thread. */
ret = check_thread_quit_pipe(pollfd, revents);
DBG("Wait for client response");
- health_code_update(&health_thread_cmd);
+ health_code_update();
sock = lttcomm_accept_unix_sock(client_sock);
if (sock < 0) {
cmd_ctx->llm = NULL;
cmd_ctx->session = NULL;
- health_code_update(&health_thread_cmd);
+ health_code_update();
/*
* Data is received from the lttng client. The struct
continue;
}
- health_code_update(&health_thread_cmd);
+ health_code_update();
// TODO: Validate cmd_ctx including sanity check for
// security purpose.
continue;
}
- health_code_update(&health_thread_cmd);
+ health_code_update();
DBG("Sending response (size: %d, retcode: %s)",
cmd_ctx->lttng_msg_size,
clean_command_ctx(&cmd_ctx);
- health_code_update(&health_thread_cmd);
+ health_code_update();
}
exit:
}
if (err) {
- health_error(&health_thread_cmd);
+ health_error();
ERR("Health error occurred in %s", __func__);
}
- health_exit(&health_thread_cmd);
+ health_unregister();
DBG("Client thread dying");
/* Parse arguments */
progname = argv[0];
- if ((ret = parse_args(argc, argv) < 0)) {
+ if ((ret = parse_args(argc, argv)) < 0) {
goto error;
}
cmd_init();
- /* Init all health thread counters. */
- health_init(&health_thread_cmd);
- health_init(&health_thread_kernel);
- health_init(&health_thread_app_manage);
- health_init(&health_thread_app_reg);
-
- /*
- * Init health counters of the consumer thread. We do a quick hack here to
- * the state of the consumer health is fine even if the thread is not
- * started. Once the thread starts, the health state is updated with a poll
- * value to set a health code path. This is simply to ease our life and has
- * no cost what so ever.
- */
- health_init(&kconsumer_data.health);
- health_poll_update(&kconsumer_data.health);
- health_init(&ustconsumer32_data.health);
- health_poll_update(&ustconsumer32_data.health);
- health_init(&ustconsumer64_data.health);
- health_poll_update(&ustconsumer64_data.health);
-
/* Check for the application socket timeout env variable. */
env_app_timeout = getenv(DEFAULT_APP_SOCKET_TIMEOUT_ENV);
if (env_app_timeout) {