#include <sys/types.h>
#include <sys/wait.h>
#include <urcu/futex.h>
+#include <urcu/uatomic.h>
#include <unistd.h>
#include <config.h>
#include "shm.h"
#include "ust-ctl.h"
#include "utils.h"
+#include "fd-limit.h"
#define CONSUMERD_FILE "lttng-consumerd"
static const char *consumerd32_libdir = CONFIG_CONSUMERD32_LIBDIR;
static const char *consumerd64_libdir = CONFIG_CONSUMERD64_LIBDIR;
+/*
+ * Consumer daemon state which is changed when spawning it, killing it or in
+ * case of a fatal error.
+ */
+enum consumerd_state {
+ CONSUMER_STARTED = 1, /* Set from the command-processing path (kernel and UST branches). */
+ CONSUMER_STOPPED = 2, /* Initial value given to both state variables at startup. */
+ CONSUMER_ERROR = 3, /* Set on the consumer error path; client commands are then rejected. */
+};
+
+/*
+ * This consumer daemon state is used to validate if a client command will be
+ * able to reach the consumer. If not, the client is informed. For instance,
+ * doing a "lttng start" when the consumer state is set to ERROR will return an
+ * error to the client.
+ *
+ * The following example shows a possible race condition of this scheme:
+ *
+ * consumer thread error happens
+ * client cmd arrives
+ * client cmd checks state -> still OK
+ * consumer thread exits, sets error
+ * client cmd tries to talk to consumer
+ * ...
+ *
+ * However, since the consumer is a different daemon, we have no way of making
+ * sure the command will reach it safely even with this state flag. This is why
+ * we consider that up to the state validation during command processing, the
+ * command is safe. After that, we can not guarantee the correctness of the
+ * client request vis-a-vis the consumer.
+ */
+static enum consumerd_state ust_consumerd_state;
+static enum consumerd_state kernel_consumerd_state;
+
static
void setup_consumerd_path(void)
{
if (ret) {
PERROR("close");
}
-
}
}
for (i = 0; i < 2; i++) {
ERR("consumer return code : %s", lttcomm_get_readable_code(-code));
error:
+ /* Immediately flag the consumerd state as being in error */
+ if (consumer_data->type == LTTNG_CONSUMER_KERNEL) {
+ uatomic_set(&kernel_consumerd_state, CONSUMER_ERROR);
+ } else if (consumer_data->type == LTTNG_CONSUMER64_UST ||
+ consumer_data->type == LTTNG_CONSUMER32_UST) {
+ uatomic_set(&ust_consumerd_state, CONSUMER_ERROR);
+ } else {
+ /* Code flow error... */
+ assert(0);
+ }
+
if (consumer_data->err_sock >= 0) {
ret = close(consumer_data->err_sock);
if (ret) {
* Using message-based transmissions to ensure we don't
* have to deal with partially received messages.
*/
+ ret = lttng_fd_get(LTTNG_FD_APPS, 1);
+ if (ret < 0) {
+ ERR("Exhausted file descriptors allowed for applications.");
+ free(ust_cmd);
+ ret = close(sock);
+ if (ret) {
+ PERROR("close");
+ }
+ sock = -1;
+ continue;
+ }
ret = lttcomm_recv_unix_sock(sock, &ust_cmd->reg_msg,
sizeof(struct ust_register_msg));
if (ret < 0 || ret < sizeof(struct ust_register_msg)) {
if (ret) {
PERROR("close");
}
+ lttng_fd_put(LTTNG_FD_APPS, 1);
sock = -1;
continue;
}
if (ret) {
PERROR("close");
}
+ lttng_fd_put(LTTNG_FD_APPS, 1);
}
unlink(apps_unix_sock_path);
nb_chan = session->kernel_session->channel_count;
}
DBG3("Number of kernel channels %zd", nb_chan);
+ if (nb_chan <= 0) {
+ ret = LTTCOMM_KERN_CHAN_NOT_FOUND;
+ }
break;
case LTTNG_DOMAIN_UST:
if (session->ust_session != NULL) {
session->ust_session->domain_global.channels);
}
DBG3("Number of UST global channels %zd", nb_chan);
+ if (nb_chan <= 0) {
+ ret = LTTCOMM_UST_CHAN_NOT_FOUND;
+ }
break;
default:
*channels = NULL;
list_lttng_channels(domain, session, *channels);
} else {
*channels = NULL;
+ /* Ret value was set in the domain switch case */
+ goto error;
}
return nb_chan;
break;
default:
DBG("Getting session %s by name", cmd_ctx->lsm->session.name);
+ /*
+ * We keep the session list lock across _all_ commands
+ * for now, because the per-session lock does not
+ * handle teardown properly.
+ */
session_lock_list();
cmd_ctx->session = session_find_by_name(cmd_ctx->lsm->session.name);
- session_unlock_list();
if (cmd_ctx->session == NULL) {
if (cmd_ctx->lsm->session.name != NULL) {
ret = LTTCOMM_SESS_NOT_FOUND;
}
}
+ /* Consumer is in an ERROR state. Report back to client */
+ if (uatomic_read(&kernel_consumerd_state) == CONSUMER_ERROR) {
+ ret = LTTCOMM_NO_KERNCONSUMERD;
+ goto error;
+ }
+
/* Need a session for kernel command */
if (need_tracing_session) {
if (cmd_ctx->session->kernel_session == NULL) {
ret = LTTCOMM_KERN_CONSUMER_FAIL;
goto error;
}
+ uatomic_set(&kernel_consumerd_state, CONSUMER_STARTED);
} else {
pthread_mutex_unlock(&kconsumer_data.pid_mutex);
}
}
+
break;
case LTTNG_DOMAIN_UST:
{
+ /* Consumer is in an ERROR state. Report back to client */
+ if (uatomic_read(&ust_consumerd_state) == CONSUMER_ERROR) {
+ ret = LTTCOMM_NO_USTCONSUMERD;
+ goto error;
+ }
+
if (need_tracing_session) {
if (cmd_ctx->session->ust_session == NULL) {
ret = create_ust_session(cmd_ctx->session,
}
ust_consumerd64_fd = ustconsumer64_data.cmd_sock;
+ uatomic_set(&ust_consumerd_state, CONSUMER_STARTED);
} else {
pthread_mutex_unlock(&ustconsumer64_data.pid_mutex);
}
ust_consumerd32_fd = -EINVAL;
goto error;
}
+
ust_consumerd32_fd = ustconsumer32_data.cmd_sock;
+ uatomic_set(&ust_consumerd_state, CONSUMER_STARTED);
} else {
pthread_mutex_unlock(&ustconsumer32_data.pid_mutex);
}
}
skip_domain:
+ /* Validate the consumer daemon state for start/stop trace commands */
+ if (cmd_ctx->lsm->cmd_type == LTTNG_START_TRACE ||
+ cmd_ctx->lsm->cmd_type == LTTNG_STOP_TRACE) {
+ switch (cmd_ctx->lsm->domain.type) {
+ case LTTNG_DOMAIN_UST:
+ if (uatomic_read(&ust_consumerd_state) != CONSUMER_STARTED) {
+ ret = LTTCOMM_NO_USTCONSUMERD;
+ goto error;
+ }
+ break;
+ case LTTNG_DOMAIN_KERNEL:
+ if (uatomic_read(&kernel_consumerd_state) != CONSUMER_STARTED) {
+ ret = LTTCOMM_NO_KERNCONSUMERD;
+ goto error;
+ }
+ break;
+ }
+ }
+
/*
* Check that the UID or GID match that of the tracing session.
* The root user can interact with all sessions.
{
ret = cmd_destroy_session(cmd_ctx->session,
cmd_ctx->lsm->session.name);
+ /*
+ * Set session to NULL so we do not unlock it after
+ * free.
+ */
+ cmd_ctx->session = NULL;
break;
}
case LTTNG_LIST_DOMAINS:
}
case LTTNG_LIST_CHANNELS:
{
- size_t nb_chan;
+ int nb_chan;
struct lttng_channel *channels;
nb_chan = cmd_list_channels(cmd_ctx->lsm->domain.type,
if (cmd_ctx->session) {
session_unlock(cmd_ctx->session);
}
+ if (need_tracing_session) {
+ session_unlock_list();
+ }
init_setup_error:
return ret;
}
int ret;
gid_t gid;
- gid = allowed_group();
- if (gid < 0) {
+ ret = allowed_group();
+ if (ret < 0) {
WARN("No tracing group detected");
ret = 0;
goto end;
}
+ gid = ret;
+
/* Set lttng run dir */
ret = chown(rundir, 0, gid);
if (ret < 0) {
rcu_register_thread();
- /* Create thread quit pipe */
- if ((ret = init_thread_quit_pipe()) < 0) {
- goto error;
- }
-
setup_consumerd_path();
/* Parse arguments */
/* Daemonize */
if (opt_daemon) {
+ int i;
+
+ /*
+ * fork
+ * child: setsid, close FD 0, 1, 2, chdir /
+ * parent: exit (if fork is successful)
+ */
ret = daemon(0, 0);
if (ret < 0) {
PERROR("daemon");
goto error;
}
+ /*
+ * We are in the child. Make sure all other file
+ * descriptors are closed, in case we are called with
+ * more opened file descriptors than the standard ones.
+ */
+ for (i = 3; i < sysconf(_SC_OPEN_MAX); i++) {
+ (void) close(i);
+ }
+ }
+
+ /* Create thread quit pipe */
+ if ((ret = init_thread_quit_pipe()) < 0) {
+ goto error;
}
/* Check if daemon is UID = 0 */
}
}
+ /* Set consumer initial state */
+ kernel_consumerd_state = CONSUMER_STOPPED;
+ ust_consumerd_state = CONSUMER_STOPPED;
+
DBG("Client socket path %s", client_unix_sock_path);
DBG("Application socket path %s", apps_unix_sock_path);
DBG("LTTng run directory path: %s", rundir);
/* Set ulimit for open files */
set_ulimit();
}
+ /* init lttng_fd tracking must be done after set_ulimit. */
+ lttng_fd_init();
ret = set_consumer_sockets(&ustconsumer64_data, rundir);
if (ret < 0) {