Fix: Create a lock file to prevent multiple session daemons
[lttng-tools.git] / src / bin / lttng-sessiond / main.c
index 0bb06a3efe5aedd335f5a43238300aea7a5d62ba..841f5855a5bfa5fa71a105c5359dd69b3c7ca026 100644 (file)
@@ -76,10 +76,10 @@ static int opt_sig_parent;
 static int opt_verbose_consumer;
 static int opt_daemon, opt_background;
 static int opt_no_kernel;
-static int is_root;                    /* Set to 1 if the daemon is running as root */
 static pid_t ppid;          /* Parent PID for --sig-parent option */
 static pid_t child_ppid;    /* Internal parent PID use with daemonize. */
 static char *rundir;
+static int lockfile_fd = -1;
 
 /* Set to 1 when a SIGUSR1 signal is received. */
 static int recv_child_signal;
@@ -244,6 +244,9 @@ struct health_app *health_sessiond;
 /* JUL TCP port for registration. Used by the JUL thread. */
 unsigned int jul_tcp_port = DEFAULT_JUL_TCP_PORT;
 
+/* Am I root or not. */
+int is_root;                   /* Set to 1 if the daemon is running as root */
+
 /*
  * Whether sessiond is ready for commands/health check requests.
  * NR_LTTNG_SESSIOND_READY must match the number of calls to
@@ -456,6 +459,27 @@ static void close_consumer_sockets(void)
        }
 }
 
+/*
+ * Generate the full lock file path using the rundir.
+ *
+ * Return the snprintf() return value thus a negative value is an error.
+ */
+static int generate_lock_file_path(char *path, size_t len)
+{
+       int ret;
+
+       assert(path);
+       assert(rundir);
+
+       /* Build lockfile path from rundir. */
+       ret = snprintf(path, len, "%s/" DEFAULT_LTTNG_SESSIOND_LOCKFILE, rundir);
+       if (ret < 0) {
+               PERROR("snprintf lockfile path");
+       }
+
+       return ret;
+}
+
 /*
  * Cleanup the daemon
  */
@@ -537,14 +561,6 @@ static void cleanup(void)
        DBG("Removing directory %s", path);
        (void) rmdir(path);
 
-       /*
-        * We do NOT rmdir rundir because there are other processes
-        * using it, for instance lttng-relayd, which can start in
-        * parallel with this teardown.
-        */
-
-       free(rundir);
-
        DBG("Cleaning up all sessions");
 
        /* Destroy session list mutex */
@@ -576,6 +592,35 @@ static void cleanup(void)
 
        close_consumer_sockets();
 
+
+       /*
+        * Cleanup lock file by deleting it and finaly closing it which will
+        * release the file system lock.
+        */
+       if (lockfile_fd >= 0) {
+               char lockfile_path[PATH_MAX];
+
+               ret = generate_lock_file_path(lockfile_path, sizeof(lockfile_path));
+               if (ret > 0) {
+                       ret = remove(lockfile_path);
+                       if (ret < 0) {
+                               PERROR("remove lock file");
+                       }
+                       ret = close(lockfile_fd);
+                       if (ret < 0) {
+                               PERROR("close lock file");
+                       }
+               }
+       }
+
+       /*
+        * We do NOT rmdir rundir because there are other processes
+        * using it, for instance lttng-relayd, which can start in
+        * parallel with this teardown.
+        */
+
+       free(rundir);
+
        /* <fun> */
        DBG("%c[%d;%dm*** assert failed :-) *** ==> %c[%dm%c[%d;%dm"
                        "Matthew, BEET driven development works!%c[%dm",
@@ -3053,7 +3098,9 @@ skip_domain:
                struct lttng_event *events;
                ssize_t nb_events;
 
+               session_lock_list();
                nb_events = cmd_list_tracepoints(cmd_ctx->lsm->domain.type, &events);
+               session_unlock_list();
                if (nb_events < 0) {
                        /* Return value is a negative lttng_error_code. */
                        ret = -nb_events;
@@ -3084,8 +3131,10 @@ skip_domain:
                struct lttng_event_field *fields;
                ssize_t nb_fields;
 
+               session_lock_list();
                nb_fields = cmd_list_tracepoint_fields(cmd_ctx->lsm->domain.type,
                                &fields);
+               session_unlock_list();
                if (nb_fields < 0) {
                        /* Return value is a negative lttng_error_code. */
                        ret = -nb_fields;
@@ -4508,6 +4557,24 @@ error:
        return;
 }
 
+/*
+ * Create lockfile using the rundir and return its fd.
+ */
+static int create_lockfile(void)
+{
+       int ret;
+       char lockfile_path[PATH_MAX];
+
+       ret = generate_lock_file_path(lockfile_path, sizeof(lockfile_path));
+       if (ret < 0) {
+               goto error;
+       }
+
+       ret = utils_create_lock_file(lockfile_path);
+error:
+       return ret;
+}
+
 /*
  * Write JUL TCP port using the rundir.
  */
@@ -4684,6 +4751,11 @@ int main(int argc, char **argv)
                }
        }
 
+       lockfile_fd = create_lockfile();
+       if (lockfile_fd < 0) {
+               goto error;
+       }
+
        /* Set consumer initial state */
        kernel_consumerd_state = CONSUMER_STOPPED;
        ust_consumerd_state = CONSUMER_STOPPED;
@@ -4907,7 +4979,7 @@ int main(int argc, char **argv)
        ret = pthread_create(&apps_notify_thread, NULL,
                        ust_thread_manage_notify, (void *) NULL);
        if (ret != 0) {
-               PERROR("pthread_create apps");
+               PERROR("pthread_create notify");
                goto exit_apps_notify;
        }
 
@@ -4915,7 +4987,7 @@ int main(int argc, char **argv)
        ret = pthread_create(&jul_reg_thread, NULL,
                        jul_thread_manage_registration, (void *) NULL);
        if (ret != 0) {
-               PERROR("pthread_create apps");
+               PERROR("pthread_create JUL");
                goto exit_jul_reg;
        }
 
This page took 0.025501 seconds and 4 git commands to generate.