Implement file-backed ring buffer
[lttng-ust.git] / libringbuffer / shm.c
index fc7fbfb2c0642ca0c410ddae0c6cd1a45b5a792e..fb2df13bc11a5a1a43c0b5dc3394930fb8e1bd9a 100644 (file)
@@ -1,15 +1,28 @@
 /*
  * libringbuffer/shm.c
  *
- * Copyright 2011 (c) - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * Copyright (C) 2005-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
  *
- * Dual LGPL v2.1/GPL v2 license.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; only
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "shm.h"
 #include <unistd.h>
 #include <fcntl.h>
 #include <sys/mman.h>
+#include <sys/types.h>
 #include <sys/stat.h>  /* For mode constants */
 #include <fcntl.h>     /* For O_* constants */
 #include <assert.h>
 #include <signal.h>
 #include <dirent.h>
 #include <lttng/align.h>
+#include <limits.h>
+#include <helper.h>
+
+/*
+ * Ensure we have the required amount of space available by writing 0
+ * into the entire buffer. Not doing so can trigger SIGBUS when going
+ * beyond the available shm space.
+ */
+static
+int zero_file(int fd, size_t len)
+{
+       ssize_t retlen;
+       size_t written = 0;
+       char *zeropage;
+       long pagelen;
+       int ret;
+
+       pagelen = sysconf(_SC_PAGESIZE);
+       if (pagelen < 0)
+               return (int) pagelen;
+       zeropage = calloc(pagelen, 1);
+       if (!zeropage)
+               return -ENOMEM;
+
+       while (len > written) {
+               do {
+                       retlen = write(fd, zeropage,
+                               min_t(size_t, pagelen, len - written));
+               } while (retlen == -1UL && errno == EINTR);
+               if (retlen < 0) {
+                       ret = (int) retlen;
+                       goto error;
+               }
+               written += retlen;
+       }
+       ret = 0;
+error:
+       free(zeropage);
+       return ret;
+}
 
 struct shm_object_table *shm_object_table_create(size_t max_nb_obj)
 {
@@ -24,17 +77,75 @@ struct shm_object_table *shm_object_table_create(size_t max_nb_obj)
 
        table = zmalloc(sizeof(struct shm_object_table) +
                        max_nb_obj * sizeof(table->objects[0]));
+       if (!table)
+               return NULL;
        table->size = max_nb_obj;
        return table;
 }
 
-struct shm_object *shm_object_table_append(struct shm_object_table *table,
-                                          size_t memory_map_size)
+static
+int create_posix_shm(void)
+{
+       char tmp_name[NAME_MAX] = "/ust-shm-tmp-XXXXXX";
+       int shmfd, ret;
+
+       /*
+        * Allocate shm, and immediately unlink its shm oject, keeping
+        * only the file descriptor as a reference to the object. If it
+        * already exists (caused by short race window during which the
+        * global object exists in a concurrent shm_open), simply retry.
+        * We specifically do _not_ use the / at the beginning of the
+        * pathname so that some OS implementations can keep it local to
+        * the process (POSIX leaves this implementation-defined).
+        */
+       do {
+               /*
+                * Using mktemp filename with O_CREAT | O_EXCL open
+                * flags.
+                */
+               (void) mktemp(tmp_name);
+               if (tmp_name[0] == '\0') {
+                       PERROR("mktemp");
+                       goto error_shm_open;
+               }
+               shmfd = shm_open(tmp_name,
+                                O_CREAT | O_EXCL | O_RDWR, 0700);
+       } while (shmfd < 0 && (errno == EEXIST || errno == EACCES));
+       if (shmfd < 0) {
+               PERROR("shm_open");
+               goto error_shm_open;
+       }
+       ret = shm_unlink(tmp_name);
+       if (ret < 0 && errno != ENOENT) {
+               PERROR("shm_unlink");
+               goto error_shm_release;
+       }
+       return shmfd;
+
+error_shm_release:
+       ret = close(shmfd);
+       if (ret) {
+               PERROR("close");
+               assert(0);
+       }
+error_shm_open:
+       return -1;
+}
+
+static
+int create_shared_file(const char *shm_path)
+{
+       return open(shm_path, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
+}
+
+static
+struct shm_object *_shm_object_table_alloc_shm(struct shm_object_table *table,
+                                          size_t memory_map_size,
+                                          const char *shm_path)
 {
        int shmfd, waitfd[2], ret, i, sigblocked = 0;
        struct shm_object *obj;
        char *memory_map;
-       char tmp_name[NAME_MAX] = "ust-shm-tmp-XXXXXX";
        sigset_t all_sigs, orig_sigs;
 
        if (table->allocated_len >= table->size)
@@ -77,43 +188,32 @@ struct shm_object *shm_object_table_append(struct shm_object_table *table,
        }
        sigblocked = 1;
 
-       /*
-        * Allocate shm, and immediately unlink its shm oject, keeping
-        * only the file descriptor as a reference to the object. If it
-        * already exists (caused by short race window during which the
-        * global object exists in a concurrent shm_open), simply retry.
-        * We specifically do _not_ use the / at the beginning of the
-        * pathname so that some OS implementations can keep it local to
-        * the process (POSIX leaves this implementation-defined).
-        */
-       do {
-               /*
-                * Using mktemp filename with O_CREAT | O_EXCL open
-                * flags.
-                */
-               mktemp(tmp_name);
-               if (tmp_name[0] == '\0') {
-                       PERROR("mktemp");
-                       goto error_shm_open;
-               }
-               shmfd = shm_open(tmp_name,
-                                O_CREAT | O_EXCL | O_RDWR, 0700);
-       } while (shmfd < 0 && (errno == EEXIST || errno == EACCES));
-       if (shmfd < 0) {
-               PERROR("shm_open");
-               goto error_shm_open;
-       }
-       ret = shm_unlink(tmp_name);
-       if (ret < 0 && errno != ENOENT) {
-               PERROR("shm_unlink");
-               goto error_shm_release;
+
+       if (!shm_path) {
+               obj->shm_path[0] = '\0';
+               shmfd = create_posix_shm();
+       } else {
+               strncpy(obj->shm_path, shm_path,
+                       sizeof(obj->shm_path));
+               obj->shm_path[sizeof(obj->shm_path) - 1] = '\0';
+
+               /* Path should already exist, but could fail. */
+               shmfd = create_shared_file(shm_path);
        }
+       if (shmfd < 0)
+               goto error_shm_open;
+
        sigblocked = 0;
        ret = pthread_sigmask(SIG_SETMASK, &orig_sigs, NULL);
        if (ret == -1) {
                PERROR("pthread_sigmask");
                goto error_sigmask_release;
        }
+       ret = zero_file(shmfd, memory_map_size);
+       if (ret) {
+               PERROR("zero_file");
+               goto error_zero_file;
+       }
        ret = ftruncate(shmfd, memory_map_size);
        if (ret) {
                PERROR("ftruncate");
@@ -128,6 +228,7 @@ struct shm_object *shm_object_table_append(struct shm_object_table *table,
                PERROR("mmap");
                goto error_mmap;
        }
+       obj->type = SHM_OBJECT_SHM;
        obj->memory_map = memory_map;
        obj->memory_map_size = memory_map_size;
        obj->allocated_len = 0;
@@ -137,13 +238,19 @@ struct shm_object *shm_object_table_append(struct shm_object_table *table,
 
 error_mmap:
 error_ftruncate:
-error_shm_release:
+error_zero_file:
 error_sigmask_release:
        ret = close(shmfd);
        if (ret) {
                PERROR("close");
                assert(0);
        }
+       if (shm_path) {
+               ret = unlink(shm_path);
+               if (ret) {
+                       PERROR("ret");
+               }
+       }
 error_shm_open:
        if (sigblocked) {
                ret = pthread_sigmask(SIG_SETMASK, &orig_sigs, NULL);
@@ -162,24 +269,120 @@ error_fcntl:
        }
 error_pipe:
        return NULL;
-       
 }
 
-struct shm_object *shm_object_table_append_shadow(struct shm_object_table *table,
-                       int shm_fd, int wait_fd, size_t memory_map_size)
+static
+struct shm_object *_shm_object_table_alloc_mem(struct shm_object_table *table,
+                                          size_t memory_map_size)
+{
+       struct shm_object *obj;
+       void *memory_map;
+       int waitfd[2], i, ret;
+
+       if (table->allocated_len >= table->size)
+               return NULL;
+       obj = &table->objects[table->allocated_len];
+
+       memory_map = zmalloc(memory_map_size);
+       if (!memory_map)
+               goto alloc_error;
+
+       /* wait_fd: create pipe */
+       ret = pipe(waitfd);
+       if (ret < 0) {
+               PERROR("pipe");
+               goto error_pipe;
+       }
+       for (i = 0; i < 2; i++) {
+               ret = fcntl(waitfd[i], F_SETFD, FD_CLOEXEC);
+               if (ret < 0) {
+                       PERROR("fcntl");
+                       goto error_fcntl;
+               }
+       }
+       /* The write end of the pipe needs to be non-blocking */
+       ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK);
+       if (ret < 0) {
+               PERROR("fcntl");
+               goto error_fcntl;
+       }
+       memcpy(obj->wait_fd, waitfd, sizeof(waitfd));
+
+       /* no shm_fd */
+       obj->shm_fd = -1;
+
+       obj->type = SHM_OBJECT_MEM;
+       obj->memory_map = memory_map;
+       obj->memory_map_size = memory_map_size;
+       obj->allocated_len = 0;
+       obj->index = table->allocated_len++;
+
+       return obj;
+
+error_fcntl:
+       for (i = 0; i < 2; i++) {
+               ret = close(waitfd[i]);
+               if (ret) {
+                       PERROR("close");
+                       assert(0);
+               }
+       }
+error_pipe:
+       free(memory_map);
+alloc_error:
+       return NULL;
+}
+
+struct shm_object *shm_object_table_alloc(struct shm_object_table *table,
+                       size_t memory_map_size,
+                       enum shm_object_type type,
+                       const char *shm_path)
+{
+       switch (type) {
+       case SHM_OBJECT_SHM:
+               return _shm_object_table_alloc_shm(table, memory_map_size,
+                               shm_path);
+       case SHM_OBJECT_MEM:
+               return _shm_object_table_alloc_mem(table, memory_map_size);
+       default:
+               assert(0);
+       }
+       return NULL;
+}
+
+struct shm_object *shm_object_table_append_shm(struct shm_object_table *table,
+                       int shm_fd, int wakeup_fd, uint32_t stream_nr,
+                       size_t memory_map_size)
 {
        struct shm_object *obj;
        char *memory_map;
+       int ret;
 
        if (table->allocated_len >= table->size)
                return NULL;
+       /* streams _must_ be received in sequential order, else fail. */
+       if (stream_nr + 1 != table->allocated_len)
+               return NULL;
+
        obj = &table->objects[table->allocated_len];
 
-       /* wait_fd: set read end of the pipe. */
-       obj->wait_fd[0] = wait_fd;
-       obj->wait_fd[1] = -1;   /* write end is unset. */
+       /* wait_fd: set write end of the pipe. */
+       obj->wait_fd[0] = -1;   /* read end is unset */
+       obj->wait_fd[1] = wakeup_fd;
        obj->shm_fd = shm_fd;
 
+       ret = fcntl(obj->wait_fd[1], F_SETFD, FD_CLOEXEC);
+       if (ret < 0) {
+               PERROR("fcntl");
+               goto error_fcntl;
+       }
+       /* The write end of the pipe needs to be non-blocking */
+       ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK);
+       if (ret < 0) {
+               PERROR("fcntl");
+               goto error_fcntl;
+       }
+
        /* memory_map: mmap */
        memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE,
                          MAP_SHARED, shm_fd, 0);
@@ -187,6 +390,7 @@ struct shm_object *shm_object_table_append_shadow(struct shm_object_table *table
                PERROR("mmap");
                goto error_mmap;
        }
+       obj->type = SHM_OBJECT_SHM;
        obj->memory_map = memory_map;
        obj->memory_map_size = memory_map_size;
        obj->allocated_len = memory_map_size;
@@ -194,33 +398,105 @@ struct shm_object *shm_object_table_append_shadow(struct shm_object_table *table
 
        return obj;
 
+error_fcntl:
 error_mmap:
        return NULL;
 }
 
+/*
+ * Passing ownership of mem to object.
+ */
+struct shm_object *shm_object_table_append_mem(struct shm_object_table *table,
+                       void *mem, size_t memory_map_size, int wakeup_fd)
+{
+       struct shm_object *obj;
+       int ret;
+
+       if (table->allocated_len >= table->size)
+               return NULL;
+       obj = &table->objects[table->allocated_len];
+
+       obj->wait_fd[0] = -1;   /* read end is unset */
+       obj->wait_fd[1] = wakeup_fd;
+       obj->shm_fd = -1;
+
+       ret = fcntl(obj->wait_fd[1], F_SETFD, FD_CLOEXEC);
+       if (ret < 0) {
+               PERROR("fcntl");
+               goto error_fcntl;
+       }
+       /* The write end of the pipe needs to be non-blocking */
+       ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK);
+       if (ret < 0) {
+               PERROR("fcntl");
+               goto error_fcntl;
+       }
+
+       obj->type = SHM_OBJECT_MEM;
+       obj->memory_map = mem;
+       obj->memory_map_size = memory_map_size;
+       obj->allocated_len = memory_map_size;
+       obj->index = table->allocated_len++;
+
+       return obj;
+
+error_fcntl:
+       return NULL;
+}
+
 static
 void shmp_object_destroy(struct shm_object *obj)
 {
-       int ret, i;
-
-        ret = munmap(obj->memory_map, obj->memory_map_size);
-        if (ret) {
-                PERROR("umnmap");
-                assert(0);
-        }
-       ret = close(obj->shm_fd);
-       if (ret) {
-               PERROR("close");
-               assert(0);
-       }
-       for (i = 0; i < 2; i++) {
-               if (obj->wait_fd[i] < 0)
-                       continue;
-               ret = close(obj->wait_fd[i]);
+       switch (obj->type) {
+       case SHM_OBJECT_SHM:
+       {
+               int ret, i;
+
+               ret = munmap(obj->memory_map, obj->memory_map_size);
+               if (ret) {
+                       PERROR("umnmap");
+                       assert(0);
+               }
+               ret = close(obj->shm_fd);
                if (ret) {
                        PERROR("close");
                        assert(0);
                }
+               if (obj->shm_path[0]) {
+                       ret = unlink(obj->shm_path);
+                       if (ret) {
+                               PERROR("ret");
+                       }
+               }
+               for (i = 0; i < 2; i++) {
+                       if (obj->wait_fd[i] < 0)
+                               continue;
+                       ret = close(obj->wait_fd[i]);
+                       if (ret) {
+                               PERROR("close");
+                               assert(0);
+                       }
+               }
+               break;
+       }
+       case SHM_OBJECT_MEM:
+       {
+               int ret, i;
+
+               for (i = 0; i < 2; i++) {
+                       if (obj->wait_fd[i] < 0)
+                               continue;
+                       ret = close(obj->wait_fd[i]);
+                       if (ret) {
+                               PERROR("close");
+                               assert(0);
+                       }
+               }
+               free(obj->memory_map);
+               break;
+       }
+       default:
+               assert(0);
        }
 }
 
This page took 0.027786 seconds and 4 git commands to generate.