Version 2.0.8
[lttng-ust.git] / libringbuffer / ring_buffer_frontend.c
index cb9446ea45557dbe9b68c53e97344cf304b5c975..df8ddf7f01c6f26be5e011b11372f95bf535e0da 100644 (file)
@@ -1,7 +1,22 @@
 /*
  * ring_buffer_frontend.c
  *
- * (C) Copyright 2005-2010 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * Copyright (C) 2005-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; only
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
  *
  * Ring buffer wait-free buffer synchronization. Producer-consumer and flight
  * recorder (overwrite) modes. See thesis:
  *   - splice one subbuffer worth of data to a pipe
  *   - splice the data from pipe to disk/network
  *   - put_subbuf
- *
- * Dual LGPL v2.1/GPL v2 license.
  */
 
+#define _GNU_SOURCE
 #include <sys/types.h>
 #include <sys/mman.h>
 #include <sys/stat.h>
 #include "backend.h"
 #include "frontend.h"
 #include "shm.h"
+#include "tlsfixup.h"
 
 #ifndef max
 #define max(a, b)      ((a) > (b) ? (a) : (b))
 #endif
 
+/* Print DBG() messages about events lost only every 1048576 hits */
+#define DBG_PRINT_NR_LOST      (1UL << 20)
+
 /*
  * Use POSIX SHM: shm_open(3) and shm_unlink(3).
  * close(2) to close the fd returned by shm_open.
@@ -805,7 +823,7 @@ retry:
         */
        if (((commit_count - chan->backend.subbuf_size)
             & chan->commit_count_mask)
-           - (buf_trunc(consumed_cur, chan)
+           - (buf_trunc(consumed, chan)
               >> chan->backend.num_subbuf_order)
            != 0)
                goto nodata;
@@ -814,7 +832,7 @@ retry:
         * Check that we are not about to read the same subbuffer in
         * which the writer head is.
         */
-       if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed_cur, chan)
+       if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed, chan)
            == 0)
                goto nodata;
 
@@ -977,23 +995,30 @@ void lib_ring_buffer_print_errors(struct channel *chan,
        const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
        void *priv = channel_get_private(chan);
 
-       DBG("ring buffer %s, cpu %d: %lu records written, "
-                         "%lu records overrun\n",
-                         chan->backend.name, cpu,
-                         v_read(config, &buf->records_count),
-                         v_read(config, &buf->records_overrun));
-
-       if (v_read(config, &buf->records_lost_full)
-           || v_read(config, &buf->records_lost_wrap)
-           || v_read(config, &buf->records_lost_big))
-               DBG("ring buffer %s, cpu %d: records were lost. Caused by:\n"
-                      "  [ %lu buffer full, %lu nest buffer wrap-around, "
-                      "%lu event too big ]\n",
-                      chan->backend.name, cpu,
-                      v_read(config, &buf->records_lost_full),
-                      v_read(config, &buf->records_lost_wrap),
-                      v_read(config, &buf->records_lost_big));
-
+       if (!strcmp(chan->backend.name, "relay-metadata-mmap")) {
+               DBG("ring buffer %s: %lu records written, "
+                       "%lu records overrun\n",
+                       chan->backend.name,
+                       v_read(config, &buf->records_count),
+                       v_read(config, &buf->records_overrun));
+       } else {
+               DBG("ring buffer %s, cpu %d: %lu records written, "
+                       "%lu records overrun\n",
+                       chan->backend.name, cpu,
+                       v_read(config, &buf->records_count),
+                       v_read(config, &buf->records_overrun));
+
+               if (v_read(config, &buf->records_lost_full)
+                   || v_read(config, &buf->records_lost_wrap)
+                   || v_read(config, &buf->records_lost_big))
+                       DBG("ring buffer %s, cpu %d: records were lost. Caused by:\n"
+                               "  [ %lu buffer full, %lu nest buffer wrap-around, "
+                               "%lu event too big ]\n",
+                               chan->backend.name, cpu,
+                               v_read(config, &buf->records_lost_full),
+                               v_read(config, &buf->records_lost_wrap),
+                               v_read(config, &buf->records_lost_big));
+       }
        lib_ring_buffer_print_buffer_errors(buf, chan, priv, cpu, handle);
 }
 
@@ -1153,10 +1178,11 @@ int lib_ring_buffer_try_switch_slow(enum switch_mode mode,
                                    struct lttng_ust_lib_ring_buffer *buf,
                                    struct channel *chan,
                                    struct switch_offsets *offsets,
-                                   uint64_t *tsc)
+                                   uint64_t *tsc,
+                                   struct lttng_ust_shm_handle *handle)
 {
        const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
-       unsigned long off;
+       unsigned long off, reserve_commit_diff;
 
        offsets->begin = v_read(config, &buf->offset);
        offsets->old = offsets->begin;
@@ -1181,23 +1207,69 @@ int lib_ring_buffer_try_switch_slow(enum switch_mode mode,
         * timestamps) are visible to the reader. This is required for
         * quiescence guarantees for the fusion merge.
         */
-       if (mode == SWITCH_FLUSH || off > 0) {
-               if (caa_unlikely(off == 0)) {
-                       /*
-                        * The client does not save any header information.
-                        * Don't switch empty subbuffer on finalize, because it
-                        * is invalid to deliver a completely empty subbuffer.
-                        */
-                       if (!config->cb.subbuffer_header_size())
+       if (mode != SWITCH_FLUSH && !off)
+               return -1;      /* we do not have to switch : buffer is empty */
+
+       if (caa_unlikely(off == 0)) {
+               unsigned long sb_index, commit_count;
+
+               /*
+                * We are performing a SWITCH_FLUSH. At this stage, there are no
+                * concurrent writes into the buffer.
+                *
+                * The client does not save any header information.  Don't
+                * switch empty subbuffer on finalize, because it is invalid to
+                * deliver a completely empty subbuffer.
+                */
+               if (!config->cb.subbuffer_header_size())
+                       return -1;
+
+               /* Test new buffer integrity */
+               sb_index = subbuf_index(offsets->begin, chan);
+               commit_count = v_read(config,
+                               &shmp_index(handle, buf->commit_cold,
+                                       sb_index)->cc_sb);
+               reserve_commit_diff =
+                 (buf_trunc(offsets->begin, chan)
+                  >> chan->backend.num_subbuf_order)
+                 - (commit_count & chan->commit_count_mask);
+               if (caa_likely(reserve_commit_diff == 0)) {
+                       /* Next subbuffer not being written to. */
+                       if (caa_unlikely(config->mode != RING_BUFFER_OVERWRITE &&
+                               subbuf_trunc(offsets->begin, chan)
+                                - subbuf_trunc((unsigned long)
+                                    uatomic_read(&buf->consumed), chan)
+                               >= chan->backend.buf_size)) {
+                               /*
+                                * We do not overwrite non consumed buffers
+                                * and we are full : don't switch.
+                                */
                                return -1;
+                       } else {
+                               /*
+                                * Next subbuffer not being written to, and we
+                                * are either in overwrite mode or the buffer is
+                                * not full. It's safe to write in this new
+                                * subbuffer.
+                                */
+                       }
+               } else {
                        /*
-                        * Need to write the subbuffer start header on finalize.
+                        * Next subbuffer reserve offset does not match the
+                        * commit offset. Don't perform switch in
+                        * producer-consumer and overwrite mode.  Caused by
+                        * either a writer OOPS or too many nested writes over a
+                        * reserve/commit pair.
                         */
-                       offsets->switch_old_start = 1;
+                       return -1;
                }
-               offsets->begin = subbuf_align(offsets->begin, chan);
-       } else
-               return -1;      /* we do not have to switch : buffer is empty */
+
+               /*
+                * Need to write the subbuffer start header on finalize.
+                */
+               offsets->switch_old_start = 1;
+       }
+       offsets->begin = subbuf_align(offsets->begin, chan);
        /* Note: old points to the next subbuf at offset 0 */
        offsets->end = offsets->begin;
        return 0;
@@ -1227,7 +1299,7 @@ void lib_ring_buffer_switch_slow(struct lttng_ust_lib_ring_buffer *buf, enum swi
         */
        do {
                if (lib_ring_buffer_try_switch_slow(mode, buf, chan, &offsets,
-                                                   &tsc))
+                                                   &tsc, handle))
                        return; /* Switch not needed */
        } while (v_cmpxchg(config, &buf->offset, offsets.old, offsets.end)
                 != offsets.old);
@@ -1277,9 +1349,10 @@ int lib_ring_buffer_try_reserve_slow(struct lttng_ust_lib_ring_buffer *buf,
 {
        const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
        struct lttng_ust_shm_handle *handle = ctx->handle;
-       unsigned long reserve_commit_diff;
+       unsigned long reserve_commit_diff, offset_cmp;
 
-       offsets->begin = v_read(config, &buf->offset);
+retry:
+       offsets->begin = offset_cmp = v_read(config, &buf->offset);
        offsets->old = offsets->begin;
        offsets->switch_new_start = 0;
        offsets->switch_new_end = 0;
@@ -1311,7 +1384,7 @@ int lib_ring_buffer_try_reserve_slow(struct lttng_ust_lib_ring_buffer *buf,
                }
        }
        if (caa_unlikely(offsets->switch_new_start)) {
-               unsigned long sb_index;
+               unsigned long sb_index, commit_count;
 
                /*
                 * We are typically not filling the previous buffer completely.
@@ -1322,12 +1395,32 @@ int lib_ring_buffer_try_reserve_slow(struct lttng_ust_lib_ring_buffer *buf,
                                 + config->cb.subbuffer_header_size();
                /* Test new buffer integrity */
                sb_index = subbuf_index(offsets->begin, chan);
+               /*
+                * Read buf->offset before buf->commit_cold[sb_index].cc_sb.
+                * lib_ring_buffer_check_deliver() has the matching
+                * memory barriers required around commit_cold cc_sb
+                * updates to ensure reserve and commit counter updates
+                * are not seen reordered when updated by another CPU.
+                */
+               cmm_smp_rmb();
+               commit_count = v_read(config,
+                               &shmp_index(handle, buf->commit_cold,
+                                       sb_index)->cc_sb);
+               /* Read buf->commit_cold[sb_index].cc_sb before buf->offset. */
+               cmm_smp_rmb();
+               if (caa_unlikely(offset_cmp != v_read(config, &buf->offset))) {
+                       /*
+                        * The reserve counter have been concurrently updated
+                        * while we read the commit counter. This means the
+                        * commit counter we read might not match buf->offset
+                        * due to concurrent update. We therefore need to retry.
+                        */
+                       goto retry;
+               }
                reserve_commit_diff =
                  (buf_trunc(offsets->begin, chan)
                   >> chan->backend.num_subbuf_order)
-                 - ((unsigned long) v_read(config,
-                                           &shmp_index(handle, buf->commit_cold, sb_index)->cc_sb)
-                    & chan->commit_count_mask);
+                 - (commit_count & chan->commit_count_mask);
                if (caa_likely(reserve_commit_diff == 0)) {
                        /* Next subbuffer not being written to. */
                        if (caa_unlikely(config->mode != RING_BUFFER_OVERWRITE &&
@@ -1335,11 +1428,19 @@ int lib_ring_buffer_try_reserve_slow(struct lttng_ust_lib_ring_buffer *buf,
                                 - subbuf_trunc((unsigned long)
                                     uatomic_read(&buf->consumed), chan)
                                >= chan->backend.buf_size)) {
+                               unsigned long nr_lost;
+
                                /*
                                 * We do not overwrite non consumed buffers
                                 * and we are full : record is lost.
                                 */
+                               nr_lost = v_read(config, &buf->records_lost_full);
                                v_inc(config, &buf->records_lost_full);
+                               if ((nr_lost & (DBG_PRINT_NR_LOST - 1)) == 0) {
+                                       DBG("%lu or more records lost in (%s:%d) (buffer full)\n",
+                                               nr_lost + 1, chan->backend.name,
+                                               buf->backend.cpu);
+                               }
                                return -ENOBUFS;
                        } else {
                                /*
@@ -1350,13 +1451,22 @@ int lib_ring_buffer_try_reserve_slow(struct lttng_ust_lib_ring_buffer *buf,
                                 */
                        }
                } else {
+                       unsigned long nr_lost;
+
                        /*
                         * Next subbuffer reserve offset does not match the
-                        * commit offset. Drop record in producer-consumer and
+                        * commit offset, and this did not involve update to the
+                        * reserve counter. Drop record in producer-consumer and
                         * overwrite mode. Caused by either a writer OOPS or too
                         * many nested writes over a reserve/commit pair.
                         */
+                       nr_lost = v_read(config, &buf->records_lost_wrap);
                        v_inc(config, &buf->records_lost_wrap);
+                       if ((nr_lost & (DBG_PRINT_NR_LOST - 1)) == 0) {
+                               DBG("%lu or more records lost in (%s:%d) (wrap-around)\n",
+                                       nr_lost + 1, chan->backend.name,
+                                       buf->backend.cpu);
+                       }
                        return -EIO;
                }
                offsets->size =
@@ -1370,11 +1480,20 @@ int lib_ring_buffer_try_reserve_slow(struct lttng_ust_lib_ring_buffer *buf,
                        + ctx->data_size;
                if (caa_unlikely(subbuf_offset(offsets->begin, chan)
                             + offsets->size > chan->backend.subbuf_size)) {
+                       unsigned long nr_lost;
+
                        /*
                         * Record too big for subbuffers, report error, don't
                         * complete the sub-buffer switch.
                         */
+                       nr_lost = v_read(config, &buf->records_lost_big);
                        v_inc(config, &buf->records_lost_big);
+                       if ((nr_lost & (DBG_PRINT_NR_LOST - 1)) == 0) {
+                               DBG("%lu or more records lost in (%s:%d) record size "
+                                       " of %zu bytes is too large for buffer\n",
+                                       nr_lost + 1, chan->backend.name,
+                                       buf->backend.cpu, offsets->size);
+                       }
                        return -ENOSPC;
                } else {
                        /*
@@ -1478,3 +1597,11 @@ int lib_ring_buffer_reserve_slow(struct lttng_ust_lib_ring_buffer_ctx *ctx)
        ctx->buf_offset = offsets.begin + offsets.pre_header_padding;
        return 0;
 }
+
+/*
+ * Force a read (imply TLS fixup for dlopen) of TLS variables.
+ */
+void lttng_fixup_ringbuffer_tls(void)
+{
+       asm volatile ("" : : "m" (lib_ring_buffer_nesting));
+}
This page took 0.028513 seconds and 4 git commands to generate.