Fix: do not set quiescent state on channel destroy
[lttng-modules.git] / lib / ringbuffer / ring_buffer_frontend.c
index 8a6a1883a96dfa8ed923c7c48678a1f1a3f8b201..822af455b5b7fd1868921d05a6a71b7c123c8f08 100644 (file)
@@ -54,6 +54,7 @@
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
+#include <asm/cacheflush.h>
 
 #include <wrapper/ringbuffer/config.h>
 #include <wrapper/ringbuffer/backend.h>
@@ -64,6 +65,7 @@
 #include <wrapper/kref.h>
 #include <wrapper/percpu-defs.h>
 #include <wrapper/timer.h>
+#include <wrapper/vmalloc.h>
 
 /*
  * Internal structure representing offsets to use at a sub-buffer switch.
@@ -146,8 +148,9 @@ void lib_ring_buffer_free(struct lib_ring_buffer *buf)
        struct channel *chan = buf->backend.chan;
 
        lib_ring_buffer_print_errors(chan, buf, buf->backend.cpu);
-       kfree(buf->commit_hot);
-       kfree(buf->commit_cold);
+       lttng_kvfree(buf->commit_hot);
+       lttng_kvfree(buf->commit_cold);
+       lttng_kvfree(buf->ts_end);
 
        lib_ring_buffer_backend_free(&buf->backend);
 }
@@ -177,6 +180,7 @@ void lib_ring_buffer_reset(struct lib_ring_buffer *buf)
                v_set(config, &buf->commit_hot[i].cc, 0);
                v_set(config, &buf->commit_hot[i].seq, 0);
                v_set(config, &buf->commit_cold[i].cc_sb, 0);
+               buf->ts_end[i] = 0;
        }
        atomic_long_set(&buf->consumed, 0);
        atomic_set(&buf->record_disabled, 0);
@@ -244,7 +248,7 @@ int lib_ring_buffer_create(struct lib_ring_buffer *buf,
                return ret;
 
        buf->commit_hot =
-               kzalloc_node(ALIGN(sizeof(*buf->commit_hot)
+               lttng_kvzalloc_node(ALIGN(sizeof(*buf->commit_hot)
                                   * chan->backend.num_subbuf,
                                   1 << INTERNODE_CACHE_SHIFT),
                        GFP_KERNEL | __GFP_NOWARN,
@@ -255,7 +259,7 @@ int lib_ring_buffer_create(struct lib_ring_buffer *buf,
        }
 
        buf->commit_cold =
-               kzalloc_node(ALIGN(sizeof(*buf->commit_cold)
+               lttng_kvzalloc_node(ALIGN(sizeof(*buf->commit_cold)
                                   * chan->backend.num_subbuf,
                                   1 << INTERNODE_CACHE_SHIFT),
                        GFP_KERNEL | __GFP_NOWARN,
@@ -265,6 +269,17 @@ int lib_ring_buffer_create(struct lib_ring_buffer *buf,
                goto free_commit;
        }
 
+       buf->ts_end =
+               lttng_kvzalloc_node(ALIGN(sizeof(*buf->ts_end)
+                                  * chan->backend.num_subbuf,
+                                  1 << INTERNODE_CACHE_SHIFT),
+                       GFP_KERNEL | __GFP_NOWARN,
+                       cpu_to_node(max(cpu, 0)));
+       if (!buf->ts_end) {
+               ret = -ENOMEM;
+               goto free_commit_cold;
+       }
+
        init_waitqueue_head(&buf->read_wait);
        init_waitqueue_head(&buf->write_wait);
        raw_spin_lock_init(&buf->raw_tick_nohz_spinlock);
@@ -304,17 +319,19 @@ int lib_ring_buffer_create(struct lib_ring_buffer *buf,
 
        /* Error handling */
 free_init:
-       kfree(buf->commit_cold);
+       lttng_kvfree(buf->ts_end);
+free_commit_cold:
+       lttng_kvfree(buf->commit_cold);
 free_commit:
-       kfree(buf->commit_hot);
+       lttng_kvfree(buf->commit_hot);
 free_chanbuf:
        lib_ring_buffer_backend_free(&buf->backend);
        return ret;
 }
 
-static void switch_buffer_timer(unsigned long data)
+static void switch_buffer_timer(LTTNG_TIMER_FUNC_ARG_TYPE t)
 {
-       struct lib_ring_buffer *buf = (struct lib_ring_buffer *)data;
+       struct lib_ring_buffer *buf = lttng_from_timer(buf, t, switch_timer);
        struct channel *chan = buf->backend.chan;
        const struct lib_ring_buffer_config *config = &chan->backend.config;
 
@@ -339,22 +356,22 @@ static void lib_ring_buffer_start_switch_timer(struct lib_ring_buffer *buf)
 {
        struct channel *chan = buf->backend.chan;
        const struct lib_ring_buffer_config *config = &chan->backend.config;
+       unsigned int flags = 0;
 
        if (!chan->switch_timer_interval || buf->switch_timer_enabled)
                return;
 
        if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
-               lttng_init_timer_pinned(&buf->switch_timer);
-       else
-               init_timer(&buf->switch_timer);
+               flags = LTTNG_TIMER_PINNED;
 
-       buf->switch_timer.function = switch_buffer_timer;
+       lttng_timer_setup(&buf->switch_timer, switch_buffer_timer, flags, buf);
        buf->switch_timer.expires = jiffies + chan->switch_timer_interval;
-       buf->switch_timer.data = (unsigned long)buf;
+
        if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
                add_timer_on(&buf->switch_timer, buf->backend.cpu);
        else
                add_timer(&buf->switch_timer);
+
        buf->switch_timer_enabled = 1;
 }
 
@@ -375,9 +392,9 @@ static void lib_ring_buffer_stop_switch_timer(struct lib_ring_buffer *buf)
 /*
  * Polling timer to check the channels for data.
  */
-static void read_buffer_timer(unsigned long data)
+static void read_buffer_timer(LTTNG_TIMER_FUNC_ARG_TYPE t)
 {
-       struct lib_ring_buffer *buf = (struct lib_ring_buffer *)data;
+       struct lib_ring_buffer *buf = lttng_from_timer(buf, t, read_timer);
        struct channel *chan = buf->backend.chan;
        const struct lib_ring_buffer_config *config = &chan->backend.config;
 
@@ -404,6 +421,7 @@ static void lib_ring_buffer_start_read_timer(struct lib_ring_buffer *buf)
 {
        struct channel *chan = buf->backend.chan;
        const struct lib_ring_buffer_config *config = &chan->backend.config;
+       unsigned int flags = 0;
 
        if (config->wakeup != RING_BUFFER_WAKEUP_BY_TIMER
            || !chan->read_timer_interval
@@ -411,18 +429,16 @@ static void lib_ring_buffer_start_read_timer(struct lib_ring_buffer *buf)
                return;
 
        if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
-               lttng_init_timer_pinned(&buf->read_timer);
-       else
-               init_timer(&buf->read_timer);
+               flags = LTTNG_TIMER_PINNED;
 
-       buf->read_timer.function = read_buffer_timer;
+       lttng_timer_setup(&buf->read_timer, read_buffer_timer, flags, buf);
        buf->read_timer.expires = jiffies + chan->read_timer_interval;
-       buf->read_timer.data = (unsigned long)buf;
 
        if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
                add_timer_on(&buf->read_timer, buf->backend.cpu);
        else
                add_timer(&buf->read_timer);
+
        buf->read_timer_enabled = 1;
 }
 
@@ -976,13 +992,11 @@ void *channel_destroy(struct channel *chan)
                                config->cb.buffer_finalize(buf,
                                                           chan->backend.priv,
                                                           cpu);
-                       if (buf->backend.allocated)
-                               lib_ring_buffer_set_quiescent(buf);
                        /*
                         * Perform flush before writing to finalized.
                         */
                        smp_wmb();
-                       ACCESS_ONCE(buf->finalized) = 1;
+                       WRITE_ONCE(buf->finalized, 1);
                        wake_up_interruptible(&buf->read_wait);
                }
        } else {
@@ -990,16 +1004,14 @@ void *channel_destroy(struct channel *chan)
 
                if (config->cb.buffer_finalize)
                        config->cb.buffer_finalize(buf, chan->backend.priv, -1);
-               if (buf->backend.allocated)
-                       lib_ring_buffer_set_quiescent(buf);
                /*
                 * Perform flush before writing to finalized.
                 */
                smp_wmb();
-               ACCESS_ONCE(buf->finalized) = 1;
+               WRITE_ONCE(buf->finalized, 1);
                wake_up_interruptible(&buf->read_wait);
        }
-       ACCESS_ONCE(chan->finalized) = 1;
+       WRITE_ONCE(chan->finalized, 1);
        wake_up_interruptible(&chan->hp_wait);
        wake_up_interruptible(&chan->read_wait);
        priv = chan->backend.priv;
@@ -1075,17 +1087,8 @@ int lib_ring_buffer_snapshot(struct lib_ring_buffer *buf,
        unsigned long consumed_cur, write_offset;
        int finalized;
 
-       /*
-        * First, ensure we perform a "final" flush onto the stream.  This will
-        * ensure we create a packet of padding if we encounter an empty
-        * packet. This ensures the time-stamps right before the snapshot is
-        * used as end of packet timestamp.
-        */
-       if (!buf->quiescent)
-               _lib_ring_buffer_switch_remote(buf, SWITCH_FLUSH);
-
 retry:
-       finalized = ACCESS_ONCE(buf->finalized);
+       finalized = READ_ONCE(buf->finalized);
        /*
         * Read finalized before counters.
         */
@@ -1158,6 +1161,47 @@ void lib_ring_buffer_move_consumer(struct lib_ring_buffer *buf,
 }
 EXPORT_SYMBOL_GPL(lib_ring_buffer_move_consumer);
 
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
+static void lib_ring_buffer_flush_read_subbuf_dcache(
+               const struct lib_ring_buffer_config *config,
+               struct channel *chan,
+               struct lib_ring_buffer *buf)
+{
+       struct lib_ring_buffer_backend_pages *pages;
+       unsigned long sb_bindex, id, i, nr_pages;
+
+       if (config->output != RING_BUFFER_MMAP)
+               return;
+
+       /*
+        * Architectures with caches aliased on virtual addresses may
+        * use different cache lines for the linear mapping vs
+        * user-space memory mapping. Given that the ring buffer is
+        * based on the kernel linear mapping, aligning it with the
+        * user-space mapping is not straightforward, and would require
+        * extra TLB entries. Therefore, simply flush the dcache for the
+        * entire sub-buffer before reading it.
+        */
+       id = buf->backend.buf_rsb.id;
+       sb_bindex = subbuffer_id_get_index(config, id);
+       pages = buf->backend.array[sb_bindex];
+       nr_pages = buf->backend.num_pages_per_subbuf;
+       for (i = 0; i < nr_pages; i++) {
+               struct lib_ring_buffer_backend_page *backend_page;
+
+               backend_page = &pages->p[i];
+               flush_dcache_page(pfn_to_page(backend_page->pfn));
+       }
+}
+#else
+static void lib_ring_buffer_flush_read_subbuf_dcache(
+               const struct lib_ring_buffer_config *config,
+               struct channel *chan,
+               struct lib_ring_buffer *buf)
+{
+}
+#endif
+
 /**
  * lib_ring_buffer_get_subbuf - get exclusive access to subbuffer for reading
  * @buf: ring buffer
@@ -1184,7 +1228,7 @@ int lib_ring_buffer_get_subbuf(struct lib_ring_buffer *buf,
                return -EBUSY;
        }
 retry:
-       finalized = ACCESS_ONCE(buf->finalized);
+       finalized = READ_ONCE(buf->finalized);
        /*
         * Read finalized before counters.
         */
@@ -1300,6 +1344,8 @@ retry:
        buf->get_subbuf_consumed = consumed;
        buf->get_subbuf = 1;
 
+       lib_ring_buffer_flush_read_subbuf_dcache(config, chan, buf);
+
        return 0;
 
 nodata:
@@ -1473,8 +1519,7 @@ void lib_ring_buffer_print_errors(struct channel *chan,
 /*
  * lib_ring_buffer_switch_old_start: Populate old subbuffer header.
  *
- * Only executed by SWITCH_FLUSH, which can be issued while tracing is active
- * or at buffer finalization (destroy).
+ * Only executed when the buffer is finalized, in SWITCH_FLUSH.
  */
 static
 void lib_ring_buffer_switch_old_start(struct lib_ring_buffer *buf,
@@ -1531,14 +1576,26 @@ void lib_ring_buffer_switch_old_end(struct lib_ring_buffer *buf,
        unsigned long oldidx = subbuf_index(offsets->old - 1, chan);
        unsigned long commit_count, padding_size, data_size;
        struct commit_counters_hot *cc_hot;
+       u64 *ts_end;
 
        data_size = subbuf_offset(offsets->old - 1, chan) + 1;
        padding_size = chan->backend.subbuf_size - data_size;
        subbuffer_set_data_size(config, &buf->backend, oldidx, data_size);
 
+       ts_end = &buf->ts_end[oldidx];
        /*
-        * Order all writes to buffer before the commit count update that will
-        * determine that the subbuffer is full.
+        * This is the last space reservation in that sub-buffer before
+        * it gets delivered. This provides exclusive access to write to
+        * this sub-buffer's ts_end. There are also no concurrent
+        * readers of that ts_end because delivery of that sub-buffer is
+        * postponed until the commit counter is incremented for the
+        * current space reservation.
+        */
+       *ts_end = tsc;
+
+       /*
+        * Order all writes to buffer and store to ts_end before the commit
+        * count update that will determine that the subbuffer is full.
         */
        if (config->ipi == RING_BUFFER_IPI_BARRIER) {
                /*
@@ -1619,10 +1676,21 @@ void lib_ring_buffer_switch_new_end(struct lib_ring_buffer *buf,
 {
        const struct lib_ring_buffer_config *config = &chan->backend.config;
        unsigned long endidx, data_size;
+       u64 *ts_end;
 
        endidx = subbuf_index(offsets->end - 1, chan);
        data_size = subbuf_offset(offsets->end - 1, chan) + 1;
        subbuffer_set_data_size(config, &buf->backend, endidx, data_size);
+       ts_end = &buf->ts_end[endidx];
+       /*
+        * This is the last space reservation in that sub-buffer before
+        * it gets delivered. This provides exclusive access to write to
+        * this sub-buffer's ts_end. There are also no concurrent
+        * readers of that ts_end because delivery of that sub-buffer is
+        * postponed until the commit counter is incremented for the
+        * current space reservation.
+        */
+       *ts_end = tsc;
 }
 
 /*
@@ -1670,14 +1738,12 @@ int lib_ring_buffer_try_switch_slow(enum switch_mode mode,
                unsigned long sb_index, commit_count;
 
                /*
-                * We are performing a SWITCH_FLUSH. There may be concurrent
-                * writes into the buffer if e.g. invoked while performing a
-                * snapshot on an active trace.
+                * We are performing a SWITCH_FLUSH. At this stage, there are no
+                * concurrent writes into the buffer.
                 *
-                * If the client does not save any header information (sub-buffer
-                * header size == 0), don't switch empty subbuffer on finalize,
-                * because it is invalid to deliver a completely empty
-                * subbuffer.
+                * If the client saves no header information (header size == 0),
+                * don't switch empty subbuffer on finalize, because it is invalid
+                * to deliver a completely empty subbuffer.
                 */
                if (!config->cb.subbuffer_header_size())
                        return -1;
@@ -1821,16 +1887,14 @@ static void _lib_ring_buffer_switch_remote(struct lib_ring_buffer *buf,
        }
 
        /*
-        * Taking lock on CPU hotplug to ensure two things: first, that the
+        * Disabling preemption ensures two things: first, that the
         * target cpu is not taken concurrently offline while we are within
-        * smp_call_function_single() (I don't trust that get_cpu() on the
-        * _local_ CPU actually inhibit CPU hotplug for the _remote_ CPU (to be
-        * confirmed)). Secondly, if it happens that the CPU is not online, our
-        * own call to lib_ring_buffer_switch_slow() needs to be protected from
-        * CPU hotplug handlers, which can also perform a remote subbuffer
-        * switch.
+        * smp_call_function_single(). Secondly, if it happens that the
+        * CPU is not online, our own call to lib_ring_buffer_switch_slow()
+        * needs to be protected from CPU hotplug handlers, which can
+        * also perform a remote subbuffer switch.
         */
-       get_online_cpus();
+       preempt_disable();
        param.buf = buf;
        param.mode = mode;
        ret = smp_call_function_single(buf->backend.cpu,
@@ -1839,15 +1903,23 @@ static void _lib_ring_buffer_switch_remote(struct lib_ring_buffer *buf,
                /* Remote CPU is offline, do it ourself. */
                lib_ring_buffer_switch_slow(buf, mode);
        }
-       put_online_cpus();
+       preempt_enable();
 }
 
+/* Switch sub-buffer if current sub-buffer is non-empty. */
 void lib_ring_buffer_switch_remote(struct lib_ring_buffer *buf)
 {
        _lib_ring_buffer_switch_remote(buf, SWITCH_ACTIVE);
 }
 EXPORT_SYMBOL_GPL(lib_ring_buffer_switch_remote);
 
+/* Switch sub-buffer even if current sub-buffer is empty. */
+void lib_ring_buffer_switch_remote_empty(struct lib_ring_buffer *buf)
+{
+       _lib_ring_buffer_switch_remote(buf, SWITCH_FLUSH);
+}
+EXPORT_SYMBOL_GPL(lib_ring_buffer_switch_remote_empty);
+
 /*
  * Returns :
  * 0 if ok
@@ -2180,14 +2252,24 @@ void lib_ring_buffer_check_deliver_slow(const struct lib_ring_buffer_config *con
        if (likely(v_cmpxchg(config, &buf->commit_cold[idx].cc_sb,
                                 old_commit_count, old_commit_count + 1)
                   == old_commit_count)) {
+               u64 *ts_end;
+
                /*
                 * Start of exclusive subbuffer access. We are
                 * guaranteed to be the last writer in this subbuffer
                 * and any other writer trying to access this subbuffer
                 * in this state is required to drop records.
+                *
+                * We can read the ts_end for the current sub-buffer
+                * which has been saved by the very last space
+                * reservation for the current sub-buffer.
+                *
+                * Order increment of commit counter before reading ts_end.
                 */
+               smp_mb();
+               ts_end = &buf->ts_end[idx];
                deliver_count_events(config, buf, idx);
-               config->cb.buffer_end(buf, tsc, idx,
+               config->cb.buffer_end(buf, *ts_end, idx,
                                      lib_ring_buffer_get_data_size(config,
                                                                buf,
                                                                idx));
This page took 0.029948 seconds and 4 git commands to generate.