/*
* ring_buffer_frontend.c
*
- * (C) Copyright 2005-2010 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * Copyright (C) 2005-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; only
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
*
* Ring buffer wait-free buffer synchronization. Producer-consumer and flight
* recorder (overwrite) modes. See thesis:
* - splice one subbuffer worth of data to a pipe
* - splice the data from pipe to disk/network
* - put_subbuf
- *
- * Dual LGPL v2.1/GPL v2 license.
*/
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/percpu.h>
-
-#include "../../wrapper/ringbuffer/config.h"
-#include "../../wrapper/ringbuffer/backend.h"
-#include "../../wrapper/ringbuffer/frontend.h"
-#include "../../wrapper/ringbuffer/iterator.h"
-#include "../../wrapper/ringbuffer/nohz.h"
+#include <asm/cacheflush.h>
+
+#include <wrapper/ringbuffer/config.h>
+#include <wrapper/ringbuffer/backend.h>
+#include <wrapper/ringbuffer/frontend.h>
+#include <wrapper/ringbuffer/iterator.h>
+#include <wrapper/ringbuffer/nohz.h>
+#include <wrapper/atomic.h>
+#include <wrapper/kref.h>
+#include <wrapper/percpu-defs.h>
+#include <wrapper/timer.h>
+#include <wrapper/vmalloc.h>
/*
* Internal structure representing offsets to use at a sub-buffer switch.
static
void lib_ring_buffer_print_errors(struct channel *chan,
struct lib_ring_buffer *buf, int cpu);
+static
+void _lib_ring_buffer_switch_remote(struct lib_ring_buffer *buf,
+ enum switch_mode mode);
+
+static
+int lib_ring_buffer_poll_deliver(const struct lib_ring_buffer_config *config,
+ struct lib_ring_buffer *buf,
+ struct channel *chan)
+{
+ unsigned long consumed_old, consumed_idx, commit_count, write_offset;
+
+ consumed_old = atomic_long_read(&buf->consumed);
+ consumed_idx = subbuf_index(consumed_old, chan);
+ commit_count = v_read(config, &buf->commit_cold[consumed_idx].cc_sb);
+ /*
+ * No memory barrier here, since we are only interested
+ * in a statistically correct polling result. The next poll will
+	 * get the data if we are racing. The mb() that ensures correct
+ * memory order is in get_subbuf.
+ */
+ write_offset = v_read(config, &buf->offset);
+
+ /*
+ * Check that the subbuffer we are trying to consume has been
+ * already fully committed.
+ */
+
+ if (((commit_count - chan->backend.subbuf_size)
+ & chan->commit_count_mask)
+ - (buf_trunc(consumed_old, chan)
+ >> chan->backend.num_subbuf_order)
+ != 0)
+ return 0;
+
+ /*
+ * Check that we are not about to read the same subbuffer in
+ * which the writer head is.
+ */
+ if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed_old, chan)
+ == 0)
+ return 0;
+
+ return 1;
+}
/*
* Must be called under cpu hotplug protection.
struct channel *chan = buf->backend.chan;
lib_ring_buffer_print_errors(chan, buf, buf->backend.cpu);
- kfree(buf->commit_hot);
- kfree(buf->commit_cold);
+ lttng_kvfree(buf->commit_hot);
+ lttng_kvfree(buf->commit_cold);
+ lttng_kvfree(buf->ts_end);
lib_ring_buffer_backend_free(&buf->backend);
}
void lib_ring_buffer_reset(struct lib_ring_buffer *buf)
{
struct channel *chan = buf->backend.chan;
- const struct lib_ring_buffer_config *config = chan->backend.config;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
unsigned int i;
/*
v_set(config, &buf->commit_hot[i].cc, 0);
v_set(config, &buf->commit_hot[i].seq, 0);
v_set(config, &buf->commit_cold[i].cc_sb, 0);
+ buf->ts_end[i] = 0;
}
atomic_long_set(&buf->consumed, 0);
atomic_set(&buf->record_disabled, 0);
int lib_ring_buffer_create(struct lib_ring_buffer *buf,
struct channel_backend *chanb, int cpu)
{
- const struct lib_ring_buffer_config *config = chanb->config;
+ const struct lib_ring_buffer_config *config = &chanb->config;
struct channel *chan = container_of(chanb, struct channel, backend);
void *priv = chanb->priv;
- unsigned int num_subbuf;
size_t subbuf_header_size;
u64 tsc;
int ret;
return ret;
buf->commit_hot =
- kzalloc_node(ALIGN(sizeof(*buf->commit_hot)
+ lttng_kvzalloc_node(ALIGN(sizeof(*buf->commit_hot)
* chan->backend.num_subbuf,
1 << INTERNODE_CACHE_SHIFT),
- GFP_KERNEL, cpu_to_node(max(cpu, 0)));
+ GFP_KERNEL | __GFP_NOWARN,
+ cpu_to_node(max(cpu, 0)));
if (!buf->commit_hot) {
ret = -ENOMEM;
goto free_chanbuf;
}
buf->commit_cold =
- kzalloc_node(ALIGN(sizeof(*buf->commit_cold)
+ lttng_kvzalloc_node(ALIGN(sizeof(*buf->commit_cold)
* chan->backend.num_subbuf,
1 << INTERNODE_CACHE_SHIFT),
- GFP_KERNEL, cpu_to_node(max(cpu, 0)));
+ GFP_KERNEL | __GFP_NOWARN,
+ cpu_to_node(max(cpu, 0)));
if (!buf->commit_cold) {
ret = -ENOMEM;
goto free_commit;
}
- num_subbuf = chan->backend.num_subbuf;
+ buf->ts_end =
+ lttng_kvzalloc_node(ALIGN(sizeof(*buf->ts_end)
+ * chan->backend.num_subbuf,
+ 1 << INTERNODE_CACHE_SHIFT),
+ GFP_KERNEL | __GFP_NOWARN,
+ cpu_to_node(max(cpu, 0)));
+ if (!buf->ts_end) {
+ ret = -ENOMEM;
+ goto free_commit_cold;
+ }
+
init_waitqueue_head(&buf->read_wait);
+ init_waitqueue_head(&buf->write_wait);
raw_spin_lock_init(&buf->raw_tick_nohz_spinlock);
/*
/* Error handling */
free_init:
- kfree(buf->commit_cold);
+ lttng_kvfree(buf->ts_end);
+free_commit_cold:
+ lttng_kvfree(buf->commit_cold);
free_commit:
- kfree(buf->commit_hot);
+ lttng_kvfree(buf->commit_hot);
free_chanbuf:
lib_ring_buffer_backend_free(&buf->backend);
return ret;
}
-static void switch_buffer_timer(unsigned long data)
+static void switch_buffer_timer(LTTNG_TIMER_FUNC_ARG_TYPE t)
{
- struct lib_ring_buffer *buf = (struct lib_ring_buffer *)data;
+ struct lib_ring_buffer *buf = lttng_from_timer(buf, t, switch_timer);
struct channel *chan = buf->backend.chan;
- const struct lib_ring_buffer_config *config = chan->backend.config;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
/*
* Only flush buffers periodically if readers are active.
lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE);
if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
- mod_timer_pinned(&buf->switch_timer,
+ lttng_mod_timer_pinned(&buf->switch_timer,
jiffies + chan->switch_timer_interval);
else
mod_timer(&buf->switch_timer,
static void lib_ring_buffer_start_switch_timer(struct lib_ring_buffer *buf)
{
struct channel *chan = buf->backend.chan;
- const struct lib_ring_buffer_config *config = chan->backend.config;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
+ unsigned int flags = 0;
if (!chan->switch_timer_interval || buf->switch_timer_enabled)
return;
- init_timer(&buf->switch_timer);
- buf->switch_timer.function = switch_buffer_timer;
+
+ if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
+ flags = LTTNG_TIMER_PINNED;
+
+ lttng_timer_setup(&buf->switch_timer, switch_buffer_timer, flags, buf);
buf->switch_timer.expires = jiffies + chan->switch_timer_interval;
- buf->switch_timer.data = (unsigned long)buf;
+
if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
add_timer_on(&buf->switch_timer, buf->backend.cpu);
else
add_timer(&buf->switch_timer);
+
buf->switch_timer_enabled = 1;
}
/*
* Polling timer to check the channels for data.
*/
-static void read_buffer_timer(unsigned long data)
+static void read_buffer_timer(LTTNG_TIMER_FUNC_ARG_TYPE t)
{
- struct lib_ring_buffer *buf = (struct lib_ring_buffer *)data;
+ struct lib_ring_buffer *buf = lttng_from_timer(buf, t, read_timer);
struct channel *chan = buf->backend.chan;
- const struct lib_ring_buffer_config *config = chan->backend.config;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
CHAN_WARN_ON(chan, !buf->backend.allocated);
}
if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
- mod_timer_pinned(&buf->read_timer,
+ lttng_mod_timer_pinned(&buf->read_timer,
jiffies + chan->read_timer_interval);
else
mod_timer(&buf->read_timer,
static void lib_ring_buffer_start_read_timer(struct lib_ring_buffer *buf)
{
struct channel *chan = buf->backend.chan;
- const struct lib_ring_buffer_config *config = chan->backend.config;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
+	unsigned int flags = 0;
if (config->wakeup != RING_BUFFER_WAKEUP_BY_TIMER
|| !chan->read_timer_interval
|| buf->read_timer_enabled)
return;
- init_timer(&buf->read_timer);
- buf->read_timer.function = read_buffer_timer;
+ if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
+ flags = LTTNG_TIMER_PINNED;
+
+ lttng_timer_setup(&buf->read_timer, read_buffer_timer, flags, buf);
buf->read_timer.expires = jiffies + chan->read_timer_interval;
- buf->read_timer.data = (unsigned long)buf;
if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
add_timer_on(&buf->read_timer, buf->backend.cpu);
else
add_timer(&buf->read_timer);
+
buf->read_timer_enabled = 1;
}
static void lib_ring_buffer_stop_read_timer(struct lib_ring_buffer *buf)
{
struct channel *chan = buf->backend.chan;
- const struct lib_ring_buffer_config *config = chan->backend.config;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
if (config->wakeup != RING_BUFFER_WAKEUP_BY_TIMER
|| !chan->read_timer_interval
buf->read_timer_enabled = 0;
}
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0))
+
+enum cpuhp_state lttng_rb_hp_prepare;
+enum cpuhp_state lttng_rb_hp_online;
+
+void lttng_rb_set_hp_prepare(enum cpuhp_state val)
+{
+ lttng_rb_hp_prepare = val;
+}
+EXPORT_SYMBOL_GPL(lttng_rb_set_hp_prepare);
+
+void lttng_rb_set_hp_online(enum cpuhp_state val)
+{
+ lttng_rb_hp_online = val;
+}
+EXPORT_SYMBOL_GPL(lttng_rb_set_hp_online);
+
+int lttng_cpuhp_rb_frontend_dead(unsigned int cpu,
+ struct lttng_cpuhp_node *node)
+{
+ struct channel *chan = container_of(node, struct channel,
+ cpuhp_prepare);
+ struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf, cpu);
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
+
+ CHAN_WARN_ON(chan, config->alloc == RING_BUFFER_ALLOC_GLOBAL);
+
+ /*
+ * Performing a buffer switch on a remote CPU. Performed by
+ * the CPU responsible for doing the hotunplug after the target
+ * CPU stopped running completely. Ensures that all data
+ * from that remote CPU is flushed.
+ */
+ lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(lttng_cpuhp_rb_frontend_dead);
+
+int lttng_cpuhp_rb_frontend_online(unsigned int cpu,
+ struct lttng_cpuhp_node *node)
+{
+ struct channel *chan = container_of(node, struct channel,
+ cpuhp_online);
+ struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf, cpu);
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
+
+ CHAN_WARN_ON(chan, config->alloc == RING_BUFFER_ALLOC_GLOBAL);
+
+ wake_up_interruptible(&chan->hp_wait);
+ lib_ring_buffer_start_switch_timer(buf);
+ lib_ring_buffer_start_read_timer(buf);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(lttng_cpuhp_rb_frontend_online);
+
+int lttng_cpuhp_rb_frontend_offline(unsigned int cpu,
+ struct lttng_cpuhp_node *node)
+{
+ struct channel *chan = container_of(node, struct channel,
+ cpuhp_online);
+ struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf, cpu);
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
+
+ CHAN_WARN_ON(chan, config->alloc == RING_BUFFER_ALLOC_GLOBAL);
+
+ lib_ring_buffer_stop_switch_timer(buf);
+ lib_ring_buffer_stop_read_timer(buf);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(lttng_cpuhp_rb_frontend_offline);
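/*
 * Illustration only, not part of this patch: a minimal sketch of how module
 * init code could register the multi-instance hotplug state driving the
 * callbacks above. The example_* names and the "lttng/rb:online" state name
 * are assumptions made for this sketch; cpuhp_setup_state_multi() (from
 * <linux/cpuhotplug.h>) and the lttng_rb_set_hp_online() setter above are
 * taken as given.
 */
static int example_rb_cpuhp_online(unsigned int cpu, struct hlist_node *node)
{
	/* Map the generic hlist_node back to the lttng per-instance node. */
	return lttng_cpuhp_rb_frontend_online(cpu,
			container_of(node, struct lttng_cpuhp_node, node));
}

static int example_rb_cpuhp_offline(unsigned int cpu, struct hlist_node *node)
{
	return lttng_cpuhp_rb_frontend_offline(cpu,
			container_of(node, struct lttng_cpuhp_node, node));
}

static int example_register_rb_online_state(void)
{
	int ret;

	/* Dynamically allocate an AP-online state for the frontend timers. */
	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "lttng/rb:online",
				      example_rb_cpuhp_online,
				      example_rb_cpuhp_offline);
	if (ret < 0)
		return ret;
	/* Remember the allocated state so channel_create() can add instances. */
	lttng_rb_set_hp_online(ret);
	return 0;
}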
+
+#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
+
#ifdef CONFIG_HOTPLUG_CPU
+
/**
* lib_ring_buffer_cpu_hp_callback - CPU hotplug callback
* @nb: notifier block
* Returns the success/failure of the operation. (%NOTIFY_OK, %NOTIFY_BAD)
*/
static
-int __cpuinit lib_ring_buffer_cpu_hp_callback(struct notifier_block *nb,
+int lib_ring_buffer_cpu_hp_callback(struct notifier_block *nb,
unsigned long action,
void *hcpu)
{
struct channel *chan = container_of(nb, struct channel,
cpu_hp_notifier);
struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf, cpu);
- const struct lib_ring_buffer_config *config = chan->backend.config;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
if (!chan->cpu_hp_enable)
return NOTIFY_DONE;
case CPU_DOWN_FAILED_FROZEN:
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
+ wake_up_interruptible(&chan->hp_wait);
lib_ring_buffer_start_switch_timer(buf);
lib_ring_buffer_start_read_timer(buf);
return NOTIFY_OK;
return NOTIFY_DONE;
}
}
+
#endif
-#ifdef CONFIG_NO_HZ
+#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
+
+#if defined(CONFIG_NO_HZ) && defined(CONFIG_LIB_RING_BUFFER)
/*
* For per-cpu buffers, call the reader wakeups before switching the buffer, so
* that wake-up-tracing generated events are flushed before going idle (in
{
struct channel *chan = container_of(nb, struct channel,
tick_nohz_notifier);
- const struct lib_ring_buffer_config *config = chan->backend.config;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
struct lib_ring_buffer *buf;
int cpu = smp_processor_id();
raw_spin_unlock(&buf->raw_tick_nohz_spinlock);
break;
case TICK_NOHZ_STOP:
- spin_lock(&__get_cpu_var(ring_buffer_nohz_lock));
+ spin_lock(lttng_this_cpu_ptr(&ring_buffer_nohz_lock));
lib_ring_buffer_stop_switch_timer(buf);
lib_ring_buffer_stop_read_timer(buf);
- spin_unlock(&__get_cpu_var(ring_buffer_nohz_lock));
+ spin_unlock(lttng_this_cpu_ptr(&ring_buffer_nohz_lock));
break;
case TICK_NOHZ_RESTART:
- spin_lock(&__get_cpu_var(ring_buffer_nohz_lock));
+ spin_lock(lttng_this_cpu_ptr(&ring_buffer_nohz_lock));
lib_ring_buffer_start_read_timer(buf);
lib_ring_buffer_start_switch_timer(buf);
- spin_unlock(&__get_cpu_var(ring_buffer_nohz_lock));
+ spin_unlock(lttng_this_cpu_ptr(&ring_buffer_nohz_lock));
break;
}
atomic_notifier_call_chain(&tick_nohz_notifier, TICK_NOHZ_RESTART,
NULL);
}
-#endif /* CONFIG_NO_HZ */
+#endif /* defined(CONFIG_NO_HZ) && defined(CONFIG_LIB_RING_BUFFER) */
/*
* Holds CPU hotplug.
*/
static void channel_unregister_notifiers(struct channel *chan)
{
- const struct lib_ring_buffer_config *config = chan->backend.config;
- int cpu;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
channel_iterator_unregister_notifiers(chan);
if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
* concurrency.
*/
#endif /* CONFIG_NO_HZ */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0))
+ {
+ int ret;
+
+ ret = cpuhp_state_remove_instance(lttng_rb_hp_online,
+ &chan->cpuhp_online.node);
+ WARN_ON(ret);
+ ret = cpuhp_state_remove_instance_nocalls(lttng_rb_hp_prepare,
+ &chan->cpuhp_prepare.node);
+ WARN_ON(ret);
+ }
+#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
+ {
+ int cpu;
+
#ifdef CONFIG_HOTPLUG_CPU
+ get_online_cpus();
+ chan->cpu_hp_enable = 0;
+ for_each_online_cpu(cpu) {
+ struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf,
+ cpu);
+ lib_ring_buffer_stop_switch_timer(buf);
+ lib_ring_buffer_stop_read_timer(buf);
+ }
+ put_online_cpus();
+ unregister_cpu_notifier(&chan->cpu_hp_notifier);
+#else
+ for_each_possible_cpu(cpu) {
+ struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf,
+ cpu);
+ lib_ring_buffer_stop_switch_timer(buf);
+ lib_ring_buffer_stop_read_timer(buf);
+ }
+#endif
+ }
+#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
+ } else {
+ struct lib_ring_buffer *buf = chan->backend.buf;
+
+ lib_ring_buffer_stop_switch_timer(buf);
+ lib_ring_buffer_stop_read_timer(buf);
+ }
+ channel_backend_unregister_notifiers(&chan->backend);
+}
+
+static void lib_ring_buffer_set_quiescent(struct lib_ring_buffer *buf)
+{
+ if (!buf->quiescent) {
+ buf->quiescent = true;
+ _lib_ring_buffer_switch_remote(buf, SWITCH_FLUSH);
+ }
+}
+
+static void lib_ring_buffer_clear_quiescent(struct lib_ring_buffer *buf)
+{
+ buf->quiescent = false;
+}
+
+void lib_ring_buffer_set_quiescent_channel(struct channel *chan)
+{
+ int cpu;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
+
+ if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
get_online_cpus();
- chan->cpu_hp_enable = 0;
- for_each_online_cpu(cpu) {
+ for_each_channel_cpu(cpu, chan) {
struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf,
cpu);
- lib_ring_buffer_stop_switch_timer(buf);
- lib_ring_buffer_stop_read_timer(buf);
+
+ lib_ring_buffer_set_quiescent(buf);
}
put_online_cpus();
- unregister_cpu_notifier(&chan->cpu_hp_notifier);
-#else
- for_each_possible_cpu(cpu) {
+ } else {
+ struct lib_ring_buffer *buf = chan->backend.buf;
+
+ lib_ring_buffer_set_quiescent(buf);
+ }
+}
+EXPORT_SYMBOL_GPL(lib_ring_buffer_set_quiescent_channel);
+
+void lib_ring_buffer_clear_quiescent_channel(struct channel *chan)
+{
+ int cpu;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
+
+ if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
+ get_online_cpus();
+ for_each_channel_cpu(cpu, chan) {
struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf,
cpu);
- lib_ring_buffer_stop_switch_timer(buf);
- lib_ring_buffer_stop_read_timer(buf);
+
+ lib_ring_buffer_clear_quiescent(buf);
}
-#endif
+ put_online_cpus();
} else {
struct lib_ring_buffer *buf = chan->backend.buf;
- lib_ring_buffer_stop_switch_timer(buf);
- lib_ring_buffer_stop_read_timer(buf);
+ lib_ring_buffer_clear_quiescent(buf);
}
- channel_backend_unregister_notifiers(&chan->backend);
}
+EXPORT_SYMBOL_GPL(lib_ring_buffer_clear_quiescent_channel);
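/*
 * Illustration only, not part of this patch: the intended pairing of the two
 * helpers above from a tracer's stop/start paths. The example_* wrapper
 * names are assumptions made for this sketch.
 */
static void example_tracer_stop_channel(struct channel *chan)
{
	/* Flush every buffer of the channel and mark them quiescent. */
	lib_ring_buffer_set_quiescent_channel(chan);
}

static void example_tracer_start_channel(struct channel *chan)
{
	/* Clear the quiescent state so that a later stop flushes again. */
	lib_ring_buffer_clear_quiescent_channel(chan);
}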
static void channel_free(struct channel *chan)
{
+ if (chan->backend.release_priv_ops) {
+ chan->backend.release_priv_ops(chan->backend.priv_ops);
+ }
channel_iterator_free(chan);
channel_backend_free(&chan->backend);
kfree(chan);
size_t num_subbuf, unsigned int switch_timer_interval,
unsigned int read_timer_interval)
{
- int ret, cpu;
+ int ret;
struct channel *chan;
if (lib_ring_buffer_check_config(config, switch_timer_interval,
chan->commit_count_mask = (~0UL >> chan->backend.num_subbuf_order);
chan->switch_timer_interval = usecs_to_jiffies(switch_timer_interval);
chan->read_timer_interval = usecs_to_jiffies(read_timer_interval);
+ kref_init(&chan->ref);
init_waitqueue_head(&chan->read_wait);
+ init_waitqueue_head(&chan->hp_wait);
if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
-#ifdef CONFIG_NO_HZ
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0))
+ chan->cpuhp_prepare.component = LTTNG_RING_BUFFER_FRONTEND;
+ ret = cpuhp_state_add_instance_nocalls(lttng_rb_hp_prepare,
+ &chan->cpuhp_prepare.node);
+ if (ret)
+ goto cpuhp_prepare_error;
+
+ chan->cpuhp_online.component = LTTNG_RING_BUFFER_FRONTEND;
+ ret = cpuhp_state_add_instance(lttng_rb_hp_online,
+ &chan->cpuhp_online.node);
+ if (ret)
+ goto cpuhp_online_error;
+#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
+ {
+ int cpu;
+ /*
+ * In case of non-hotplug cpu, if the ring-buffer is allocated
+ * in early initcall, it will not be notified of secondary cpus.
+		 * In that case, we need to allocate for all possible cpus.
+ */
+#ifdef CONFIG_HOTPLUG_CPU
+ chan->cpu_hp_notifier.notifier_call =
+ lib_ring_buffer_cpu_hp_callback;
+ chan->cpu_hp_notifier.priority = 6;
+ register_cpu_notifier(&chan->cpu_hp_notifier);
+
+ get_online_cpus();
+ for_each_online_cpu(cpu) {
+ struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf,
+ cpu);
+ spin_lock(&per_cpu(ring_buffer_nohz_lock, cpu));
+ lib_ring_buffer_start_switch_timer(buf);
+ lib_ring_buffer_start_read_timer(buf);
+ spin_unlock(&per_cpu(ring_buffer_nohz_lock, cpu));
+ }
+ chan->cpu_hp_enable = 1;
+ put_online_cpus();
+#else
+ for_each_possible_cpu(cpu) {
+ struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf,
+ cpu);
+ spin_lock(&per_cpu(ring_buffer_nohz_lock, cpu));
+ lib_ring_buffer_start_switch_timer(buf);
+ lib_ring_buffer_start_read_timer(buf);
+ spin_unlock(&per_cpu(ring_buffer_nohz_lock, cpu));
+ }
+#endif
+ }
+#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
+
+#if defined(CONFIG_NO_HZ) && defined(CONFIG_LIB_RING_BUFFER)
/* Only benefit from NO_HZ idle with per-cpu buffers for now. */
chan->tick_nohz_notifier.notifier_call =
ring_buffer_tick_nohz_callback;
chan->tick_nohz_notifier.priority = ~0U;
atomic_notifier_chain_register(&tick_nohz_notifier,
&chan->tick_nohz_notifier);
-#endif /* CONFIG_NO_HZ */
-
- /*
- * In case of non-hotplug cpu, if the ring-buffer is allocated
- * in early initcall, it will not be notified of secondary cpus.
- * In that off case, we need to allocate for all possible cpus.
- */
-#ifdef CONFIG_HOTPLUG_CPU
- chan->cpu_hp_notifier.notifier_call =
- lib_ring_buffer_cpu_hp_callback;
- chan->cpu_hp_notifier.priority = 6;
- register_cpu_notifier(&chan->cpu_hp_notifier);
+#endif /* defined(CONFIG_NO_HZ) && defined(CONFIG_LIB_RING_BUFFER) */
- get_online_cpus();
- for_each_online_cpu(cpu) {
- struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf,
- cpu);
- spin_lock(&per_cpu(ring_buffer_nohz_lock, cpu));
- lib_ring_buffer_start_switch_timer(buf);
- lib_ring_buffer_start_read_timer(buf);
- spin_unlock(&per_cpu(ring_buffer_nohz_lock, cpu));
- }
- chan->cpu_hp_enable = 1;
- put_online_cpus();
-#else
- for_each_possible_cpu(cpu) {
- struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf,
- cpu);
- spin_lock(&per_cpu(ring_buffer_nohz_lock, cpu));
- lib_ring_buffer_start_switch_timer(buf);
- lib_ring_buffer_start_read_timer(buf);
- spin_unlock(&per_cpu(ring_buffer_nohz_lock, cpu));
- }
-#endif
} else {
struct lib_ring_buffer *buf = chan->backend.buf;
return chan;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0))
+cpuhp_online_error:
+ ret = cpuhp_state_remove_instance_nocalls(lttng_rb_hp_prepare,
+ &chan->cpuhp_prepare.node);
+ WARN_ON(ret);
+cpuhp_prepare_error:
+#endif /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
error_free_backend:
channel_backend_free(&chan->backend);
error:
}
EXPORT_SYMBOL_GPL(channel_create);
+static
+void channel_release(struct kref *kref)
+{
+ struct channel *chan = container_of(kref, struct channel, ref);
+ channel_free(chan);
+}
+
/**
* channel_destroy - Finalize, wait for q.s. and destroy channel.
* @chan: channel to destroy
*
* Holds cpu hotplug.
- * Call "destroy" callback, finalize channels, wait for readers to release their
- * reference, then destroy ring buffer data. Note that when readers have
- * completed data consumption of finalized channels, get_subbuf() will return
- * -ENODATA. They should release their handle at that point.
- * Returns the private data pointer.
+ * Call "destroy" callback, finalize channels, and then decrement the
+ * channel reference count. Note that when readers have completed data
+ * consumption of finalized channels, get_subbuf() will return -ENODATA.
+ * They should release their handle at that point. Returns the private
+ * data pointer.
*/
void *channel_destroy(struct channel *chan)
{
int cpu;
- const struct lib_ring_buffer_config *config = chan->backend.config;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
void *priv;
channel_unregister_notifiers(chan);
chan->backend.priv,
cpu);
if (buf->backend.allocated)
- lib_ring_buffer_switch_slow(buf, SWITCH_FLUSH);
+ lib_ring_buffer_set_quiescent(buf);
/*
* Perform flush before writing to finalized.
*/
smp_wmb();
- ACCESS_ONCE(buf->finalized) = 1;
+ WRITE_ONCE(buf->finalized, 1);
wake_up_interruptible(&buf->read_wait);
}
} else {
if (config->cb.buffer_finalize)
config->cb.buffer_finalize(buf, chan->backend.priv, -1);
if (buf->backend.allocated)
- lib_ring_buffer_switch_slow(buf, SWITCH_FLUSH);
+ lib_ring_buffer_set_quiescent(buf);
/*
* Perform flush before writing to finalized.
*/
smp_wmb();
- ACCESS_ONCE(buf->finalized) = 1;
+ WRITE_ONCE(buf->finalized, 1);
wake_up_interruptible(&buf->read_wait);
}
+ WRITE_ONCE(chan->finalized, 1);
+ wake_up_interruptible(&chan->hp_wait);
wake_up_interruptible(&chan->read_wait);
-
- while (atomic_long_read(&chan->read_ref) > 0)
- msleep(100);
- /* Finish waiting for refcount before free */
- smp_mb();
priv = chan->backend.priv;
- channel_free(chan);
+ kref_put(&chan->ref, channel_release);
return priv;
}
EXPORT_SYMBOL_GPL(channel_destroy);
if (!atomic_long_add_unless(&buf->active_readers, 1, 1))
return -EBUSY;
- atomic_long_inc(&chan->read_ref);
- smp_mb__after_atomic_inc();
+ if (!lttng_kref_get(&chan->ref)) {
+ atomic_long_dec(&buf->active_readers);
+ return -EOVERFLOW;
+ }
+ lttng_smp_mb__after_atomic();
return 0;
}
EXPORT_SYMBOL_GPL(lib_ring_buffer_open_read);
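/*
 * Illustration only, not part of this patch: the open/release pairing a
 * reader is expected to follow around data consumption. The
 * example_with_reader() name is an assumption made for this sketch.
 */
static int example_with_reader(struct lib_ring_buffer *buf)
{
	int ret;

	ret = lib_ring_buffer_open_read(buf);
	if (ret)
		return ret;	/* -EBUSY: reader already active, -EOVERFLOW: channel gone. */
	/* ... consume data, e.g. with the loop sketched further below ... */
	lib_ring_buffer_release_read(buf);
	return 0;
}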
struct channel *chan = buf->backend.chan;
CHAN_WARN_ON(chan, atomic_long_read(&buf->active_readers) != 1);
- smp_mb__before_atomic_dec();
- atomic_long_dec(&chan->read_ref);
+ lttng_smp_mb__before_atomic();
atomic_long_dec(&buf->active_readers);
+ kref_put(&chan->ref, channel_release);
}
EXPORT_SYMBOL_GPL(lib_ring_buffer_release_read);
unsigned long *consumed, unsigned long *produced)
{
struct channel *chan = buf->backend.chan;
- const struct lib_ring_buffer_config *config = chan->backend.config;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
unsigned long consumed_cur, write_offset;
int finalized;
retry:
- finalized = ACCESS_ONCE(buf->finalized);
+ finalized = READ_ONCE(buf->finalized);
/*
* Read finalized before counters.
*/
/**
* lib_ring_buffer_move_consumer - move consumed counter forward
+ *
+ * Should only be called from consumer context.
* @buf: ring buffer
* @consumed_new: new consumed count value
*/
while ((long) consumed - (long) consumed_new < 0)
consumed = atomic_long_cmpxchg(&buf->consumed, consumed,
consumed_new);
+ /* Wake-up the metadata producer */
+ wake_up_interruptible(&buf->write_wait);
}
EXPORT_SYMBOL_GPL(lib_ring_buffer_move_consumer);
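/*
 * Illustration only, not part of this patch: a rough consumer loop matching
 * the get_subbuf()/put_subbuf() reader flow. lib_ring_buffer_snapshot(),
 * lib_ring_buffer_get_subbuf(), lib_ring_buffer_put_subbuf() and
 * lib_ring_buffer_move_consumer() are assumed to keep the signatures used in
 * this library; example_extract_subbuf() stands in for the actual data
 * extraction (e.g. splice to disk or network).
 */
static int example_consume_pending(struct lib_ring_buffer *buf,
		int (*example_extract_subbuf)(struct lib_ring_buffer *buf))
{
	struct channel *chan = buf->backend.chan;
	unsigned long consumed, produced;
	int ret;

	ret = lib_ring_buffer_snapshot(buf, &consumed, &produced);
	if (ret)
		return ret;
	while ((long) (produced - consumed) > 0) {
		ret = lib_ring_buffer_get_subbuf(buf, consumed);
		if (ret)
			break;	/* Sub-buffer not fully committed yet. */
		ret = example_extract_subbuf(buf);
		lib_ring_buffer_put_subbuf(buf);
		if (ret)
			break;
		/* Release the sub-buffer to the producers and move on. */
		consumed += chan->backend.subbuf_size;
		lib_ring_buffer_move_consumer(buf, consumed);
	}
	return ret;
}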
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
+static void lib_ring_buffer_flush_read_subbuf_dcache(
+ const struct lib_ring_buffer_config *config,
+ struct channel *chan,
+ struct lib_ring_buffer *buf)
+{
+ struct lib_ring_buffer_backend_pages *pages;
+ unsigned long sb_bindex, id, i, nr_pages;
+
+ if (config->output != RING_BUFFER_MMAP)
+ return;
+
+ /*
+ * Architectures with caches aliased on virtual addresses may
+ * use different cache lines for the linear mapping vs
+ * user-space memory mapping. Given that the ring buffer is
+ * based on the kernel linear mapping, aligning it with the
+ * user-space mapping is not straightforward, and would require
+ * extra TLB entries. Therefore, simply flush the dcache for the
+ * entire sub-buffer before reading it.
+ */
+ id = buf->backend.buf_rsb.id;
+ sb_bindex = subbuffer_id_get_index(config, id);
+ pages = buf->backend.array[sb_bindex];
+ nr_pages = buf->backend.num_pages_per_subbuf;
+ for (i = 0; i < nr_pages; i++) {
+ struct lib_ring_buffer_backend_page *backend_page;
+
+ backend_page = &pages->p[i];
+ flush_dcache_page(pfn_to_page(backend_page->pfn));
+ }
+}
+#else
+static void lib_ring_buffer_flush_read_subbuf_dcache(
+ const struct lib_ring_buffer_config *config,
+ struct channel *chan,
+ struct lib_ring_buffer *buf)
+{
+}
+#endif
+
/**
* lib_ring_buffer_get_subbuf - get exclusive access to subbuffer for reading
* @buf: ring buffer
unsigned long consumed)
{
struct channel *chan = buf->backend.chan;
- const struct lib_ring_buffer_config *config = chan->backend.config;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
unsigned long consumed_cur, consumed_idx, commit_count, write_offset;
int ret;
int finalized;
+ if (buf->get_subbuf) {
+ /*
+ * Reader is trying to get a subbuffer twice.
+ */
+ CHAN_WARN_ON(chan, 1);
+ return -EBUSY;
+ }
retry:
- finalized = ACCESS_ONCE(buf->finalized);
+ finalized = READ_ONCE(buf->finalized);
/*
* Read finalized before counters.
*/
*/
if (((commit_count - chan->backend.subbuf_size)
& chan->commit_count_mask)
- - (buf_trunc(consumed_cur, chan)
+ - (buf_trunc(consumed, chan)
>> chan->backend.num_subbuf_order)
!= 0)
goto nodata;
* Check that we are not about to read the same subbuffer in
* which the writer head is.
*/
- if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed_cur, chan)
+ if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed, chan)
== 0)
goto nodata;
buf->get_subbuf_consumed = consumed;
buf->get_subbuf = 1;
+ lib_ring_buffer_flush_read_subbuf_dcache(config, chan, buf);
+
return 0;
nodata:
{
struct lib_ring_buffer_backend *bufb = &buf->backend;
struct channel *chan = bufb->chan;
- const struct lib_ring_buffer_config *config = chan->backend.config;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
unsigned long read_sb_bindex, consumed_idx, consumed;
CHAN_WARN_ON(chan, atomic_long_read(&buf->active_readers) != 1);
unsigned long cons_offset,
int cpu)
{
- const struct lib_ring_buffer_config *config = chan->backend.config;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
unsigned long cons_idx, commit_count, commit_count_sb;
cons_idx = subbuf_index(cons_offset, chan);
struct channel *chan,
void *priv, int cpu)
{
- const struct lib_ring_buffer_config *config = chan->backend.config;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
unsigned long write_offset, cons_offset;
- /*
- * Can be called in the error path of allocation when
- * trans_channel_data is not yet set.
- */
- if (!chan)
- return;
/*
* No need to order commit_count, write_offset and cons_offset reads
* because we execute at teardown when no more writer nor reader
write_offset = v_read(config, &buf->offset);
cons_offset = atomic_long_read(&buf->consumed);
if (write_offset != cons_offset)
- printk(KERN_WARNING
+ printk(KERN_DEBUG
"ring buffer %s, cpu %d: "
"non-consumed data\n"
" [ %lu bytes written, %lu bytes read ]\n",
void lib_ring_buffer_print_errors(struct channel *chan,
struct lib_ring_buffer *buf, int cpu)
{
- const struct lib_ring_buffer_config *config = chan->backend.config;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
void *priv = chan->backend.priv;
- printk(KERN_DEBUG "ring buffer %s, cpu %d: %lu records written, "
- "%lu records overrun\n",
- chan->backend.name, cpu,
- v_read(config, &buf->records_count),
- v_read(config, &buf->records_overrun));
-
- if (v_read(config, &buf->records_lost_full)
- || v_read(config, &buf->records_lost_wrap)
- || v_read(config, &buf->records_lost_big))
- printk(KERN_WARNING
- "ring buffer %s, cpu %d: records were lost. Caused by:\n"
- " [ %lu buffer full, %lu nest buffer wrap-around, "
- "%lu event too big ]\n",
- chan->backend.name, cpu,
- v_read(config, &buf->records_lost_full),
- v_read(config, &buf->records_lost_wrap),
- v_read(config, &buf->records_lost_big));
-
+ if (!strcmp(chan->backend.name, "relay-metadata")) {
+ printk(KERN_DEBUG "ring buffer %s: %lu records written, "
+ "%lu records overrun\n",
+ chan->backend.name,
+ v_read(config, &buf->records_count),
+ v_read(config, &buf->records_overrun));
+ } else {
+ printk(KERN_DEBUG "ring buffer %s, cpu %d: %lu records written, "
+ "%lu records overrun\n",
+ chan->backend.name, cpu,
+ v_read(config, &buf->records_count),
+ v_read(config, &buf->records_overrun));
+
+ if (v_read(config, &buf->records_lost_full)
+ || v_read(config, &buf->records_lost_wrap)
+ || v_read(config, &buf->records_lost_big))
+ printk(KERN_WARNING
+ "ring buffer %s, cpu %d: records were lost. Caused by:\n"
+ " [ %lu buffer full, %lu nest buffer wrap-around, "
+ "%lu event too big ]\n",
+ chan->backend.name, cpu,
+ v_read(config, &buf->records_lost_full),
+ v_read(config, &buf->records_lost_wrap),
+ v_read(config, &buf->records_lost_big));
+ }
lib_ring_buffer_print_buffer_errors(buf, chan, priv, cpu);
}
struct switch_offsets *offsets,
u64 tsc)
{
- const struct lib_ring_buffer_config *config = chan->backend.config;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
unsigned long oldidx = subbuf_index(offsets->old, chan);
unsigned long commit_count;
+ struct commit_counters_hot *cc_hot;
config->cb.buffer_begin(buf, tsc, oldidx);
barrier();
} else
smp_wmb();
- v_add(config, config->cb.subbuffer_header_size(),
- &buf->commit_hot[oldidx].cc);
- commit_count = v_read(config, &buf->commit_hot[oldidx].cc);
+ cc_hot = &buf->commit_hot[oldidx];
+ v_add(config, config->cb.subbuffer_header_size(), &cc_hot->cc);
+ commit_count = v_read(config, &cc_hot->cc);
/* Check if the written buffer has to be delivered */
lib_ring_buffer_check_deliver(config, buf, chan, offsets->old,
- commit_count, oldidx);
- lib_ring_buffer_write_commit_counter(config, buf, chan, oldidx,
- offsets->old, commit_count,
- config->cb.subbuffer_header_size());
+ commit_count, oldidx, tsc);
+ lib_ring_buffer_write_commit_counter(config, buf, chan,
+ offsets->old + config->cb.subbuffer_header_size(),
+ commit_count, cc_hot);
}
/*
struct switch_offsets *offsets,
u64 tsc)
{
- const struct lib_ring_buffer_config *config = chan->backend.config;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
unsigned long oldidx = subbuf_index(offsets->old - 1, chan);
unsigned long commit_count, padding_size, data_size;
+ struct commit_counters_hot *cc_hot;
+ u64 *ts_end;
data_size = subbuf_offset(offsets->old - 1, chan) + 1;
padding_size = chan->backend.subbuf_size - data_size;
subbuffer_set_data_size(config, &buf->backend, oldidx, data_size);
+ ts_end = &buf->ts_end[oldidx];
/*
- * Order all writes to buffer before the commit count update that will
- * determine that the subbuffer is full.
+ * This is the last space reservation in that sub-buffer before
+ * it gets delivered. This provides exclusive access to write to
+ * this sub-buffer's ts_end. There are also no concurrent
+ * readers of that ts_end because delivery of that sub-buffer is
+ * postponed until the commit counter is incremented for the
+ * current space reservation.
+ */
+ *ts_end = tsc;
+
+ /*
+ * Order all writes to buffer and store to ts_end before the commit
+ * count update that will determine that the subbuffer is full.
*/
if (config->ipi == RING_BUFFER_IPI_BARRIER) {
/*
barrier();
} else
smp_wmb();
- v_add(config, padding_size, &buf->commit_hot[oldidx].cc);
- commit_count = v_read(config, &buf->commit_hot[oldidx].cc);
+ cc_hot = &buf->commit_hot[oldidx];
+ v_add(config, padding_size, &cc_hot->cc);
+ commit_count = v_read(config, &cc_hot->cc);
lib_ring_buffer_check_deliver(config, buf, chan, offsets->old - 1,
- commit_count, oldidx);
- lib_ring_buffer_write_commit_counter(config, buf, chan, oldidx,
- offsets->old, commit_count,
- padding_size);
+ commit_count, oldidx, tsc);
+ lib_ring_buffer_write_commit_counter(config, buf, chan,
+ offsets->old + padding_size, commit_count,
+ cc_hot);
}
/*
struct switch_offsets *offsets,
u64 tsc)
{
- const struct lib_ring_buffer_config *config = chan->backend.config;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
unsigned long beginidx = subbuf_index(offsets->begin, chan);
unsigned long commit_count;
+ struct commit_counters_hot *cc_hot;
config->cb.buffer_begin(buf, tsc, beginidx);
barrier();
} else
smp_wmb();
- v_add(config, config->cb.subbuffer_header_size(),
- &buf->commit_hot[beginidx].cc);
- commit_count = v_read(config, &buf->commit_hot[beginidx].cc);
+ cc_hot = &buf->commit_hot[beginidx];
+ v_add(config, config->cb.subbuffer_header_size(), &cc_hot->cc);
+ commit_count = v_read(config, &cc_hot->cc);
/* Check if the written buffer has to be delivered */
lib_ring_buffer_check_deliver(config, buf, chan, offsets->begin,
- commit_count, beginidx);
- lib_ring_buffer_write_commit_counter(config, buf, chan, beginidx,
- offsets->begin, commit_count,
- config->cb.subbuffer_header_size());
+ commit_count, beginidx, tsc);
+ lib_ring_buffer_write_commit_counter(config, buf, chan,
+ offsets->begin + config->cb.subbuffer_header_size(),
+ commit_count, cc_hot);
}
/*
* lib_ring_buffer_switch_new_end: finish switching current subbuffer
*
- * The only remaining threads could be the ones with pending commits. They will
- * have to do the deliver themselves.
+ * Calls subbuffer_set_data_size() to set the data size of the current
+ * sub-buffer. We do not need to perform check_deliver nor commit here,
+ * since this task will be done by the "commit" of the event for which
+ * we are currently doing the space reservation.
*/
static
void lib_ring_buffer_switch_new_end(struct lib_ring_buffer *buf,
struct switch_offsets *offsets,
u64 tsc)
{
- const struct lib_ring_buffer_config *config = chan->backend.config;
- unsigned long endidx = subbuf_index(offsets->end - 1, chan);
- unsigned long commit_count, padding_size, data_size;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
+ unsigned long endidx, data_size;
+ u64 *ts_end;
+ endidx = subbuf_index(offsets->end - 1, chan);
data_size = subbuf_offset(offsets->end - 1, chan) + 1;
- padding_size = chan->backend.subbuf_size - data_size;
subbuffer_set_data_size(config, &buf->backend, endidx, data_size);
-
+ ts_end = &buf->ts_end[endidx];
/*
- * Order all writes to buffer before the commit count update that will
- * determine that the subbuffer is full.
+ * This is the last space reservation in that sub-buffer before
+ * it gets delivered. This provides exclusive access to write to
+ * this sub-buffer's ts_end. There are also no concurrent
+ * readers of that ts_end because delivery of that sub-buffer is
+ * postponed until the commit counter is incremented for the
+ * current space reservation.
*/
- if (config->ipi == RING_BUFFER_IPI_BARRIER) {
- /*
- * Must write slot data before incrementing commit count. This
- * compiler barrier is upgraded into a smp_mb() by the IPI sent
- * by get_subbuf().
- */
- barrier();
- } else
- smp_wmb();
- v_add(config, padding_size, &buf->commit_hot[endidx].cc);
- commit_count = v_read(config, &buf->commit_hot[endidx].cc);
- lib_ring_buffer_check_deliver(config, buf, chan, offsets->end - 1,
- commit_count, endidx);
- lib_ring_buffer_write_commit_counter(config, buf, chan, endidx,
- offsets->end, commit_count,
- padding_size);
+ *ts_end = tsc;
}
/*
struct switch_offsets *offsets,
u64 *tsc)
{
- const struct lib_ring_buffer_config *config = chan->backend.config;
- unsigned long off;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
+ unsigned long off, reserve_commit_diff;
offsets->begin = v_read(config, &buf->offset);
offsets->old = offsets->begin;
* (records and header timestamps) are visible to the reader. This is
* required for quiescence guarantees for the fusion merge.
*/
- if (mode == SWITCH_FLUSH || off > 0) {
- if (unlikely(off == 0)) {
- /*
- * The client does not save any header information.
- * Don't switch empty subbuffer on finalize, because it
- * is invalid to deliver a completely empty subbuffer.
- */
- if (!config->cb.subbuffer_header_size())
+ if (mode != SWITCH_FLUSH && !off)
+ return -1; /* we do not have to switch : buffer is empty */
+
+ if (unlikely(off == 0)) {
+ unsigned long sb_index, commit_count;
+
+ /*
+ * We are performing a SWITCH_FLUSH. At this stage, there are no
+ * concurrent writes into the buffer.
+ *
+ * The client does not save any header information. Don't
+ * switch empty subbuffer on finalize, because it is invalid to
+ * deliver a completely empty subbuffer.
+ */
+ if (!config->cb.subbuffer_header_size())
+ return -1;
+
+ /* Test new buffer integrity */
+ sb_index = subbuf_index(offsets->begin, chan);
+ commit_count = v_read(config,
+ &buf->commit_cold[sb_index].cc_sb);
+ reserve_commit_diff =
+ (buf_trunc(offsets->begin, chan)
+ >> chan->backend.num_subbuf_order)
+ - (commit_count & chan->commit_count_mask);
+ if (likely(reserve_commit_diff == 0)) {
+ /* Next subbuffer not being written to. */
+ if (unlikely(config->mode != RING_BUFFER_OVERWRITE &&
+ subbuf_trunc(offsets->begin, chan)
+ - subbuf_trunc((unsigned long)
+ atomic_long_read(&buf->consumed), chan)
+ >= chan->backend.buf_size)) {
+ /*
+ * We do not overwrite non consumed buffers
+ * and we are full : don't switch.
+ */
return -1;
+ } else {
+ /*
+ * Next subbuffer not being written to, and we
+ * are either in overwrite mode or the buffer is
+ * not full. It's safe to write in this new
+ * subbuffer.
+ */
+ }
+ } else {
/*
- * Need to write the subbuffer start header on finalize.
+ * Next subbuffer reserve offset does not match the
+ * commit offset. Don't perform switch in
+ * producer-consumer and overwrite mode. Caused by
+ * either a writer OOPS or too many nested writes over a
+ * reserve/commit pair.
*/
- offsets->switch_old_start = 1;
+ return -1;
}
- offsets->begin = subbuf_align(offsets->begin, chan);
- } else
- return -1; /* we do not have to switch : buffer is empty */
+
+ /*
+ * Need to write the subbuffer start header on finalize.
+ */
+ offsets->switch_old_start = 1;
+ }
+ offsets->begin = subbuf_align(offsets->begin, chan);
/* Note: old points to the next subbuf at offset 0 */
offsets->end = offsets->begin;
return 0;
void lib_ring_buffer_switch_slow(struct lib_ring_buffer *buf, enum switch_mode mode)
{
struct channel *chan = buf->backend.chan;
- const struct lib_ring_buffer_config *config = chan->backend.config;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
struct switch_offsets offsets;
unsigned long oldidx;
u64 tsc;
}
EXPORT_SYMBOL_GPL(lib_ring_buffer_switch_slow);
+struct switch_param {
+ struct lib_ring_buffer *buf;
+ enum switch_mode mode;
+};
+
+static void remote_switch(void *info)
+{
+ struct switch_param *param = info;
+ struct lib_ring_buffer *buf = param->buf;
+
+ lib_ring_buffer_switch_slow(buf, param->mode);
+}
+
+static void _lib_ring_buffer_switch_remote(struct lib_ring_buffer *buf,
+ enum switch_mode mode)
+{
+ struct channel *chan = buf->backend.chan;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
+ int ret;
+ struct switch_param param;
+
+ /*
+ * With global synchronization we don't need to use the IPI scheme.
+ */
+ if (config->sync == RING_BUFFER_SYNC_GLOBAL) {
+ lib_ring_buffer_switch_slow(buf, mode);
+ return;
+ }
+
+ /*
+ * Disabling preemption ensures two things: first, that the
+ * target cpu is not taken concurrently offline while we are within
+ * smp_call_function_single(). Secondly, if it happens that the
+ * CPU is not online, our own call to lib_ring_buffer_switch_slow()
+ * needs to be protected from CPU hotplug handlers, which can
+ * also perform a remote subbuffer switch.
+ */
+ preempt_disable();
+ param.buf = buf;
+ param.mode = mode;
+ ret = smp_call_function_single(buf->backend.cpu,
+			remote_switch, &param, 1);
+ if (ret) {
+ /* Remote CPU is offline, do it ourself. */
+ lib_ring_buffer_switch_slow(buf, mode);
+ }
+ preempt_enable();
+}
+
+/* Switch sub-buffer if current sub-buffer is non-empty. */
+void lib_ring_buffer_switch_remote(struct lib_ring_buffer *buf)
+{
+ _lib_ring_buffer_switch_remote(buf, SWITCH_ACTIVE);
+}
+EXPORT_SYMBOL_GPL(lib_ring_buffer_switch_remote);
+
+/* Switch sub-buffer even if current sub-buffer is empty. */
+void lib_ring_buffer_switch_remote_empty(struct lib_ring_buffer *buf)
+{
+ _lib_ring_buffer_switch_remote(buf, SWITCH_FLUSH);
+}
+EXPORT_SYMBOL_GPL(lib_ring_buffer_switch_remote_empty);
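/*
 * Illustration only, not part of this patch: a sketch of how a caller could
 * flush all buffers of a channel through the remote-switch helpers above.
 * The example_flush_channel() name is an assumption; the per-cpu/global
 * buffer layout is handled the same way as elsewhere in this file.
 */
static void example_flush_channel(struct channel *chan)
{
	const struct lib_ring_buffer_config *config = &chan->backend.config;
	int cpu;

	if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
		get_online_cpus();
		for_each_channel_cpu(cpu, chan)
			lib_ring_buffer_switch_remote(
				per_cpu_ptr(chan->backend.buf, cpu));
		put_online_cpus();
	} else {
		lib_ring_buffer_switch_remote(chan->backend.buf);
	}
}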
+
/*
* Returns :
* 0 if ok
- * !0 if execution must be aborted.
+ * -ENOSPC if event size is too large for packet.
+ * -ENOBUFS if there is currently not enough space in buffer for the event.
+ * -EIO if data cannot be written into the buffer for any other reason.
*/
static
int lib_ring_buffer_try_reserve_slow(struct lib_ring_buffer *buf,
struct switch_offsets *offsets,
struct lib_ring_buffer_ctx *ctx)
{
- const struct lib_ring_buffer_config *config = chan->backend.config;
- unsigned long reserve_commit_diff;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
+ unsigned long reserve_commit_diff, offset_cmp;
- offsets->begin = v_read(config, &buf->offset);
+retry:
+ offsets->begin = offset_cmp = v_read(config, &buf->offset);
offsets->old = offsets->begin;
offsets->switch_new_start = 0;
offsets->switch_new_end = 0;
offsets->pre_header_padding = 0;
ctx->tsc = config->cb.ring_buffer_clock_read(chan);
+ if ((int64_t) ctx->tsc == -EIO)
+ return -EIO;
if (last_tsc_overflow(config, buf, ctx->tsc))
- ctx->rflags = RING_BUFFER_RFLAG_FULL_TSC;
+ ctx->rflags |= RING_BUFFER_RFLAG_FULL_TSC;
if (unlikely(subbuf_offset(offsets->begin, ctx->chan) == 0)) {
offsets->switch_new_start = 1; /* For offsets->begin */
} else {
offsets->size = config->cb.record_header_size(config, chan,
offsets->begin,
- ctx->data_size,
&offsets->pre_header_padding,
- ctx->rflags, ctx);
+ ctx);
offsets->size +=
lib_ring_buffer_align(offsets->begin + offsets->size,
ctx->largest_align)
}
}
if (unlikely(offsets->switch_new_start)) {
- unsigned long sb_index;
+ unsigned long sb_index, commit_count;
/*
* We are typically not filling the previous buffer completely.
+ config->cb.subbuffer_header_size();
/* Test new buffer integrity */
sb_index = subbuf_index(offsets->begin, chan);
+ /*
+ * Read buf->offset before buf->commit_cold[sb_index].cc_sb.
+ * lib_ring_buffer_check_deliver() has the matching
+ * memory barriers required around commit_cold cc_sb
+ * updates to ensure reserve and commit counter updates
+ * are not seen reordered when updated by another CPU.
+ */
+ smp_rmb();
+ commit_count = v_read(config,
+ &buf->commit_cold[sb_index].cc_sb);
+ /* Read buf->commit_cold[sb_index].cc_sb before buf->offset. */
+ smp_rmb();
+ if (unlikely(offset_cmp != v_read(config, &buf->offset))) {
+ /*
+			 * The reserve counter has been concurrently updated
+ * while we read the commit counter. This means the
+ * commit counter we read might not match buf->offset
+ * due to concurrent update. We therefore need to retry.
+ */
+ goto retry;
+ }
reserve_commit_diff =
(buf_trunc(offsets->begin, chan)
>> chan->backend.num_subbuf_order)
- - ((unsigned long) v_read(config,
- &buf->commit_cold[sb_index].cc_sb)
- & chan->commit_count_mask);
+ - (commit_count & chan->commit_count_mask);
if (likely(reserve_commit_diff == 0)) {
/* Next subbuffer not being written to. */
if (unlikely(config->mode != RING_BUFFER_OVERWRITE &&
* and we are full : record is lost.
*/
v_inc(config, &buf->records_lost_full);
- return -1;
+ return -ENOBUFS;
} else {
/*
* Next subbuffer not being written to, and we
} else {
/*
* Next subbuffer reserve offset does not match the
- * commit offset. Drop record in producer-consumer and
- * overwrite mode. Caused by either a writer OOPS or too
- * many nested writes over a reserve/commit pair.
+ * commit offset, and this did not involve update to the
+ * reserve counter. Drop record in producer-consumer and
+ * overwrite mode. Caused by either a writer OOPS or
+ * too many nested writes over a reserve/commit pair.
*/
v_inc(config, &buf->records_lost_wrap);
- return -1;
+ return -EIO;
}
offsets->size =
config->cb.record_header_size(config, chan,
offsets->begin,
- ctx->data_size,
&offsets->pre_header_padding,
- ctx->rflags, ctx);
+ ctx);
offsets->size +=
lib_ring_buffer_align(offsets->begin + offsets->size,
ctx->largest_align)
* complete the sub-buffer switch.
*/
v_inc(config, &buf->records_lost_big);
- return -1;
+ return -ENOSPC;
} else {
/*
* We just made a successful buffer switch and the
return 0;
}
+static struct lib_ring_buffer *get_current_buf(struct channel *chan, int cpu)
+{
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
+
+ if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
+ return per_cpu_ptr(chan->backend.buf, cpu);
+ else
+ return chan->backend.buf;
+}
+
+void lib_ring_buffer_lost_event_too_big(struct channel *chan)
+{
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
+ struct lib_ring_buffer *buf = get_current_buf(chan, smp_processor_id());
+
+ v_inc(config, &buf->records_lost_big);
+}
+EXPORT_SYMBOL_GPL(lib_ring_buffer_lost_event_too_big);
+
/**
* lib_ring_buffer_reserve_slow - Atomic slot reservation in a buffer.
* @ctx: ring buffer context.
*
- * Return : -ENOSPC if not enough space, else returns 0.
+ * Return : -ENOBUFS if not enough space, -ENOSPC if event size too large,
+ * -EIO for other errors, else returns 0.
* It will take care of sub-buffer switching.
*/
int lib_ring_buffer_reserve_slow(struct lib_ring_buffer_ctx *ctx)
{
struct channel *chan = ctx->chan;
- const struct lib_ring_buffer_config *config = chan->backend.config;
+ const struct lib_ring_buffer_config *config = &chan->backend.config;
struct lib_ring_buffer *buf;
struct switch_offsets offsets;
+ int ret;
- if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
- buf = per_cpu_ptr(chan->backend.buf, ctx->cpu);
- else
- buf = chan->backend.buf;
- ctx->buf = buf;
-
+ ctx->buf = buf = get_current_buf(chan, ctx->cpu);
offsets.size = 0;
do {
- if (unlikely(lib_ring_buffer_try_reserve_slow(buf, chan, &offsets,
- ctx)))
- return -ENOSPC;
+ ret = lib_ring_buffer_try_reserve_slow(buf, chan, &offsets,
+ ctx);
+ if (unlikely(ret))
+ return ret;
} while (unlikely(v_cmpxchg(config, &buf->offset, offsets.old,
offsets.end)
!= offsets.old));
return 0;
}
EXPORT_SYMBOL_GPL(lib_ring_buffer_reserve_slow);
+
+static
+void lib_ring_buffer_vmcore_check_deliver(const struct lib_ring_buffer_config *config,
+ struct lib_ring_buffer *buf,
+ unsigned long commit_count,
+ unsigned long idx)
+{
+ if (config->oops == RING_BUFFER_OOPS_CONSISTENCY)
+ v_set(config, &buf->commit_hot[idx].seq, commit_count);
+}
+
+/*
+ * The ring buffer can count events recorded and overwritten per buffer,
+ * but it is disabled by default due to its performance overhead.
+ */
+#ifdef LTTNG_RING_BUFFER_COUNT_EVENTS
+static
+void deliver_count_events(const struct lib_ring_buffer_config *config,
+ struct lib_ring_buffer *buf,
+ unsigned long idx)
+{
+ v_add(config, subbuffer_get_records_count(config,
+ &buf->backend, idx),
+ &buf->records_count);
+ v_add(config, subbuffer_count_records_overrun(config,
+ &buf->backend, idx),
+ &buf->records_overrun);
+}
+#else /* LTTNG_RING_BUFFER_COUNT_EVENTS */
+static
+void deliver_count_events(const struct lib_ring_buffer_config *config,
+ struct lib_ring_buffer *buf,
+ unsigned long idx)
+{
+}
+#endif /* #else LTTNG_RING_BUFFER_COUNT_EVENTS */
+
+
+void lib_ring_buffer_check_deliver_slow(const struct lib_ring_buffer_config *config,
+ struct lib_ring_buffer *buf,
+ struct channel *chan,
+ unsigned long offset,
+ unsigned long commit_count,
+ unsigned long idx,
+ u64 tsc)
+{
+ unsigned long old_commit_count = commit_count
+ - chan->backend.subbuf_size;
+
+ /*
+ * If we succeeded at updating cc_sb below, we are the subbuffer
+ * writer delivering the subbuffer. Deals with concurrent
+	 * updates of the "cc" value without adding an add_return atomic
+ * operation to the fast path.
+ *
+ * We are doing the delivery in two steps:
+ * - First, we cmpxchg() cc_sb to the new value
+ * old_commit_count + 1. This ensures that we are the only
+ * subbuffer user successfully filling the subbuffer, but we
+ * do _not_ set the cc_sb value to "commit_count" yet.
+ * Therefore, other writers that would wrap around the ring
+ * buffer and try to start writing to our subbuffer would
+ * have to drop records, because it would appear as
+ * non-filled.
+ * We therefore have exclusive access to the subbuffer control
+ * structures. This mutual exclusion with other writers is
+ * crucially important to perform record overruns count in
+ * flight recorder mode locklessly.
+ * - When we are ready to release the subbuffer (either for
+ * reading or for overrun by other writers), we simply set the
+ * cc_sb value to "commit_count" and perform delivery.
+ *
+	 * The subbuffer size is at least 2 bytes (minimum size: 1 page).
+ * This guarantees that old_commit_count + 1 != commit_count.
+ */
+
+ /*
+ * Order prior updates to reserve count prior to the
+ * commit_cold cc_sb update.
+ */
+ smp_wmb();
+ if (likely(v_cmpxchg(config, &buf->commit_cold[idx].cc_sb,
+ old_commit_count, old_commit_count + 1)
+ == old_commit_count)) {
+ u64 *ts_end;
+
+ /*
+ * Start of exclusive subbuffer access. We are
+ * guaranteed to be the last writer in this subbuffer
+ * and any other writer trying to access this subbuffer
+ * in this state is required to drop records.
+ *
+ * We can read the ts_end for the current sub-buffer
+ * which has been saved by the very last space
+ * reservation for the current sub-buffer.
+ *
+ * Order increment of commit counter before reading ts_end.
+ */
+ smp_mb();
+ ts_end = &buf->ts_end[idx];
+ deliver_count_events(config, buf, idx);
+ config->cb.buffer_end(buf, *ts_end, idx,
+ lib_ring_buffer_get_data_size(config,
+ buf,
+ idx));
+
+ /*
+ * Increment the packet counter while we have exclusive
+ * access.
+ */
+ subbuffer_inc_packet_count(config, &buf->backend, idx);
+
+ /*
+ * Set noref flag and offset for this subbuffer id.
+ * Contains a memory barrier that ensures counter stores
+ * are ordered before set noref and offset.
+ */
+ lib_ring_buffer_set_noref_offset(config, &buf->backend, idx,
+ buf_trunc_val(offset, chan));
+
+ /*
+ * Order set_noref and record counter updates before the
+ * end of subbuffer exclusive access. Orders with
+ * respect to writers coming into the subbuffer after
+ * wrap around, and also order wrt concurrent readers.
+ */
+ smp_mb();
+ /* End of exclusive subbuffer access */
+ v_set(config, &buf->commit_cold[idx].cc_sb,
+ commit_count);
+ /*
+ * Order later updates to reserve count after
+ * the commit_cold cc_sb update.
+ */
+ smp_wmb();
+ lib_ring_buffer_vmcore_check_deliver(config, buf,
+ commit_count, idx);
+
+ /*
+ * RING_BUFFER_WAKEUP_BY_WRITER wakeup is not lock-free.
+ */
+ if (config->wakeup == RING_BUFFER_WAKEUP_BY_WRITER
+ && atomic_long_read(&buf->active_readers)
+ && lib_ring_buffer_poll_deliver(config, buf, chan)) {
+ wake_up_interruptible(&buf->read_wait);
+ wake_up_interruptible(&chan->read_wait);
+ }
+
+ }
+}
+EXPORT_SYMBOL_GPL(lib_ring_buffer_check_deliver_slow);
+
+int __init init_lib_ring_buffer_frontend(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ spin_lock_init(&per_cpu(ring_buffer_nohz_lock, cpu));
+ return 0;
+}
+
+module_init(init_lib_ring_buffer_frontend);
+
+void __exit exit_lib_ring_buffer_frontend(void)
+{
+}
+
+module_exit(exit_lib_ring_buffer_frontend);