X-Git-Url: http://git.liburcu.org/?p=lttng-modules.git;a=blobdiff_plain;f=lib%2Fringbuffer%2Fring_buffer_frontend.c;h=fca37fbc3a5b143a08cb55b94f762bf664fb7187;hp=3003dd8324458ee62c8e99460b295bc3ab8e481d;hb=2459130397d7e7eecc44a5f06a39d65c78257eef;hpb=ba1d61bc8909b4796516221bf97130631e8d0bc6 diff --git a/lib/ringbuffer/ring_buffer_frontend.c b/lib/ringbuffer/ring_buffer_frontend.c index 3003dd83..fca37fbc 100644 --- a/lib/ringbuffer/ring_buffer_frontend.c +++ b/lib/ringbuffer/ring_buffer_frontend.c @@ -1,7 +1,8 @@ -/* +/* SPDX-License-Identifier: (GPL-2.0-only OR LGPL-2.1-only) + * * ring_buffer_frontend.c * - * (C) Copyright 2005-2010 - Mathieu Desnoyers + * Copyright (C) 2005-2012 Mathieu Desnoyers * * Ring buffer wait-free buffer synchronization. Producer-consumer and flight * recorder (overwrite) modes. See thesis: @@ -34,19 +35,23 @@ * - splice one subbuffer worth of data to a pipe * - splice the data from pipe to disk/network * - put_subbuf - * - * Dual LGPL v2.1/GPL v2 license. */ #include #include #include - -#include "../../wrapper/ringbuffer/config.h" -#include "../../wrapper/ringbuffer/backend.h" -#include "../../wrapper/ringbuffer/frontend.h" -#include "../../wrapper/ringbuffer/iterator.h" -#include "../../wrapper/ringbuffer/nohz.h" +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* * Internal structure representing offsets to use at a sub-buffer switch. @@ -76,6 +81,50 @@ EXPORT_PER_CPU_SYMBOL(lib_ring_buffer_nesting); static void lib_ring_buffer_print_errors(struct channel *chan, struct lib_ring_buffer *buf, int cpu); +static +void _lib_ring_buffer_switch_remote(struct lib_ring_buffer *buf, + enum switch_mode mode); + +static +int lib_ring_buffer_poll_deliver(const struct lib_ring_buffer_config *config, + struct lib_ring_buffer *buf, + struct channel *chan) +{ + unsigned long consumed_old, consumed_idx, commit_count, write_offset; + + consumed_old = atomic_long_read(&buf->consumed); + consumed_idx = subbuf_index(consumed_old, chan); + commit_count = v_read(config, &buf->commit_cold[consumed_idx].cc_sb); + /* + * No memory barrier here, since we are only interested + * in a statistically correct polling result. The next poll will + * get the data is we are racing. The mb() that ensures correct + * memory order is in get_subbuf. + */ + write_offset = v_read(config, &buf->offset); + + /* + * Check that the subbuffer we are trying to consume has been + * already fully committed. + */ + + if (((commit_count - chan->backend.subbuf_size) + & chan->commit_count_mask) + - (buf_trunc(consumed_old, chan) + >> chan->backend.num_subbuf_order) + != 0) + return 0; + + /* + * Check that we are not about to read the same subbuffer in + * which the writer head is. + */ + if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed_old, chan) + == 0) + return 0; + + return 1; +} /* * Must be called under cpu hotplug protection. @@ -85,8 +134,9 @@ void lib_ring_buffer_free(struct lib_ring_buffer *buf) struct channel *chan = buf->backend.chan; lib_ring_buffer_print_errors(chan, buf, buf->backend.cpu); - kfree(buf->commit_hot); - kfree(buf->commit_cold); + lttng_kvfree(buf->commit_hot); + lttng_kvfree(buf->commit_cold); + lttng_kvfree(buf->ts_end); lib_ring_buffer_backend_free(&buf->backend); } @@ -103,7 +153,7 @@ void lib_ring_buffer_free(struct lib_ring_buffer *buf) void lib_ring_buffer_reset(struct lib_ring_buffer *buf) { struct channel *chan = buf->backend.chan; - const struct lib_ring_buffer_config *config = chan->backend.config; + const struct lib_ring_buffer_config *config = &chan->backend.config; unsigned int i; /* @@ -116,6 +166,7 @@ void lib_ring_buffer_reset(struct lib_ring_buffer *buf) v_set(config, &buf->commit_hot[i].cc, 0); v_set(config, &buf->commit_hot[i].seq, 0); v_set(config, &buf->commit_cold[i].cc_sb, 0); + buf->ts_end[i] = 0; } atomic_long_set(&buf->consumed, 0); atomic_set(&buf->record_disabled, 0); @@ -161,7 +212,7 @@ EXPORT_SYMBOL_GPL(channel_reset); int lib_ring_buffer_create(struct lib_ring_buffer *buf, struct channel_backend *chanb, int cpu) { - const struct lib_ring_buffer_config *config = chanb->config; + const struct lib_ring_buffer_config *config = &chanb->config; struct channel *chan = container_of(chanb, struct channel, backend); void *priv = chanb->priv; size_t subbuf_header_size; @@ -183,25 +234,38 @@ int lib_ring_buffer_create(struct lib_ring_buffer *buf, return ret; buf->commit_hot = - kzalloc_node(ALIGN(sizeof(*buf->commit_hot) + lttng_kvzalloc_node(ALIGN(sizeof(*buf->commit_hot) * chan->backend.num_subbuf, 1 << INTERNODE_CACHE_SHIFT), - GFP_KERNEL, cpu_to_node(max(cpu, 0))); + GFP_KERNEL | __GFP_NOWARN, + cpu_to_node(max(cpu, 0))); if (!buf->commit_hot) { ret = -ENOMEM; goto free_chanbuf; } buf->commit_cold = - kzalloc_node(ALIGN(sizeof(*buf->commit_cold) + lttng_kvzalloc_node(ALIGN(sizeof(*buf->commit_cold) * chan->backend.num_subbuf, 1 << INTERNODE_CACHE_SHIFT), - GFP_KERNEL, cpu_to_node(max(cpu, 0))); + GFP_KERNEL | __GFP_NOWARN, + cpu_to_node(max(cpu, 0))); if (!buf->commit_cold) { ret = -ENOMEM; goto free_commit; } + buf->ts_end = + lttng_kvzalloc_node(ALIGN(sizeof(*buf->ts_end) + * chan->backend.num_subbuf, + 1 << INTERNODE_CACHE_SHIFT), + GFP_KERNEL | __GFP_NOWARN, + cpu_to_node(max(cpu, 0))); + if (!buf->ts_end) { + ret = -ENOMEM; + goto free_commit_cold; + } + init_waitqueue_head(&buf->read_wait); init_waitqueue_head(&buf->write_wait); raw_spin_lock_init(&buf->raw_tick_nohz_spinlock); @@ -241,19 +305,21 @@ int lib_ring_buffer_create(struct lib_ring_buffer *buf, /* Error handling */ free_init: - kfree(buf->commit_cold); + lttng_kvfree(buf->ts_end); +free_commit_cold: + lttng_kvfree(buf->commit_cold); free_commit: - kfree(buf->commit_hot); + lttng_kvfree(buf->commit_hot); free_chanbuf: lib_ring_buffer_backend_free(&buf->backend); return ret; } -static void switch_buffer_timer(unsigned long data) +static void switch_buffer_timer(LTTNG_TIMER_FUNC_ARG_TYPE t) { - struct lib_ring_buffer *buf = (struct lib_ring_buffer *)data; + struct lib_ring_buffer *buf = lttng_from_timer(buf, t, switch_timer); struct channel *chan = buf->backend.chan; - const struct lib_ring_buffer_config *config = chan->backend.config; + const struct lib_ring_buffer_config *config = &chan->backend.config; /* * Only flush buffers periodically if readers are active. @@ -262,7 +328,7 @@ static void switch_buffer_timer(unsigned long data) lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE); if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) - mod_timer_pinned(&buf->switch_timer, + lttng_mod_timer_pinned(&buf->switch_timer, jiffies + chan->switch_timer_interval); else mod_timer(&buf->switch_timer, @@ -275,18 +341,23 @@ static void switch_buffer_timer(unsigned long data) static void lib_ring_buffer_start_switch_timer(struct lib_ring_buffer *buf) { struct channel *chan = buf->backend.chan; - const struct lib_ring_buffer_config *config = chan->backend.config; + const struct lib_ring_buffer_config *config = &chan->backend.config; + unsigned int flags = 0; if (!chan->switch_timer_interval || buf->switch_timer_enabled) return; - init_timer(&buf->switch_timer); - buf->switch_timer.function = switch_buffer_timer; + + if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) + flags = LTTNG_TIMER_PINNED; + + lttng_timer_setup(&buf->switch_timer, switch_buffer_timer, flags, buf); buf->switch_timer.expires = jiffies + chan->switch_timer_interval; - buf->switch_timer.data = (unsigned long)buf; + if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) add_timer_on(&buf->switch_timer, buf->backend.cpu); else add_timer(&buf->switch_timer); + buf->switch_timer_enabled = 1; } @@ -307,11 +378,11 @@ static void lib_ring_buffer_stop_switch_timer(struct lib_ring_buffer *buf) /* * Polling timer to check the channels for data. */ -static void read_buffer_timer(unsigned long data) +static void read_buffer_timer(LTTNG_TIMER_FUNC_ARG_TYPE t) { - struct lib_ring_buffer *buf = (struct lib_ring_buffer *)data; + struct lib_ring_buffer *buf = lttng_from_timer(buf, t, read_timer); struct channel *chan = buf->backend.chan; - const struct lib_ring_buffer_config *config = chan->backend.config; + const struct lib_ring_buffer_config *config = &chan->backend.config; CHAN_WARN_ON(chan, !buf->backend.allocated); @@ -322,7 +393,7 @@ static void read_buffer_timer(unsigned long data) } if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) - mod_timer_pinned(&buf->read_timer, + lttng_mod_timer_pinned(&buf->read_timer, jiffies + chan->read_timer_interval); else mod_timer(&buf->read_timer, @@ -335,22 +406,25 @@ static void read_buffer_timer(unsigned long data) static void lib_ring_buffer_start_read_timer(struct lib_ring_buffer *buf) { struct channel *chan = buf->backend.chan; - const struct lib_ring_buffer_config *config = chan->backend.config; + const struct lib_ring_buffer_config *config = &chan->backend.config; + unsigned int flags = 0; if (config->wakeup != RING_BUFFER_WAKEUP_BY_TIMER || !chan->read_timer_interval || buf->read_timer_enabled) return; - init_timer(&buf->read_timer); - buf->read_timer.function = read_buffer_timer; + if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) + flags = LTTNG_TIMER_PINNED; + + lttng_timer_setup(&buf->read_timer, read_buffer_timer, flags, buf); buf->read_timer.expires = jiffies + chan->read_timer_interval; - buf->read_timer.data = (unsigned long)buf; if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) add_timer_on(&buf->read_timer, buf->backend.cpu); else add_timer(&buf->read_timer); + buf->read_timer_enabled = 1; } @@ -360,7 +434,7 @@ static void lib_ring_buffer_start_read_timer(struct lib_ring_buffer *buf) static void lib_ring_buffer_stop_read_timer(struct lib_ring_buffer *buf) { struct channel *chan = buf->backend.chan; - const struct lib_ring_buffer_config *config = chan->backend.config; + const struct lib_ring_buffer_config *config = &chan->backend.config; if (config->wakeup != RING_BUFFER_WAKEUP_BY_TIMER || !chan->read_timer_interval @@ -379,7 +453,81 @@ static void lib_ring_buffer_stop_read_timer(struct lib_ring_buffer *buf) buf->read_timer_enabled = 0; } +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) + +enum cpuhp_state lttng_rb_hp_prepare; +enum cpuhp_state lttng_rb_hp_online; + +void lttng_rb_set_hp_prepare(enum cpuhp_state val) +{ + lttng_rb_hp_prepare = val; +} +EXPORT_SYMBOL_GPL(lttng_rb_set_hp_prepare); + +void lttng_rb_set_hp_online(enum cpuhp_state val) +{ + lttng_rb_hp_online = val; +} +EXPORT_SYMBOL_GPL(lttng_rb_set_hp_online); + +int lttng_cpuhp_rb_frontend_dead(unsigned int cpu, + struct lttng_cpuhp_node *node) +{ + struct channel *chan = container_of(node, struct channel, + cpuhp_prepare); + struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf, cpu); + const struct lib_ring_buffer_config *config = &chan->backend.config; + + CHAN_WARN_ON(chan, config->alloc == RING_BUFFER_ALLOC_GLOBAL); + + /* + * Performing a buffer switch on a remote CPU. Performed by + * the CPU responsible for doing the hotunplug after the target + * CPU stopped running completely. Ensures that all data + * from that remote CPU is flushed. + */ + lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE); + return 0; +} +EXPORT_SYMBOL_GPL(lttng_cpuhp_rb_frontend_dead); + +int lttng_cpuhp_rb_frontend_online(unsigned int cpu, + struct lttng_cpuhp_node *node) +{ + struct channel *chan = container_of(node, struct channel, + cpuhp_online); + struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf, cpu); + const struct lib_ring_buffer_config *config = &chan->backend.config; + + CHAN_WARN_ON(chan, config->alloc == RING_BUFFER_ALLOC_GLOBAL); + + wake_up_interruptible(&chan->hp_wait); + lib_ring_buffer_start_switch_timer(buf); + lib_ring_buffer_start_read_timer(buf); + return 0; +} +EXPORT_SYMBOL_GPL(lttng_cpuhp_rb_frontend_online); + +int lttng_cpuhp_rb_frontend_offline(unsigned int cpu, + struct lttng_cpuhp_node *node) +{ + struct channel *chan = container_of(node, struct channel, + cpuhp_online); + struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf, cpu); + const struct lib_ring_buffer_config *config = &chan->backend.config; + + CHAN_WARN_ON(chan, config->alloc == RING_BUFFER_ALLOC_GLOBAL); + + lib_ring_buffer_stop_switch_timer(buf); + lib_ring_buffer_stop_read_timer(buf); + return 0; +} +EXPORT_SYMBOL_GPL(lttng_cpuhp_rb_frontend_offline); + +#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */ + #ifdef CONFIG_HOTPLUG_CPU + /** * lib_ring_buffer_cpu_hp_callback - CPU hotplug callback * @nb: notifier block @@ -389,7 +537,7 @@ static void lib_ring_buffer_stop_read_timer(struct lib_ring_buffer *buf) * Returns the success/failure of the operation. (%NOTIFY_OK, %NOTIFY_BAD) */ static -int __cpuinit lib_ring_buffer_cpu_hp_callback(struct notifier_block *nb, +int lib_ring_buffer_cpu_hp_callback(struct notifier_block *nb, unsigned long action, void *hcpu) { @@ -397,7 +545,7 @@ int __cpuinit lib_ring_buffer_cpu_hp_callback(struct notifier_block *nb, struct channel *chan = container_of(nb, struct channel, cpu_hp_notifier); struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf, cpu); - const struct lib_ring_buffer_config *config = chan->backend.config; + const struct lib_ring_buffer_config *config = &chan->backend.config; if (!chan->cpu_hp_enable) return NOTIFY_DONE; @@ -435,8 +583,11 @@ int __cpuinit lib_ring_buffer_cpu_hp_callback(struct notifier_block *nb, return NOTIFY_DONE; } } + #endif +#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */ + #if defined(CONFIG_NO_HZ) && defined(CONFIG_LIB_RING_BUFFER) /* * For per-cpu buffers, call the reader wakeups before switching the buffer, so @@ -452,7 +603,7 @@ static int notrace ring_buffer_tick_nohz_callback(struct notifier_block *nb, { struct channel *chan = container_of(nb, struct channel, tick_nohz_notifier); - const struct lib_ring_buffer_config *config = chan->backend.config; + const struct lib_ring_buffer_config *config = &chan->backend.config; struct lib_ring_buffer *buf; int cpu = smp_processor_id(); @@ -484,16 +635,16 @@ static int notrace ring_buffer_tick_nohz_callback(struct notifier_block *nb, raw_spin_unlock(&buf->raw_tick_nohz_spinlock); break; case TICK_NOHZ_STOP: - spin_lock(&__get_cpu_var(ring_buffer_nohz_lock)); + spin_lock(lttng_this_cpu_ptr(&ring_buffer_nohz_lock)); lib_ring_buffer_stop_switch_timer(buf); lib_ring_buffer_stop_read_timer(buf); - spin_unlock(&__get_cpu_var(ring_buffer_nohz_lock)); + spin_unlock(lttng_this_cpu_ptr(&ring_buffer_nohz_lock)); break; case TICK_NOHZ_RESTART: - spin_lock(&__get_cpu_var(ring_buffer_nohz_lock)); + spin_lock(lttng_this_cpu_ptr(&ring_buffer_nohz_lock)); lib_ring_buffer_start_read_timer(buf); lib_ring_buffer_start_switch_timer(buf); - spin_unlock(&__get_cpu_var(ring_buffer_nohz_lock)); + spin_unlock(lttng_this_cpu_ptr(&ring_buffer_nohz_lock)); break; } @@ -524,8 +675,7 @@ void notrace lib_ring_buffer_tick_nohz_restart(void) */ static void channel_unregister_notifiers(struct channel *chan) { - const struct lib_ring_buffer_config *config = chan->backend.config; - int cpu; + const struct lib_ring_buffer_config *config = &chan->backend.config; channel_iterator_unregister_notifiers(chan); if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) { @@ -542,36 +692,113 @@ static void channel_unregister_notifiers(struct channel *chan) * concurrency. */ #endif /* CONFIG_NO_HZ */ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) + { + int ret; + + ret = cpuhp_state_remove_instance(lttng_rb_hp_online, + &chan->cpuhp_online.node); + WARN_ON(ret); + ret = cpuhp_state_remove_instance_nocalls(lttng_rb_hp_prepare, + &chan->cpuhp_prepare.node); + WARN_ON(ret); + } +#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */ + { + int cpu; + #ifdef CONFIG_HOTPLUG_CPU + get_online_cpus(); + chan->cpu_hp_enable = 0; + for_each_online_cpu(cpu) { + struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf, + cpu); + lib_ring_buffer_stop_switch_timer(buf); + lib_ring_buffer_stop_read_timer(buf); + } + put_online_cpus(); + unregister_cpu_notifier(&chan->cpu_hp_notifier); +#else + for_each_possible_cpu(cpu) { + struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf, + cpu); + lib_ring_buffer_stop_switch_timer(buf); + lib_ring_buffer_stop_read_timer(buf); + } +#endif + } +#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */ + } else { + struct lib_ring_buffer *buf = chan->backend.buf; + + lib_ring_buffer_stop_switch_timer(buf); + lib_ring_buffer_stop_read_timer(buf); + } + channel_backend_unregister_notifiers(&chan->backend); +} + +static void lib_ring_buffer_set_quiescent(struct lib_ring_buffer *buf) +{ + if (!buf->quiescent) { + buf->quiescent = true; + _lib_ring_buffer_switch_remote(buf, SWITCH_FLUSH); + } +} + +static void lib_ring_buffer_clear_quiescent(struct lib_ring_buffer *buf) +{ + buf->quiescent = false; +} + +void lib_ring_buffer_set_quiescent_channel(struct channel *chan) +{ + int cpu; + const struct lib_ring_buffer_config *config = &chan->backend.config; + + if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) { get_online_cpus(); - chan->cpu_hp_enable = 0; - for_each_online_cpu(cpu) { + for_each_channel_cpu(cpu, chan) { struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf, cpu); - lib_ring_buffer_stop_switch_timer(buf); - lib_ring_buffer_stop_read_timer(buf); + + lib_ring_buffer_set_quiescent(buf); } put_online_cpus(); - unregister_cpu_notifier(&chan->cpu_hp_notifier); -#else - for_each_possible_cpu(cpu) { + } else { + struct lib_ring_buffer *buf = chan->backend.buf; + + lib_ring_buffer_set_quiescent(buf); + } +} +EXPORT_SYMBOL_GPL(lib_ring_buffer_set_quiescent_channel); + +void lib_ring_buffer_clear_quiescent_channel(struct channel *chan) +{ + int cpu; + const struct lib_ring_buffer_config *config = &chan->backend.config; + + if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) { + get_online_cpus(); + for_each_channel_cpu(cpu, chan) { struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf, cpu); - lib_ring_buffer_stop_switch_timer(buf); - lib_ring_buffer_stop_read_timer(buf); + + lib_ring_buffer_clear_quiescent(buf); } -#endif + put_online_cpus(); } else { struct lib_ring_buffer *buf = chan->backend.buf; - lib_ring_buffer_stop_switch_timer(buf); - lib_ring_buffer_stop_read_timer(buf); + lib_ring_buffer_clear_quiescent(buf); } - channel_backend_unregister_notifiers(&chan->backend); } +EXPORT_SYMBOL_GPL(lib_ring_buffer_clear_quiescent_channel); static void channel_free(struct channel *chan) { + if (chan->backend.release_priv_ops) { + chan->backend.release_priv_ops(chan->backend.priv_ops); + } channel_iterator_free(chan); channel_backend_free(&chan->backend); kfree(chan); @@ -601,7 +828,7 @@ struct channel *channel_create(const struct lib_ring_buffer_config *config, size_t num_subbuf, unsigned int switch_timer_interval, unsigned int read_timer_interval) { - int ret, cpu; + int ret; struct channel *chan; if (lib_ring_buffer_check_config(config, switch_timer_interval, @@ -629,6 +856,56 @@ struct channel *channel_create(const struct lib_ring_buffer_config *config, init_waitqueue_head(&chan->hp_wait); if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) { +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) + chan->cpuhp_prepare.component = LTTNG_RING_BUFFER_FRONTEND; + ret = cpuhp_state_add_instance_nocalls(lttng_rb_hp_prepare, + &chan->cpuhp_prepare.node); + if (ret) + goto cpuhp_prepare_error; + + chan->cpuhp_online.component = LTTNG_RING_BUFFER_FRONTEND; + ret = cpuhp_state_add_instance(lttng_rb_hp_online, + &chan->cpuhp_online.node); + if (ret) + goto cpuhp_online_error; +#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */ + { + int cpu; + /* + * In case of non-hotplug cpu, if the ring-buffer is allocated + * in early initcall, it will not be notified of secondary cpus. + * In that off case, we need to allocate for all possible cpus. + */ +#ifdef CONFIG_HOTPLUG_CPU + chan->cpu_hp_notifier.notifier_call = + lib_ring_buffer_cpu_hp_callback; + chan->cpu_hp_notifier.priority = 6; + register_cpu_notifier(&chan->cpu_hp_notifier); + + get_online_cpus(); + for_each_online_cpu(cpu) { + struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf, + cpu); + spin_lock(&per_cpu(ring_buffer_nohz_lock, cpu)); + lib_ring_buffer_start_switch_timer(buf); + lib_ring_buffer_start_read_timer(buf); + spin_unlock(&per_cpu(ring_buffer_nohz_lock, cpu)); + } + chan->cpu_hp_enable = 1; + put_online_cpus(); +#else + for_each_possible_cpu(cpu) { + struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf, + cpu); + spin_lock(&per_cpu(ring_buffer_nohz_lock, cpu)); + lib_ring_buffer_start_switch_timer(buf); + lib_ring_buffer_start_read_timer(buf); + spin_unlock(&per_cpu(ring_buffer_nohz_lock, cpu)); + } +#endif + } +#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */ + #if defined(CONFIG_NO_HZ) && defined(CONFIG_LIB_RING_BUFFER) /* Only benefit from NO_HZ idle with per-cpu buffers for now. */ chan->tick_nohz_notifier.notifier_call = @@ -638,38 +915,6 @@ struct channel *channel_create(const struct lib_ring_buffer_config *config, &chan->tick_nohz_notifier); #endif /* defined(CONFIG_NO_HZ) && defined(CONFIG_LIB_RING_BUFFER) */ - /* - * In case of non-hotplug cpu, if the ring-buffer is allocated - * in early initcall, it will not be notified of secondary cpus. - * In that off case, we need to allocate for all possible cpus. - */ -#ifdef CONFIG_HOTPLUG_CPU - chan->cpu_hp_notifier.notifier_call = - lib_ring_buffer_cpu_hp_callback; - chan->cpu_hp_notifier.priority = 6; - register_cpu_notifier(&chan->cpu_hp_notifier); - - get_online_cpus(); - for_each_online_cpu(cpu) { - struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf, - cpu); - spin_lock(&per_cpu(ring_buffer_nohz_lock, cpu)); - lib_ring_buffer_start_switch_timer(buf); - lib_ring_buffer_start_read_timer(buf); - spin_unlock(&per_cpu(ring_buffer_nohz_lock, cpu)); - } - chan->cpu_hp_enable = 1; - put_online_cpus(); -#else - for_each_possible_cpu(cpu) { - struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf, - cpu); - spin_lock(&per_cpu(ring_buffer_nohz_lock, cpu)); - lib_ring_buffer_start_switch_timer(buf); - lib_ring_buffer_start_read_timer(buf); - spin_unlock(&per_cpu(ring_buffer_nohz_lock, cpu)); - } -#endif } else { struct lib_ring_buffer *buf = chan->backend.buf; @@ -679,6 +924,13 @@ struct channel *channel_create(const struct lib_ring_buffer_config *config, return chan; +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) +cpuhp_online_error: + ret = cpuhp_state_remove_instance_nocalls(lttng_rb_hp_prepare, + &chan->cpuhp_prepare.node); + WARN_ON(ret); +cpuhp_prepare_error: +#endif /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */ error_free_backend: channel_backend_free(&chan->backend); error: @@ -708,7 +960,7 @@ void channel_release(struct kref *kref) void *channel_destroy(struct channel *chan) { int cpu; - const struct lib_ring_buffer_config *config = chan->backend.config; + const struct lib_ring_buffer_config *config = &chan->backend.config; void *priv; channel_unregister_notifiers(chan); @@ -726,13 +978,11 @@ void *channel_destroy(struct channel *chan) config->cb.buffer_finalize(buf, chan->backend.priv, cpu); - if (buf->backend.allocated) - lib_ring_buffer_switch_slow(buf, SWITCH_FLUSH); /* * Perform flush before writing to finalized. */ smp_wmb(); - ACCESS_ONCE(buf->finalized) = 1; + WRITE_ONCE(buf->finalized, 1); wake_up_interruptible(&buf->read_wait); } } else { @@ -740,16 +990,14 @@ void *channel_destroy(struct channel *chan) if (config->cb.buffer_finalize) config->cb.buffer_finalize(buf, chan->backend.priv, -1); - if (buf->backend.allocated) - lib_ring_buffer_switch_slow(buf, SWITCH_FLUSH); /* * Perform flush before writing to finalized. */ smp_wmb(); - ACCESS_ONCE(buf->finalized) = 1; + WRITE_ONCE(buf->finalized, 1); wake_up_interruptible(&buf->read_wait); } - ACCESS_ONCE(chan->finalized) = 1; + WRITE_ONCE(chan->finalized, 1); wake_up_interruptible(&chan->hp_wait); wake_up_interruptible(&chan->read_wait); priv = chan->backend.priv; @@ -775,8 +1023,11 @@ int lib_ring_buffer_open_read(struct lib_ring_buffer *buf) if (!atomic_long_add_unless(&buf->active_readers, 1, 1)) return -EBUSY; - kref_get(&chan->ref); - smp_mb__after_atomic_inc(); + if (!lttng_kref_get(&chan->ref)) { + atomic_long_dec(&buf->active_readers); + return -EOVERFLOW; + } + lttng_smp_mb__after_atomic(); return 0; } EXPORT_SYMBOL_GPL(lib_ring_buffer_open_read); @@ -786,7 +1037,7 @@ void lib_ring_buffer_release_read(struct lib_ring_buffer *buf) struct channel *chan = buf->backend.chan; CHAN_WARN_ON(chan, atomic_long_read(&buf->active_readers) != 1); - smp_mb__before_atomic_dec(); + lttng_smp_mb__before_atomic(); atomic_long_dec(&buf->active_readers); kref_put(&chan->ref, channel_release); } @@ -818,12 +1069,12 @@ int lib_ring_buffer_snapshot(struct lib_ring_buffer *buf, unsigned long *consumed, unsigned long *produced) { struct channel *chan = buf->backend.chan; - const struct lib_ring_buffer_config *config = chan->backend.config; + const struct lib_ring_buffer_config *config = &chan->backend.config; unsigned long consumed_cur, write_offset; int finalized; retry: - finalized = ACCESS_ONCE(buf->finalized); + finalized = READ_ONCE(buf->finalized); /* * Read finalized before counters. */ @@ -866,6 +1117,37 @@ nodata: } EXPORT_SYMBOL_GPL(lib_ring_buffer_snapshot); +/** + * Performs the same function as lib_ring_buffer_snapshot(), but the positions + * are saved regardless of whether the consumed and produced positions are + * in the same subbuffer. + * @buf: ring buffer + * @consumed: consumed byte count indicating the last position read + * @produced: produced byte count indicating the last position written + * + * This function is meant to provide information on the exact producer and + * consumer positions without regard for the "snapshot" feature. + */ +int lib_ring_buffer_snapshot_sample_positions(struct lib_ring_buffer *buf, + unsigned long *consumed, unsigned long *produced) +{ + struct channel *chan = buf->backend.chan; + const struct lib_ring_buffer_config *config = &chan->backend.config; + + smp_rmb(); + *consumed = atomic_long_read(&buf->consumed); + /* + * No need to issue a memory barrier between consumed count read and + * write offset read, because consumed count can only change + * concurrently in overwrite mode, and we keep a sequence counter + * identifier derived from the write offset to check we are getting + * the same sub-buffer we are expecting (the sub-buffers are atomically + * "tagged" upon writes, tags are checked upon read). + */ + *produced = v_read(config, &buf->offset); + return 0; +} + /** * lib_ring_buffer_put_snapshot - move consumed counter forward * @@ -896,6 +1178,47 @@ void lib_ring_buffer_move_consumer(struct lib_ring_buffer *buf, } EXPORT_SYMBOL_GPL(lib_ring_buffer_move_consumer); +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +static void lib_ring_buffer_flush_read_subbuf_dcache( + const struct lib_ring_buffer_config *config, + struct channel *chan, + struct lib_ring_buffer *buf) +{ + struct lib_ring_buffer_backend_pages *pages; + unsigned long sb_bindex, id, i, nr_pages; + + if (config->output != RING_BUFFER_MMAP) + return; + + /* + * Architectures with caches aliased on virtual addresses may + * use different cache lines for the linear mapping vs + * user-space memory mapping. Given that the ring buffer is + * based on the kernel linear mapping, aligning it with the + * user-space mapping is not straightforward, and would require + * extra TLB entries. Therefore, simply flush the dcache for the + * entire sub-buffer before reading it. + */ + id = buf->backend.buf_rsb.id; + sb_bindex = subbuffer_id_get_index(config, id); + pages = buf->backend.array[sb_bindex]; + nr_pages = buf->backend.num_pages_per_subbuf; + for (i = 0; i < nr_pages; i++) { + struct lib_ring_buffer_backend_page *backend_page; + + backend_page = &pages->p[i]; + flush_dcache_page(pfn_to_page(backend_page->pfn)); + } +} +#else +static void lib_ring_buffer_flush_read_subbuf_dcache( + const struct lib_ring_buffer_config *config, + struct channel *chan, + struct lib_ring_buffer *buf) +{ +} +#endif + /** * lib_ring_buffer_get_subbuf - get exclusive access to subbuffer for reading * @buf: ring buffer @@ -909,13 +1232,20 @@ int lib_ring_buffer_get_subbuf(struct lib_ring_buffer *buf, unsigned long consumed) { struct channel *chan = buf->backend.chan; - const struct lib_ring_buffer_config *config = chan->backend.config; + const struct lib_ring_buffer_config *config = &chan->backend.config; unsigned long consumed_cur, consumed_idx, commit_count, write_offset; int ret; int finalized; + if (buf->get_subbuf) { + /* + * Reader is trying to get a subbuffer twice. + */ + CHAN_WARN_ON(chan, 1); + return -EBUSY; + } retry: - finalized = ACCESS_ONCE(buf->finalized); + finalized = READ_ONCE(buf->finalized); /* * Read finalized before counters. */ @@ -1002,7 +1332,7 @@ retry: */ if (((commit_count - chan->backend.subbuf_size) & chan->commit_count_mask) - - (buf_trunc(consumed_cur, chan) + - (buf_trunc(consumed, chan) >> chan->backend.num_subbuf_order) != 0) goto nodata; @@ -1011,7 +1341,7 @@ retry: * Check that we are not about to read the same subbuffer in * which the writer head is. */ - if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed_cur, chan) + if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed, chan) == 0) goto nodata; @@ -1031,6 +1361,8 @@ retry: buf->get_subbuf_consumed = consumed; buf->get_subbuf = 1; + lib_ring_buffer_flush_read_subbuf_dcache(config, chan, buf); + return 0; nodata: @@ -1055,7 +1387,7 @@ void lib_ring_buffer_put_subbuf(struct lib_ring_buffer *buf) { struct lib_ring_buffer_backend *bufb = &buf->backend; struct channel *chan = bufb->chan; - const struct lib_ring_buffer_config *config = chan->backend.config; + const struct lib_ring_buffer_config *config = &chan->backend.config; unsigned long read_sb_bindex, consumed_idx, consumed; CHAN_WARN_ON(chan, atomic_long_read(&buf->active_readers) != 1); @@ -1114,7 +1446,7 @@ void lib_ring_buffer_print_subbuffer_errors(struct lib_ring_buffer *buf, unsigned long cons_offset, int cpu) { - const struct lib_ring_buffer_config *config = chan->backend.config; + const struct lib_ring_buffer_config *config = &chan->backend.config; unsigned long cons_idx, commit_count, commit_count_sb; cons_idx = subbuf_index(cons_offset, chan); @@ -1140,15 +1472,9 @@ void lib_ring_buffer_print_buffer_errors(struct lib_ring_buffer *buf, struct channel *chan, void *priv, int cpu) { - const struct lib_ring_buffer_config *config = chan->backend.config; + const struct lib_ring_buffer_config *config = &chan->backend.config; unsigned long write_offset, cons_offset; - /* - * Can be called in the error path of allocation when - * trans_channel_data is not yet set. - */ - if (!chan) - return; /* * No need to order commit_count, write_offset and cons_offset reads * because we execute at teardown when no more writer nor reader @@ -1157,7 +1483,7 @@ void lib_ring_buffer_print_buffer_errors(struct lib_ring_buffer *buf, write_offset = v_read(config, &buf->offset); cons_offset = atomic_long_read(&buf->consumed); if (write_offset != cons_offset) - printk(KERN_WARNING + printk(KERN_DEBUG "ring buffer %s, cpu %d: " "non-consumed data\n" " [ %lu bytes written, %lu bytes read ]\n", @@ -1172,31 +1498,58 @@ void lib_ring_buffer_print_buffer_errors(struct lib_ring_buffer *buf, cpu); } +#ifdef LTTNG_RING_BUFFER_COUNT_EVENTS +static +void lib_ring_buffer_print_records_count(struct channel *chan, + struct lib_ring_buffer *buf, + int cpu) +{ + const struct lib_ring_buffer_config *config = &chan->backend.config; + + if (!strcmp(chan->backend.name, "relay-metadata")) { + printk(KERN_DEBUG "ring buffer %s: %lu records written, " + "%lu records overrun\n", + chan->backend.name, + v_read(config, &buf->records_count), + v_read(config, &buf->records_overrun)); + } else { + printk(KERN_DEBUG "ring buffer %s, cpu %d: %lu records written, " + "%lu records overrun\n", + chan->backend.name, cpu, + v_read(config, &buf->records_count), + v_read(config, &buf->records_overrun)); + } +} +#else +static +void lib_ring_buffer_print_records_count(struct channel *chan, + struct lib_ring_buffer *buf, + int cpu) +{ +} +#endif + static void lib_ring_buffer_print_errors(struct channel *chan, struct lib_ring_buffer *buf, int cpu) { - const struct lib_ring_buffer_config *config = chan->backend.config; + const struct lib_ring_buffer_config *config = &chan->backend.config; void *priv = chan->backend.priv; - printk(KERN_DEBUG "ring buffer %s, cpu %d: %lu records written, " - "%lu records overrun\n", - chan->backend.name, cpu, - v_read(config, &buf->records_count), - v_read(config, &buf->records_overrun)); - - if (v_read(config, &buf->records_lost_full) - || v_read(config, &buf->records_lost_wrap) - || v_read(config, &buf->records_lost_big)) - printk(KERN_WARNING - "ring buffer %s, cpu %d: records were lost. Caused by:\n" - " [ %lu buffer full, %lu nest buffer wrap-around, " - "%lu event too big ]\n", - chan->backend.name, cpu, - v_read(config, &buf->records_lost_full), - v_read(config, &buf->records_lost_wrap), - v_read(config, &buf->records_lost_big)); - + lib_ring_buffer_print_records_count(chan, buf, cpu); + if (strcmp(chan->backend.name, "relay-metadata")) { + if (v_read(config, &buf->records_lost_full) + || v_read(config, &buf->records_lost_wrap) + || v_read(config, &buf->records_lost_big)) + printk(KERN_WARNING + "ring buffer %s, cpu %d: records were lost. Caused by:\n" + " [ %lu buffer full, %lu nest buffer wrap-around, " + "%lu event too big ]\n", + chan->backend.name, cpu, + v_read(config, &buf->records_lost_full), + v_read(config, &buf->records_lost_wrap), + v_read(config, &buf->records_lost_big)); + } lib_ring_buffer_print_buffer_errors(buf, chan, priv, cpu); } @@ -1211,9 +1564,10 @@ void lib_ring_buffer_switch_old_start(struct lib_ring_buffer *buf, struct switch_offsets *offsets, u64 tsc) { - const struct lib_ring_buffer_config *config = chan->backend.config; + const struct lib_ring_buffer_config *config = &chan->backend.config; unsigned long oldidx = subbuf_index(offsets->old, chan); unsigned long commit_count; + struct commit_counters_hot *cc_hot; config->cb.buffer_begin(buf, tsc, oldidx); @@ -1230,15 +1584,15 @@ void lib_ring_buffer_switch_old_start(struct lib_ring_buffer *buf, barrier(); } else smp_wmb(); - v_add(config, config->cb.subbuffer_header_size(), - &buf->commit_hot[oldidx].cc); - commit_count = v_read(config, &buf->commit_hot[oldidx].cc); + cc_hot = &buf->commit_hot[oldidx]; + v_add(config, config->cb.subbuffer_header_size(), &cc_hot->cc); + commit_count = v_read(config, &cc_hot->cc); /* Check if the written buffer has to be delivered */ lib_ring_buffer_check_deliver(config, buf, chan, offsets->old, - commit_count, oldidx); - lib_ring_buffer_write_commit_counter(config, buf, chan, oldidx, - offsets->old, commit_count, - config->cb.subbuffer_header_size()); + commit_count, oldidx, tsc); + lib_ring_buffer_write_commit_counter(config, buf, chan, + offsets->old + config->cb.subbuffer_header_size(), + commit_count, cc_hot); } /* @@ -1255,17 +1609,30 @@ void lib_ring_buffer_switch_old_end(struct lib_ring_buffer *buf, struct switch_offsets *offsets, u64 tsc) { - const struct lib_ring_buffer_config *config = chan->backend.config; + const struct lib_ring_buffer_config *config = &chan->backend.config; unsigned long oldidx = subbuf_index(offsets->old - 1, chan); unsigned long commit_count, padding_size, data_size; + struct commit_counters_hot *cc_hot; + u64 *ts_end; data_size = subbuf_offset(offsets->old - 1, chan) + 1; padding_size = chan->backend.subbuf_size - data_size; subbuffer_set_data_size(config, &buf->backend, oldidx, data_size); + ts_end = &buf->ts_end[oldidx]; /* - * Order all writes to buffer before the commit count update that will - * determine that the subbuffer is full. + * This is the last space reservation in that sub-buffer before + * it gets delivered. This provides exclusive access to write to + * this sub-buffer's ts_end. There are also no concurrent + * readers of that ts_end because delivery of that sub-buffer is + * postponed until the commit counter is incremented for the + * current space reservation. + */ + *ts_end = tsc; + + /* + * Order all writes to buffer and store to ts_end before the commit + * count update that will determine that the subbuffer is full. */ if (config->ipi == RING_BUFFER_IPI_BARRIER) { /* @@ -1276,13 +1643,14 @@ void lib_ring_buffer_switch_old_end(struct lib_ring_buffer *buf, barrier(); } else smp_wmb(); - v_add(config, padding_size, &buf->commit_hot[oldidx].cc); - commit_count = v_read(config, &buf->commit_hot[oldidx].cc); + cc_hot = &buf->commit_hot[oldidx]; + v_add(config, padding_size, &cc_hot->cc); + commit_count = v_read(config, &cc_hot->cc); lib_ring_buffer_check_deliver(config, buf, chan, offsets->old - 1, - commit_count, oldidx); - lib_ring_buffer_write_commit_counter(config, buf, chan, oldidx, - offsets->old, commit_count, - padding_size); + commit_count, oldidx, tsc); + lib_ring_buffer_write_commit_counter(config, buf, chan, + offsets->old + padding_size, commit_count, + cc_hot); } /* @@ -1298,9 +1666,10 @@ void lib_ring_buffer_switch_new_start(struct lib_ring_buffer *buf, struct switch_offsets *offsets, u64 tsc) { - const struct lib_ring_buffer_config *config = chan->backend.config; + const struct lib_ring_buffer_config *config = &chan->backend.config; unsigned long beginidx = subbuf_index(offsets->begin, chan); unsigned long commit_count; + struct commit_counters_hot *cc_hot; config->cb.buffer_begin(buf, tsc, beginidx); @@ -1317,22 +1686,24 @@ void lib_ring_buffer_switch_new_start(struct lib_ring_buffer *buf, barrier(); } else smp_wmb(); - v_add(config, config->cb.subbuffer_header_size(), - &buf->commit_hot[beginidx].cc); - commit_count = v_read(config, &buf->commit_hot[beginidx].cc); + cc_hot = &buf->commit_hot[beginidx]; + v_add(config, config->cb.subbuffer_header_size(), &cc_hot->cc); + commit_count = v_read(config, &cc_hot->cc); /* Check if the written buffer has to be delivered */ lib_ring_buffer_check_deliver(config, buf, chan, offsets->begin, - commit_count, beginidx); - lib_ring_buffer_write_commit_counter(config, buf, chan, beginidx, - offsets->begin, commit_count, - config->cb.subbuffer_header_size()); + commit_count, beginidx, tsc); + lib_ring_buffer_write_commit_counter(config, buf, chan, + offsets->begin + config->cb.subbuffer_header_size(), + commit_count, cc_hot); } /* * lib_ring_buffer_switch_new_end: finish switching current subbuffer * - * The only remaining threads could be the ones with pending commits. They will - * have to do the deliver themselves. + * Calls subbuffer_set_data_size() to set the data size of the current + * sub-buffer. We do not need to perform check_deliver nor commit here, + * since this task will be done by the "commit" of the event for which + * we are currently doing the space reservation. */ static void lib_ring_buffer_switch_new_end(struct lib_ring_buffer *buf, @@ -1340,34 +1711,23 @@ void lib_ring_buffer_switch_new_end(struct lib_ring_buffer *buf, struct switch_offsets *offsets, u64 tsc) { - const struct lib_ring_buffer_config *config = chan->backend.config; - unsigned long endidx = subbuf_index(offsets->end - 1, chan); - unsigned long commit_count, padding_size, data_size; + const struct lib_ring_buffer_config *config = &chan->backend.config; + unsigned long endidx, data_size; + u64 *ts_end; + endidx = subbuf_index(offsets->end - 1, chan); data_size = subbuf_offset(offsets->end - 1, chan) + 1; - padding_size = chan->backend.subbuf_size - data_size; subbuffer_set_data_size(config, &buf->backend, endidx, data_size); - + ts_end = &buf->ts_end[endidx]; /* - * Order all writes to buffer before the commit count update that will - * determine that the subbuffer is full. + * This is the last space reservation in that sub-buffer before + * it gets delivered. This provides exclusive access to write to + * this sub-buffer's ts_end. There are also no concurrent + * readers of that ts_end because delivery of that sub-buffer is + * postponed until the commit counter is incremented for the + * current space reservation. */ - if (config->ipi == RING_BUFFER_IPI_BARRIER) { - /* - * Must write slot data before incrementing commit count. This - * compiler barrier is upgraded into a smp_mb() by the IPI sent - * by get_subbuf(). - */ - barrier(); - } else - smp_wmb(); - v_add(config, padding_size, &buf->commit_hot[endidx].cc); - commit_count = v_read(config, &buf->commit_hot[endidx].cc); - lib_ring_buffer_check_deliver(config, buf, chan, offsets->end - 1, - commit_count, endidx); - lib_ring_buffer_write_commit_counter(config, buf, chan, endidx, - offsets->end, commit_count, - padding_size); + *ts_end = tsc; } /* @@ -1382,8 +1742,8 @@ int lib_ring_buffer_try_switch_slow(enum switch_mode mode, struct switch_offsets *offsets, u64 *tsc) { - const struct lib_ring_buffer_config *config = chan->backend.config; - unsigned long off; + const struct lib_ring_buffer_config *config = &chan->backend.config; + unsigned long off, reserve_commit_diff; offsets->begin = v_read(config, &buf->offset); offsets->old = offsets->begin; @@ -1408,23 +1768,68 @@ int lib_ring_buffer_try_switch_slow(enum switch_mode mode, * (records and header timestamps) are visible to the reader. This is * required for quiescence guarantees for the fusion merge. */ - if (mode == SWITCH_FLUSH || off > 0) { - if (unlikely(off == 0)) { - /* - * The client does not save any header information. - * Don't switch empty subbuffer on finalize, because it - * is invalid to deliver a completely empty subbuffer. - */ - if (!config->cb.subbuffer_header_size()) + if (mode != SWITCH_FLUSH && !off) + return -1; /* we do not have to switch : buffer is empty */ + + if (unlikely(off == 0)) { + unsigned long sb_index, commit_count; + + /* + * We are performing a SWITCH_FLUSH. At this stage, there are no + * concurrent writes into the buffer. + * + * The client does not save any header information. Don't + * switch empty subbuffer on finalize, because it is invalid to + * deliver a completely empty subbuffer. + */ + if (!config->cb.subbuffer_header_size()) + return -1; + + /* Test new buffer integrity */ + sb_index = subbuf_index(offsets->begin, chan); + commit_count = v_read(config, + &buf->commit_cold[sb_index].cc_sb); + reserve_commit_diff = + (buf_trunc(offsets->begin, chan) + >> chan->backend.num_subbuf_order) + - (commit_count & chan->commit_count_mask); + if (likely(reserve_commit_diff == 0)) { + /* Next subbuffer not being written to. */ + if (unlikely(config->mode != RING_BUFFER_OVERWRITE && + subbuf_trunc(offsets->begin, chan) + - subbuf_trunc((unsigned long) + atomic_long_read(&buf->consumed), chan) + >= chan->backend.buf_size)) { + /* + * We do not overwrite non consumed buffers + * and we are full : don't switch. + */ return -1; + } else { + /* + * Next subbuffer not being written to, and we + * are either in overwrite mode or the buffer is + * not full. It's safe to write in this new + * subbuffer. + */ + } + } else { /* - * Need to write the subbuffer start header on finalize. + * Next subbuffer reserve offset does not match the + * commit offset. Don't perform switch in + * producer-consumer and overwrite mode. Caused by + * either a writer OOPS or too many nested writes over a + * reserve/commit pair. */ - offsets->switch_old_start = 1; + return -1; } - offsets->begin = subbuf_align(offsets->begin, chan); - } else - return -1; /* we do not have to switch : buffer is empty */ + + /* + * Need to write the subbuffer start header on finalize. + */ + offsets->switch_old_start = 1; + } + offsets->begin = subbuf_align(offsets->begin, chan); /* Note: old points to the next subbuf at offset 0 */ offsets->end = offsets->begin; return 0; @@ -1441,7 +1846,7 @@ int lib_ring_buffer_try_switch_slow(enum switch_mode mode, void lib_ring_buffer_switch_slow(struct lib_ring_buffer *buf, enum switch_mode mode) { struct channel *chan = buf->backend.chan; - const struct lib_ring_buffer_config *config = chan->backend.config; + const struct lib_ring_buffer_config *config = &chan->backend.config; struct switch_offsets offsets; unsigned long oldidx; u64 tsc; @@ -1489,6 +1894,79 @@ void lib_ring_buffer_switch_slow(struct lib_ring_buffer *buf, enum switch_mode m } EXPORT_SYMBOL_GPL(lib_ring_buffer_switch_slow); +struct switch_param { + struct lib_ring_buffer *buf; + enum switch_mode mode; +}; + +static void remote_switch(void *info) +{ + struct switch_param *param = info; + struct lib_ring_buffer *buf = param->buf; + + lib_ring_buffer_switch_slow(buf, param->mode); +} + +static void _lib_ring_buffer_switch_remote(struct lib_ring_buffer *buf, + enum switch_mode mode) +{ + struct channel *chan = buf->backend.chan; + const struct lib_ring_buffer_config *config = &chan->backend.config; + int ret; + struct switch_param param; + + /* + * With global synchronization we don't need to use the IPI scheme. + */ + if (config->sync == RING_BUFFER_SYNC_GLOBAL) { + lib_ring_buffer_switch_slow(buf, mode); + return; + } + + /* + * Disabling preemption ensures two things: first, that the + * target cpu is not taken concurrently offline while we are within + * smp_call_function_single(). Secondly, if it happens that the + * CPU is not online, our own call to lib_ring_buffer_switch_slow() + * needs to be protected from CPU hotplug handlers, which can + * also perform a remote subbuffer switch. + */ + preempt_disable(); + param.buf = buf; + param.mode = mode; + ret = smp_call_function_single(buf->backend.cpu, + remote_switch, ¶m, 1); + if (ret) { + /* Remote CPU is offline, do it ourself. */ + lib_ring_buffer_switch_slow(buf, mode); + } + preempt_enable(); +} + +/* Switch sub-buffer if current sub-buffer is non-empty. */ +void lib_ring_buffer_switch_remote(struct lib_ring_buffer *buf) +{ + _lib_ring_buffer_switch_remote(buf, SWITCH_ACTIVE); +} +EXPORT_SYMBOL_GPL(lib_ring_buffer_switch_remote); + +/* Switch sub-buffer even if current sub-buffer is empty. */ +void lib_ring_buffer_switch_remote_empty(struct lib_ring_buffer *buf) +{ + _lib_ring_buffer_switch_remote(buf, SWITCH_FLUSH); +} +EXPORT_SYMBOL_GPL(lib_ring_buffer_switch_remote_empty); + +void lib_ring_buffer_clear(struct lib_ring_buffer *buf) +{ + struct lib_ring_buffer_backend *bufb = &buf->backend; + struct channel *chan = bufb->chan; + + lib_ring_buffer_switch_remote(buf); + lib_ring_buffer_clear_reader(buf, chan); +} +EXPORT_SYMBOL_GPL(lib_ring_buffer_clear); + /* * Returns : * 0 if ok @@ -1500,12 +1978,14 @@ static int lib_ring_buffer_try_reserve_slow(struct lib_ring_buffer *buf, struct channel *chan, struct switch_offsets *offsets, - struct lib_ring_buffer_ctx *ctx) + struct lib_ring_buffer_ctx *ctx, + void *client_ctx) { - const struct lib_ring_buffer_config *config = chan->backend.config; - unsigned long reserve_commit_diff; + const struct lib_ring_buffer_config *config = &chan->backend.config; + unsigned long reserve_commit_diff, offset_cmp; - offsets->begin = v_read(config, &buf->offset); +retry: + offsets->begin = offset_cmp = v_read(config, &buf->offset); offsets->old = offsets->begin; offsets->switch_new_start = 0; offsets->switch_new_end = 0; @@ -1525,7 +2005,7 @@ int lib_ring_buffer_try_reserve_slow(struct lib_ring_buffer *buf, offsets->size = config->cb.record_header_size(config, chan, offsets->begin, &offsets->pre_header_padding, - ctx); + ctx, client_ctx); offsets->size += lib_ring_buffer_align(offsets->begin + offsets->size, ctx->largest_align) @@ -1537,7 +2017,7 @@ int lib_ring_buffer_try_reserve_slow(struct lib_ring_buffer *buf, } } if (unlikely(offsets->switch_new_start)) { - unsigned long sb_index; + unsigned long sb_index, commit_count; /* * We are typically not filling the previous buffer completely. @@ -1548,12 +2028,31 @@ int lib_ring_buffer_try_reserve_slow(struct lib_ring_buffer *buf, + config->cb.subbuffer_header_size(); /* Test new buffer integrity */ sb_index = subbuf_index(offsets->begin, chan); + /* + * Read buf->offset before buf->commit_cold[sb_index].cc_sb. + * lib_ring_buffer_check_deliver() has the matching + * memory barriers required around commit_cold cc_sb + * updates to ensure reserve and commit counter updates + * are not seen reordered when updated by another CPU. + */ + smp_rmb(); + commit_count = v_read(config, + &buf->commit_cold[sb_index].cc_sb); + /* Read buf->commit_cold[sb_index].cc_sb before buf->offset. */ + smp_rmb(); + if (unlikely(offset_cmp != v_read(config, &buf->offset))) { + /* + * The reserve counter have been concurrently updated + * while we read the commit counter. This means the + * commit counter we read might not match buf->offset + * due to concurrent update. We therefore need to retry. + */ + goto retry; + } reserve_commit_diff = (buf_trunc(offsets->begin, chan) >> chan->backend.num_subbuf_order) - - ((unsigned long) v_read(config, - &buf->commit_cold[sb_index].cc_sb) - & chan->commit_count_mask); + - (commit_count & chan->commit_count_mask); if (likely(reserve_commit_diff == 0)) { /* Next subbuffer not being written to. */ if (unlikely(config->mode != RING_BUFFER_OVERWRITE && @@ -1578,9 +2077,10 @@ int lib_ring_buffer_try_reserve_slow(struct lib_ring_buffer *buf, } else { /* * Next subbuffer reserve offset does not match the - * commit offset. Drop record in producer-consumer and - * overwrite mode. Caused by either a writer OOPS or too - * many nested writes over a reserve/commit pair. + * commit offset, and this did not involve update to the + * reserve counter. Drop record in producer-consumer and + * overwrite mode. Caused by either a writer OOPS or + * too many nested writes over a reserve/commit pair. */ v_inc(config, &buf->records_lost_wrap); return -EIO; @@ -1589,7 +2089,7 @@ int lib_ring_buffer_try_reserve_slow(struct lib_ring_buffer *buf, config->cb.record_header_size(config, chan, offsets->begin, &offsets->pre_header_padding, - ctx); + ctx, client_ctx); offsets->size += lib_ring_buffer_align(offsets->begin + offsets->size, ctx->largest_align) @@ -1626,6 +2126,25 @@ int lib_ring_buffer_try_reserve_slow(struct lib_ring_buffer *buf, return 0; } +static struct lib_ring_buffer *get_current_buf(struct channel *chan, int cpu) +{ + const struct lib_ring_buffer_config *config = &chan->backend.config; + + if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) + return per_cpu_ptr(chan->backend.buf, cpu); + else + return chan->backend.buf; +} + +void lib_ring_buffer_lost_event_too_big(struct channel *chan) +{ + const struct lib_ring_buffer_config *config = &chan->backend.config; + struct lib_ring_buffer *buf = get_current_buf(chan, smp_processor_id()); + + v_inc(config, &buf->records_lost_big); +} +EXPORT_SYMBOL_GPL(lib_ring_buffer_lost_event_too_big); + /** * lib_ring_buffer_reserve_slow - Atomic slot reservation in a buffer. * @ctx: ring buffer context. @@ -1634,25 +2153,21 @@ int lib_ring_buffer_try_reserve_slow(struct lib_ring_buffer *buf, * -EIO for other errors, else returns 0. * It will take care of sub-buffer switching. */ -int lib_ring_buffer_reserve_slow(struct lib_ring_buffer_ctx *ctx) +int lib_ring_buffer_reserve_slow(struct lib_ring_buffer_ctx *ctx, + void *client_ctx) { struct channel *chan = ctx->chan; - const struct lib_ring_buffer_config *config = chan->backend.config; + const struct lib_ring_buffer_config *config = &chan->backend.config; struct lib_ring_buffer *buf; struct switch_offsets offsets; int ret; - if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) - buf = per_cpu_ptr(chan->backend.buf, ctx->cpu); - else - buf = chan->backend.buf; - ctx->buf = buf; - + ctx->buf = buf = get_current_buf(chan, ctx->cpu); offsets.size = 0; do { ret = lib_ring_buffer_try_reserve_slow(buf, chan, &offsets, - ctx); + ctx, client_ctx); if (unlikely(ret)) return ret; } while (unlikely(v_cmpxchg(config, &buf->offset, offsets.old, @@ -1702,3 +2217,171 @@ int lib_ring_buffer_reserve_slow(struct lib_ring_buffer_ctx *ctx) return 0; } EXPORT_SYMBOL_GPL(lib_ring_buffer_reserve_slow); + +static +void lib_ring_buffer_vmcore_check_deliver(const struct lib_ring_buffer_config *config, + struct lib_ring_buffer *buf, + unsigned long commit_count, + unsigned long idx) +{ + if (config->oops == RING_BUFFER_OOPS_CONSISTENCY) + v_set(config, &buf->commit_hot[idx].seq, commit_count); +} + +/* + * The ring buffer can count events recorded and overwritten per buffer, + * but it is disabled by default due to its performance overhead. + */ +#ifdef LTTNG_RING_BUFFER_COUNT_EVENTS +static +void deliver_count_events(const struct lib_ring_buffer_config *config, + struct lib_ring_buffer *buf, + unsigned long idx) +{ + v_add(config, subbuffer_get_records_count(config, + &buf->backend, idx), + &buf->records_count); + v_add(config, subbuffer_count_records_overrun(config, + &buf->backend, idx), + &buf->records_overrun); +} +#else /* LTTNG_RING_BUFFER_COUNT_EVENTS */ +static +void deliver_count_events(const struct lib_ring_buffer_config *config, + struct lib_ring_buffer *buf, + unsigned long idx) +{ +} +#endif /* #else LTTNG_RING_BUFFER_COUNT_EVENTS */ + + +void lib_ring_buffer_check_deliver_slow(const struct lib_ring_buffer_config *config, + struct lib_ring_buffer *buf, + struct channel *chan, + unsigned long offset, + unsigned long commit_count, + unsigned long idx, + u64 tsc) +{ + unsigned long old_commit_count = commit_count + - chan->backend.subbuf_size; + + /* + * If we succeeded at updating cc_sb below, we are the subbuffer + * writer delivering the subbuffer. Deals with concurrent + * updates of the "cc" value without adding a add_return atomic + * operation to the fast path. + * + * We are doing the delivery in two steps: + * - First, we cmpxchg() cc_sb to the new value + * old_commit_count + 1. This ensures that we are the only + * subbuffer user successfully filling the subbuffer, but we + * do _not_ set the cc_sb value to "commit_count" yet. + * Therefore, other writers that would wrap around the ring + * buffer and try to start writing to our subbuffer would + * have to drop records, because it would appear as + * non-filled. + * We therefore have exclusive access to the subbuffer control + * structures. This mutual exclusion with other writers is + * crucially important to perform record overruns count in + * flight recorder mode locklessly. + * - When we are ready to release the subbuffer (either for + * reading or for overrun by other writers), we simply set the + * cc_sb value to "commit_count" and perform delivery. + * + * The subbuffer size is least 2 bytes (minimum size: 1 page). + * This guarantees that old_commit_count + 1 != commit_count. + */ + + /* + * Order prior updates to reserve count prior to the + * commit_cold cc_sb update. + */ + smp_wmb(); + if (likely(v_cmpxchg(config, &buf->commit_cold[idx].cc_sb, + old_commit_count, old_commit_count + 1) + == old_commit_count)) { + u64 *ts_end; + + /* + * Start of exclusive subbuffer access. We are + * guaranteed to be the last writer in this subbuffer + * and any other writer trying to access this subbuffer + * in this state is required to drop records. + * + * We can read the ts_end for the current sub-buffer + * which has been saved by the very last space + * reservation for the current sub-buffer. + * + * Order increment of commit counter before reading ts_end. + */ + smp_mb(); + ts_end = &buf->ts_end[idx]; + deliver_count_events(config, buf, idx); + config->cb.buffer_end(buf, *ts_end, idx, + lib_ring_buffer_get_data_size(config, + buf, + idx)); + + /* + * Increment the packet counter while we have exclusive + * access. + */ + subbuffer_inc_packet_count(config, &buf->backend, idx); + + /* + * Set noref flag and offset for this subbuffer id. + * Contains a memory barrier that ensures counter stores + * are ordered before set noref and offset. + */ + lib_ring_buffer_set_noref_offset(config, &buf->backend, idx, + buf_trunc_val(offset, chan)); + + /* + * Order set_noref and record counter updates before the + * end of subbuffer exclusive access. Orders with + * respect to writers coming into the subbuffer after + * wrap around, and also order wrt concurrent readers. + */ + smp_mb(); + /* End of exclusive subbuffer access */ + v_set(config, &buf->commit_cold[idx].cc_sb, + commit_count); + /* + * Order later updates to reserve count after + * the commit_cold cc_sb update. + */ + smp_wmb(); + lib_ring_buffer_vmcore_check_deliver(config, buf, + commit_count, idx); + + /* + * RING_BUFFER_WAKEUP_BY_WRITER wakeup is not lock-free. + */ + if (config->wakeup == RING_BUFFER_WAKEUP_BY_WRITER + && atomic_long_read(&buf->active_readers) + && lib_ring_buffer_poll_deliver(config, buf, chan)) { + wake_up_interruptible(&buf->read_wait); + wake_up_interruptible(&chan->read_wait); + } + + } +} +EXPORT_SYMBOL_GPL(lib_ring_buffer_check_deliver_slow); + +int __init init_lib_ring_buffer_frontend(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + spin_lock_init(&per_cpu(ring_buffer_nohz_lock, cpu)); + return 0; +} + +module_init(init_lib_ring_buffer_frontend); + +void __exit exit_lib_ring_buffer_frontend(void) +{ +} + +module_exit(exit_lib_ring_buffer_frontend);