From: Michael Jeanson Date: Fri, 2 Apr 2021 22:45:31 +0000 (-0400) Subject: Move libringbuffer to 'src/common/' X-Git-Tag: v2.13.0-rc1~132 X-Git-Url: http://git.liburcu.org/?a=commitdiff_plain;h=e4db8f987126e1c1e586e5ae1e1ba73d0ab43438;p=lttng-ust.git Move libringbuffer to 'src/common/' This is part of an effort to standardize our autotools setup across projects to simplify maintenance. Change-Id: I8f19a59bd0a72970199ede1b81c5d5971d613f31 Signed-off-by: Michael Jeanson Signed-off-by: Mathieu Desnoyers --- diff --git a/.gitignore b/.gitignore index 9f38fa9c..2dc1914c 100644 --- a/.gitignore +++ b/.gitignore @@ -146,7 +146,6 @@ cscope.* /src/lib/lttng-ust-python-agent/Makefile /src/lib/Makefile /src/liblttng-ust/Makefile -/src/libringbuffer/Makefile /src/python-lttngust/Makefile /src/Makefile /tests/Makefile diff --git a/configure.ac b/configure.ac index 285d269d..21dee733 100644 --- a/configure.ac +++ b/configure.ac @@ -540,7 +540,6 @@ AC_CONFIG_FILES([ src/liblttng-ust/Makefile src/lib/lttng-ust-python-agent/Makefile src/lib/Makefile - src/libringbuffer/Makefile src/lttng-ust-ctl.pc src/lttng-ust.pc src/Makefile diff --git a/src/Makefile.am b/src/Makefile.am index 4fca446e..5f958375 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -2,7 +2,6 @@ SUBDIRS = \ common \ - libringbuffer \ liblttng-ust-comm \ liblttng-ust \ lib diff --git a/src/common/Makefile.am b/src/common/Makefile.am index a198b685..2fbc8fff 100644 --- a/src/common/Makefile.am +++ b/src/common/Makefile.am @@ -40,6 +40,7 @@ noinst_HEADERS += \ noinst_LTLIBRARIES = \ libcounter.la \ msgpack/libmsgpack.la \ + libringbuffer.la \ snprintf/libsnprintf.la \ libcommon.la @@ -73,6 +74,40 @@ msgpack_libmsgpack_la_SOURCES = \ msgpack_libmsgpack_la_CFLAGS = -DUST_COMPONENT="libmsgpack" $(AM_CFLAGS) +# ringbuffer +libringbuffer_la_SOURCES = \ + ringbuffer/api.h \ + ringbuffer/backend.h \ + ringbuffer/backend_internal.h \ + ringbuffer/backend_types.h \ + ringbuffer/frontend_api.h \ + ringbuffer/frontend.h \ + ringbuffer/frontend_internal.h \ + ringbuffer/frontend_types.h \ + ringbuffer/getcpu.h \ + ringbuffer/mmap.h \ + ringbuffer/nohz.h \ + ringbuffer/rb-init.h \ + ringbuffer/ring_buffer_backend.c \ + ringbuffer/ringbuffer-config.h \ + ringbuffer/ring_buffer_frontend.c \ + ringbuffer/shm.c \ + ringbuffer/shm.h \ + ringbuffer/shm_internal.h \ + ringbuffer/shm_types.h \ + ringbuffer/smp.c \ + ringbuffer/smp.h \ + ringbuffer/vatomic.h + +libringbuffer_la_LIBADD = \ + -lrt + +if ENABLE_NUMA +libringbuffer_la_LIBADD += -lnuma +endif + +libringbuffer_la_CFLAGS = -DUST_COMPONENT="libringbuffer" $(AM_CFLAGS) + # snprintf snprintf_libsnprintf_la_SOURCES = \ snprintf/fflush.c \ diff --git a/src/common/counter/counter-api.h b/src/common/counter/counter-api.h index 08511d3a..81bb551b 100644 --- a/src/common/counter/counter-api.h +++ b/src/common/counter/counter-api.h @@ -16,7 +16,7 @@ #include #include #include "common/bitmap.h" -#include "libringbuffer/getcpu.h" +#include "common/ringbuffer/getcpu.h" /* * Using unsigned arithmetic because overflow is defined. 
diff --git a/src/common/counter/shm.c b/src/common/counter/shm.c index d1116c9c..596d5e59 100644 --- a/src/common/counter/shm.c +++ b/src/common/counter/shm.c @@ -29,7 +29,7 @@ #include "common/macros.h" #include "common/ust-fd.h" -#include "libringbuffer/mmap.h" +#include "common/ringbuffer/mmap.h" /* * Ensure we have the required amount of space available by writing 0 diff --git a/src/common/ringbuffer/api.h b/src/common/ringbuffer/api.h new file mode 100644 index 00000000..ec4f5229 --- /dev/null +++ b/src/common/ringbuffer/api.h @@ -0,0 +1,23 @@ +/* + * SPDX-License-Identifier: LGPL-2.1-only + * + * Copyright (C) 2010-2012 Mathieu Desnoyers + * + * Ring Buffer API. + */ + +#ifndef _LTTNG_RING_BUFFER_API_H +#define _LTTNG_RING_BUFFER_API_H + +#include "backend.h" +#include "frontend.h" +#include + +/* + * ring_buffer_frontend_api.h contains static inline functions that depend on + * client static inlines. Hence the inclusion of this "api" header only + * within the client. + */ +#include "frontend_api.h" + +#endif /* _LTTNG_RING_BUFFER_API_H */ diff --git a/src/common/ringbuffer/backend.h b/src/common/ringbuffer/backend.h new file mode 100644 index 00000000..0b482182 --- /dev/null +++ b/src/common/ringbuffer/backend.h @@ -0,0 +1,318 @@ +/* + * SPDX-License-Identifier: LGPL-2.1-only + * + * Copyright (C) 2011-2012 Mathieu Desnoyers + * + * Ring buffer backend (API). + * + * Credits to Steven Rostedt for proposing to use an extra-subbuffer owned by + * the reader in flight recorder mode. + */ + +#ifndef _LTTNG_RING_BUFFER_BACKEND_H +#define _LTTNG_RING_BUFFER_BACKEND_H + +#include +#include + +/* Internal helpers */ +#include "backend_internal.h" +#include "frontend_internal.h" + +/* Ring buffer backend API */ + +/* Ring buffer backend access (read/write) */ + +extern size_t lib_ring_buffer_read(struct lttng_ust_lib_ring_buffer_backend *bufb, + size_t offset, void *dest, size_t len, + struct lttng_ust_shm_handle *handle) + __attribute__((visibility("hidden"))); + +extern int lib_ring_buffer_read_cstr(struct lttng_ust_lib_ring_buffer_backend *bufb, + size_t offset, void *dest, size_t len, + struct lttng_ust_shm_handle *handle) + __attribute__((visibility("hidden"))); + +/* + * Return the address where a given offset is located. + * Should be used to get the current subbuffer header pointer. Given we know + * it's never on a page boundary, it's safe to write directly to this address, + * as long as the write is never bigger than a page size. + */ +extern void * +lib_ring_buffer_offset_address(struct lttng_ust_lib_ring_buffer_backend *bufb, + size_t offset, + struct lttng_ust_shm_handle *handle) + __attribute__((visibility("hidden"))); + +extern void * +lib_ring_buffer_read_offset_address(struct lttng_ust_lib_ring_buffer_backend *bufb, + size_t offset, + struct lttng_ust_shm_handle *handle) + __attribute__((visibility("hidden"))); + +/** + * lib_ring_buffer_write - write data to a buffer backend + * @config : ring buffer instance configuration + * @ctx: ring buffer context. (input arguments only) + * @src : source pointer to copy from + * @len : length of data to copy + * + * This function copies "len" bytes of data from a source pointer to a buffer + * backend, at the current context offset. This is more or less a buffer + * backend-specific memcpy() operation. Calls the slow path (_ring_buffer_write) + * if copy is crossing a page boundary. 
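+ *
+ * As an illustrative sketch only (client_config and payload are placeholder
+ * names, not part of this header), a tracing client serializing a fixed-size
+ * field between its reserve and commit steps would typically call:
+ *
+ *   lib_ring_buffer_write(&client_config, ctx, &payload, sizeof(payload));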
+ */ +static inline +void lib_ring_buffer_write(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_ctx *ctx, + const void *src, size_t len) + __attribute__((always_inline)); +static inline +void lib_ring_buffer_write(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_ctx *ctx, + const void *src, size_t len) +{ + struct lttng_ust_lib_ring_buffer_ctx_private *ctx_private = ctx->priv; + struct channel_backend *chanb = &ctx_private->chan->backend; + struct lttng_ust_shm_handle *handle = ctx_private->chan->handle; + size_t offset = ctx_private->buf_offset; + struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; + void *p; + + if (caa_unlikely(!len)) + return; + /* + * Underlying layer should never ask for writes across + * subbuffers. + */ + CHAN_WARN_ON(chanb, (offset & (chanb->buf_size - 1)) + len > chanb->buf_size); + backend_pages = lib_ring_buffer_get_backend_pages_from_ctx(config, ctx); + if (caa_unlikely(!backend_pages)) { + if (lib_ring_buffer_backend_get_pages(config, ctx, &backend_pages)) + return; + } + p = shmp_index(handle, backend_pages->p, offset & (chanb->subbuf_size - 1)); + if (caa_unlikely(!p)) + return; + lib_ring_buffer_do_copy(config, p, src, len); + ctx_private->buf_offset += len; +} + +/* + * Copy up to @len string bytes from @src to @dest. Stop whenever a NULL + * terminating character is found in @src. Returns the number of bytes + * copied. Does *not* terminate @dest with NULL terminating character. + */ +static inline +size_t lib_ring_buffer_do_strcpy(const struct lttng_ust_lib_ring_buffer_config *config, + char *dest, const char *src, size_t len) + __attribute__((always_inline)); +static inline +size_t lib_ring_buffer_do_strcpy( + const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), + char *dest, const char *src, size_t len) +{ + size_t count; + + for (count = 0; count < len; count++) { + char c; + + /* + * Only read source character once, in case it is + * modified concurrently. + */ + c = CMM_LOAD_SHARED(src[count]); + if (!c) + break; + lib_ring_buffer_do_copy(config, &dest[count], &c, 1); + } + return count; +} + +/** + * lib_ring_buffer_strcpy - write string data to a buffer backend + * @config : ring buffer instance configuration + * @ctx: ring buffer context. (input arguments only) + * @src : source pointer to copy from + * @len : length of data to copy + * @pad : character to use for padding + * + * This function copies @len - 1 bytes of string data from a source + * pointer to a buffer backend, followed by a terminating '\0' + * character, at the current context offset. This is more or less a + * buffer backend-specific strncpy() operation. If a terminating '\0' + * character is found in @src before @len - 1 characters are copied, pad + * the buffer with @pad characters (e.g. '#'). 
+ */ +static inline +void lib_ring_buffer_strcpy(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_ctx *ctx, + const char *src, size_t len, char pad) + __attribute__((always_inline)); +static inline +void lib_ring_buffer_strcpy(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_ctx *ctx, + const char *src, size_t len, char pad) +{ + struct lttng_ust_lib_ring_buffer_ctx_private *ctx_private = ctx->priv; + struct channel_backend *chanb = &ctx_private->chan->backend; + struct lttng_ust_shm_handle *handle = ctx_private->chan->handle; + size_t count; + size_t offset = ctx_private->buf_offset; + struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; + void *p; + + if (caa_unlikely(!len)) + return; + /* + * Underlying layer should never ask for writes across + * subbuffers. + */ + CHAN_WARN_ON(chanb, (offset & (chanb->buf_size - 1)) + len > chanb->buf_size); + backend_pages = lib_ring_buffer_get_backend_pages_from_ctx(config, ctx); + if (caa_unlikely(!backend_pages)) { + if (lib_ring_buffer_backend_get_pages(config, ctx, &backend_pages)) + return; + } + p = shmp_index(handle, backend_pages->p, offset & (chanb->subbuf_size - 1)); + if (caa_unlikely(!p)) + return; + + count = lib_ring_buffer_do_strcpy(config, p, src, len - 1); + offset += count; + /* Padding */ + if (caa_unlikely(count < len - 1)) { + size_t pad_len = len - 1 - count; + + p = shmp_index(handle, backend_pages->p, offset & (chanb->subbuf_size - 1)); + if (caa_unlikely(!p)) + return; + lib_ring_buffer_do_memset(p, pad, pad_len); + offset += pad_len; + } + /* Final '\0' */ + p = shmp_index(handle, backend_pages->p, offset & (chanb->subbuf_size - 1)); + if (caa_unlikely(!p)) + return; + lib_ring_buffer_do_memset(p, '\0', 1); + ctx_private->buf_offset += len; +} + +/** + * lib_ring_buffer_pstrcpy - write to a buffer backend P-string + * @config : ring buffer instance configuration + * @ctx: ring buffer context. (input arguments only) + * @src : source pointer to copy from + * @len : length of data to copy + * @pad : character to use for padding + * + * This function copies up to @len bytes of data from a source pointer + * to a Pascal String into the buffer backend. If a terminating '\0' + * character is found in @src before @len characters are copied, pad the + * buffer with @pad characters (e.g. '\0'). + * + * The length of the pascal strings in the ring buffer is explicit: it + * is either the array or sequence length. + */ +static inline +void lib_ring_buffer_pstrcpy(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_ctx *ctx, + const char *src, size_t len, char pad) + __attribute__((always_inline)); +static inline +void lib_ring_buffer_pstrcpy(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_ctx *ctx, + const char *src, size_t len, char pad) +{ + struct lttng_ust_lib_ring_buffer_ctx_private *ctx_private = ctx->priv; + struct channel_backend *chanb = &ctx_private->chan->backend; + struct lttng_ust_shm_handle *handle = ctx_private->chan->handle; + size_t count; + size_t offset = ctx_private->buf_offset; + struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; + void *p; + + if (caa_unlikely(!len)) + return; + /* + * Underlying layer should never ask for writes across + * subbuffers. 
+ */ + CHAN_WARN_ON(chanb, (offset & (chanb->buf_size - 1)) + len > chanb->buf_size); + backend_pages = lib_ring_buffer_get_backend_pages_from_ctx(config, ctx); + if (caa_unlikely(!backend_pages)) { + if (lib_ring_buffer_backend_get_pages(config, ctx, &backend_pages)) + return; + } + p = shmp_index(handle, backend_pages->p, offset & (chanb->subbuf_size - 1)); + if (caa_unlikely(!p)) + return; + + count = lib_ring_buffer_do_strcpy(config, p, src, len); + offset += count; + /* Padding */ + if (caa_unlikely(count < len)) { + size_t pad_len = len - count; + + p = shmp_index(handle, backend_pages->p, offset & (chanb->subbuf_size - 1)); + if (caa_unlikely(!p)) + return; + lib_ring_buffer_do_memset(p, pad, pad_len); + } + ctx_private->buf_offset += len; +} + +/* + * This accessor counts the number of unread records in a buffer. + * It only provides a consistent value if no reads not writes are performed + * concurrently. + */ +static inline +unsigned long lib_ring_buffer_get_records_unread( + const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_shm_handle *handle) +{ + struct lttng_ust_lib_ring_buffer_backend *bufb = &buf->backend; + unsigned long records_unread = 0, sb_bindex; + unsigned int i; + struct lttng_ust_lib_ring_buffer_channel *chan; + + chan = shmp(handle, bufb->chan); + if (!chan) + return 0; + for (i = 0; i < chan->backend.num_subbuf; i++) { + struct lttng_ust_lib_ring_buffer_backend_subbuffer *wsb; + struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; + struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; + + wsb = shmp_index(handle, bufb->buf_wsb, i); + if (!wsb) + return 0; + sb_bindex = subbuffer_id_get_index(config, wsb->id); + rpages = shmp_index(handle, bufb->array, sb_bindex); + if (!rpages) + return 0; + backend_pages = shmp(handle, rpages->shmp); + if (!backend_pages) + return 0; + records_unread += v_read(config, &backend_pages->records_unread); + } + if (config->mode == RING_BUFFER_OVERWRITE) { + struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; + struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; + + sb_bindex = subbuffer_id_get_index(config, bufb->buf_rsb.id); + rpages = shmp_index(handle, bufb->array, sb_bindex); + if (!rpages) + return 0; + backend_pages = shmp(handle, rpages->shmp); + if (!backend_pages) + return 0; + records_unread += v_read(config, &backend_pages->records_unread); + } + return records_unread; +} + +#endif /* _LTTNG_RING_BUFFER_BACKEND_H */ diff --git a/src/common/ringbuffer/backend_internal.h b/src/common/ringbuffer/backend_internal.h new file mode 100644 index 00000000..a325875c --- /dev/null +++ b/src/common/ringbuffer/backend_internal.h @@ -0,0 +1,696 @@ +/* + * SPDX-License-Identifier: LGPL-2.1-only + * + * Copyright (C) 2005-2012 Mathieu Desnoyers + * + * Ring buffer backend (internal helpers). 
+ */ + +#ifndef _LTTNG_RING_BUFFER_BACKEND_INTERNAL_H +#define _LTTNG_RING_BUFFER_BACKEND_INTERNAL_H + +#include +#include +#include +#include + +#include +#include "ringbuffer-config.h" +#include "backend_types.h" +#include "frontend_types.h" +#include "shm.h" + +/* Ring buffer backend API presented to the frontend */ + +/* Ring buffer and channel backend create/free */ + +int lib_ring_buffer_backend_create(struct lttng_ust_lib_ring_buffer_backend *bufb, + struct channel_backend *chan, + int cpu, + struct lttng_ust_shm_handle *handle, + struct shm_object *shmobj) + __attribute__((visibility("hidden"))); + +void channel_backend_unregister_notifiers(struct channel_backend *chanb) + __attribute__((visibility("hidden"))); + +void lib_ring_buffer_backend_free(struct lttng_ust_lib_ring_buffer_backend *bufb) + __attribute__((visibility("hidden"))); + +int channel_backend_init(struct channel_backend *chanb, + const char *name, + const struct lttng_ust_lib_ring_buffer_config *config, + size_t subbuf_size, + size_t num_subbuf, struct lttng_ust_shm_handle *handle, + const int *stream_fds) + __attribute__((visibility("hidden"))); + +void channel_backend_free(struct channel_backend *chanb, + struct lttng_ust_shm_handle *handle) + __attribute__((visibility("hidden"))); + +void lib_ring_buffer_backend_reset(struct lttng_ust_lib_ring_buffer_backend *bufb, + struct lttng_ust_shm_handle *handle) + __attribute__((visibility("hidden"))); + +void channel_backend_reset(struct channel_backend *chanb) + __attribute__((visibility("hidden"))); + +int lib_ring_buffer_backend_init(void) + __attribute__((visibility("hidden"))); + +void lib_ring_buffer_backend_exit(void) + __attribute__((visibility("hidden"))); + +extern void _lib_ring_buffer_write(struct lttng_ust_lib_ring_buffer_backend *bufb, + size_t offset, const void *src, size_t len, + ssize_t pagecpy) + __attribute__((visibility("hidden"))); + +/* + * Subbuffer ID bits for overwrite mode. Need to fit within a single word to be + * exchanged atomically. + * + * Top half word, except lowest bit, belongs to "offset", which is used to keep + * to count the produced buffers. For overwrite mode, this provides the + * consumer with the capacity to read subbuffers in order, handling the + * situation where producers would write up to 2^15 buffers (or 2^31 for 64-bit + * systems) concurrently with a single execution of get_subbuf (between offset + * sampling and subbuffer ID exchange). + */ + +#define HALF_ULONG_BITS (CAA_BITS_PER_LONG >> 1) + +#define SB_ID_OFFSET_SHIFT (HALF_ULONG_BITS + 1) +#define SB_ID_OFFSET_COUNT (1UL << SB_ID_OFFSET_SHIFT) +#define SB_ID_OFFSET_MASK (~(SB_ID_OFFSET_COUNT - 1)) +/* + * Lowest bit of top word half belongs to noref. Used only for overwrite mode. + */ +#define SB_ID_NOREF_SHIFT (SB_ID_OFFSET_SHIFT - 1) +#define SB_ID_NOREF_COUNT (1UL << SB_ID_NOREF_SHIFT) +#define SB_ID_NOREF_MASK SB_ID_NOREF_COUNT +/* + * In overwrite mode: lowest half of word is used for index. + * Limit of 2^16 subbuffers per buffer on 32-bit, 2^32 on 64-bit. + * In producer-consumer mode: whole word used for index. + */ +#define SB_ID_INDEX_SHIFT 0 +#define SB_ID_INDEX_COUNT (1UL << SB_ID_INDEX_SHIFT) +#define SB_ID_INDEX_MASK (SB_ID_NOREF_COUNT - 1) + +/* + * Construct the subbuffer id from offset, index and noref. Use only the index + * for producer-consumer mode (offset and noref are only used in overwrite + * mode). 
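+ *
+ * As a worked example of the bit layout above, on a 32-bit architecture
+ * (CAA_BITS_PER_LONG == 32, HALF_ULONG_BITS == 16) the overwrite-mode id
+ * decomposes as: bits 31..17 hold the offset count, bit 16 holds the noref
+ * flag, and bits 15..0 hold the subbuffer index.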
+ */ +static inline +unsigned long subbuffer_id(const struct lttng_ust_lib_ring_buffer_config *config, + unsigned long offset, unsigned long noref, + unsigned long index) +{ + if (config->mode == RING_BUFFER_OVERWRITE) + return (offset << SB_ID_OFFSET_SHIFT) + | (noref << SB_ID_NOREF_SHIFT) + | index; + else + return index; +} + +/* + * Compare offset with the offset contained within id. Return 1 if the offset + * bits are identical, else 0. + */ +static inline +int subbuffer_id_compare_offset( + const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), + unsigned long id, unsigned long offset) +{ + return (id & SB_ID_OFFSET_MASK) == (offset << SB_ID_OFFSET_SHIFT); +} + +static inline +unsigned long subbuffer_id_get_index(const struct lttng_ust_lib_ring_buffer_config *config, + unsigned long id) +{ + if (config->mode == RING_BUFFER_OVERWRITE) + return id & SB_ID_INDEX_MASK; + else + return id; +} + +static inline +unsigned long subbuffer_id_is_noref(const struct lttng_ust_lib_ring_buffer_config *config, + unsigned long id) +{ + if (config->mode == RING_BUFFER_OVERWRITE) + return !!(id & SB_ID_NOREF_MASK); + else + return 1; +} + +/* + * Only used by reader on subbuffer ID it has exclusive access to. No volatile + * needed. + */ +static inline +void subbuffer_id_set_noref(const struct lttng_ust_lib_ring_buffer_config *config, + unsigned long *id) +{ + if (config->mode == RING_BUFFER_OVERWRITE) + *id |= SB_ID_NOREF_MASK; +} + +static inline +void subbuffer_id_set_noref_offset(const struct lttng_ust_lib_ring_buffer_config *config, + unsigned long *id, unsigned long offset) +{ + unsigned long tmp; + + if (config->mode == RING_BUFFER_OVERWRITE) { + tmp = *id; + tmp &= ~SB_ID_OFFSET_MASK; + tmp |= offset << SB_ID_OFFSET_SHIFT; + tmp |= SB_ID_NOREF_MASK; + /* Volatile store, read concurrently by readers. */ + CMM_ACCESS_ONCE(*id) = tmp; + } +} + +/* No volatile access, since already used locally */ +static inline +void subbuffer_id_clear_noref(const struct lttng_ust_lib_ring_buffer_config *config, + unsigned long *id) +{ + if (config->mode == RING_BUFFER_OVERWRITE) + *id &= ~SB_ID_NOREF_MASK; +} + +/* + * For overwrite mode, cap the number of subbuffers per buffer to: + * 2^16 on 32-bit architectures + * 2^32 on 64-bit architectures + * This is required to fit in the index part of the ID. Return 0 on success, + * -EPERM on failure. + */ +static inline +int subbuffer_id_check_index(const struct lttng_ust_lib_ring_buffer_config *config, + unsigned long num_subbuf) +{ + if (config->mode == RING_BUFFER_OVERWRITE) + return (num_subbuf > (1UL << HALF_ULONG_BITS)) ? 
-EPERM : 0; + else + return 0; +} + +static inline +int lib_ring_buffer_backend_get_pages(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_ctx *ctx, + struct lttng_ust_lib_ring_buffer_backend_pages **backend_pages) +{ + struct lttng_ust_lib_ring_buffer_ctx_private *ctx_private = ctx->priv; + struct lttng_ust_lib_ring_buffer_backend *bufb = &ctx_private->buf->backend; + struct channel_backend *chanb = &ctx_private->chan->backend; + struct lttng_ust_shm_handle *handle = ctx_private->chan->handle; + size_t sbidx; + size_t offset = ctx_private->buf_offset; + struct lttng_ust_lib_ring_buffer_backend_subbuffer *wsb; + struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; + unsigned long sb_bindex, id; + struct lttng_ust_lib_ring_buffer_backend_pages *_backend_pages; + + offset &= chanb->buf_size - 1; + sbidx = offset >> chanb->subbuf_size_order; + wsb = shmp_index(handle, bufb->buf_wsb, sbidx); + if (caa_unlikely(!wsb)) + return -1; + id = wsb->id; + sb_bindex = subbuffer_id_get_index(config, id); + rpages = shmp_index(handle, bufb->array, sb_bindex); + if (caa_unlikely(!rpages)) + return -1; + CHAN_WARN_ON(ctx_private->chan, + config->mode == RING_BUFFER_OVERWRITE + && subbuffer_id_is_noref(config, id)); + _backend_pages = shmp(handle, rpages->shmp); + if (caa_unlikely(!_backend_pages)) + return -1; + *backend_pages = _backend_pages; + return 0; +} + +/* Get backend pages from cache. */ +static inline +struct lttng_ust_lib_ring_buffer_backend_pages * + lib_ring_buffer_get_backend_pages_from_ctx( + const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), + struct lttng_ust_lib_ring_buffer_ctx *ctx) +{ + return ctx->priv->backend_pages; +} + +/* + * The ring buffer can count events recorded and overwritten per buffer, + * but it is disabled by default due to its performance overhead. + */ +#ifdef LTTNG_RING_BUFFER_COUNT_EVENTS +static inline +void subbuffer_count_record(const struct lttng_ust_lib_ring_buffer_config *config, + const struct lttng_ust_lib_ring_buffer_ctx *ctx, + struct lttng_ust_lib_ring_buffer_backend *bufb, + unsigned long idx, struct lttng_ust_shm_handle *handle) +{ + struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; + + backend_pages = lib_ring_buffer_get_backend_pages_from_ctx(config, ctx); + if (caa_unlikely(!backend_pages)) { + if (lib_ring_buffer_backend_get_pages(config, ctx, &backend_pages)) + return; + } + v_inc(config, &backend_pages->records_commit); +} +#else /* LTTNG_RING_BUFFER_COUNT_EVENTS */ +static inline +void subbuffer_count_record(const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), + const struct lttng_ust_lib_ring_buffer_ctx *ctx __attribute__((unused)), + struct lttng_ust_lib_ring_buffer_backend *bufb __attribute__((unused)), + unsigned long idx __attribute__((unused)), + struct lttng_ust_shm_handle *handle __attribute__((unused))) +{ +} +#endif /* #else LTTNG_RING_BUFFER_COUNT_EVENTS */ + +/* + * Reader has exclusive subbuffer access for record consumption. No need to + * perform the decrement atomically. 
+ */ +static inline +void subbuffer_consume_record(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_backend *bufb, + struct lttng_ust_shm_handle *handle) +{ + unsigned long sb_bindex; + struct lttng_ust_lib_ring_buffer_channel *chan; + struct lttng_ust_lib_ring_buffer_backend_pages_shmp *pages_shmp; + struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; + + sb_bindex = subbuffer_id_get_index(config, bufb->buf_rsb.id); + chan = shmp(handle, bufb->chan); + if (!chan) + return; + pages_shmp = shmp_index(handle, bufb->array, sb_bindex); + if (!pages_shmp) + return; + backend_pages = shmp(handle, pages_shmp->shmp); + if (!backend_pages) + return; + CHAN_WARN_ON(chan, !v_read(config, &backend_pages->records_unread)); + /* Non-atomic decrement protected by exclusive subbuffer access */ + _v_dec(config, &backend_pages->records_unread); + v_inc(config, &bufb->records_read); +} + +static inline +unsigned long subbuffer_get_records_count( + const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_backend *bufb, + unsigned long idx, + struct lttng_ust_shm_handle *handle) +{ + unsigned long sb_bindex; + struct lttng_ust_lib_ring_buffer_backend_subbuffer *wsb; + struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; + struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; + + wsb = shmp_index(handle, bufb->buf_wsb, idx); + if (!wsb) + return 0; + sb_bindex = subbuffer_id_get_index(config, wsb->id); + rpages = shmp_index(handle, bufb->array, sb_bindex); + if (!rpages) + return 0; + backend_pages = shmp(handle, rpages->shmp); + if (!backend_pages) + return 0; + return v_read(config, &backend_pages->records_commit); +} + +/* + * Must be executed at subbuffer delivery when the writer has _exclusive_ + * subbuffer access. See lib_ring_buffer_check_deliver() for details. + * lib_ring_buffer_get_records_count() must be called to get the records + * count before this function, because it resets the records_commit + * count. 
+ */ +static inline +unsigned long subbuffer_count_records_overrun( + const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_backend *bufb, + unsigned long idx, + struct lttng_ust_shm_handle *handle) +{ + unsigned long overruns, sb_bindex; + struct lttng_ust_lib_ring_buffer_backend_subbuffer *wsb; + struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; + struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; + + wsb = shmp_index(handle, bufb->buf_wsb, idx); + if (!wsb) + return 0; + sb_bindex = subbuffer_id_get_index(config, wsb->id); + rpages = shmp_index(handle, bufb->array, sb_bindex); + if (!rpages) + return 0; + backend_pages = shmp(handle, rpages->shmp); + if (!backend_pages) + return 0; + overruns = v_read(config, &backend_pages->records_unread); + v_set(config, &backend_pages->records_unread, + v_read(config, &backend_pages->records_commit)); + v_set(config, &backend_pages->records_commit, 0); + + return overruns; +} + +static inline +void subbuffer_set_data_size(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_backend *bufb, + unsigned long idx, + unsigned long data_size, + struct lttng_ust_shm_handle *handle) +{ + unsigned long sb_bindex; + struct lttng_ust_lib_ring_buffer_backend_subbuffer *wsb; + struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; + struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; + + wsb = shmp_index(handle, bufb->buf_wsb, idx); + if (!wsb) + return; + sb_bindex = subbuffer_id_get_index(config, wsb->id); + rpages = shmp_index(handle, bufb->array, sb_bindex); + if (!rpages) + return; + backend_pages = shmp(handle, rpages->shmp); + if (!backend_pages) + return; + backend_pages->data_size = data_size; +} + +static inline +unsigned long subbuffer_get_read_data_size( + const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_backend *bufb, + struct lttng_ust_shm_handle *handle) +{ + unsigned long sb_bindex; + struct lttng_ust_lib_ring_buffer_backend_pages_shmp *pages_shmp; + struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; + + sb_bindex = subbuffer_id_get_index(config, bufb->buf_rsb.id); + pages_shmp = shmp_index(handle, bufb->array, sb_bindex); + if (!pages_shmp) + return 0; + backend_pages = shmp(handle, pages_shmp->shmp); + if (!backend_pages) + return 0; + return backend_pages->data_size; +} + +static inline +unsigned long subbuffer_get_data_size( + const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_backend *bufb, + unsigned long idx, + struct lttng_ust_shm_handle *handle) +{ + unsigned long sb_bindex; + struct lttng_ust_lib_ring_buffer_backend_subbuffer *wsb; + struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; + struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; + + wsb = shmp_index(handle, bufb->buf_wsb, idx); + if (!wsb) + return 0; + sb_bindex = subbuffer_id_get_index(config, wsb->id); + rpages = shmp_index(handle, bufb->array, sb_bindex); + if (!rpages) + return 0; + backend_pages = shmp(handle, rpages->shmp); + if (!backend_pages) + return 0; + return backend_pages->data_size; +} + +static inline +void subbuffer_inc_packet_count( + const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), + struct lttng_ust_lib_ring_buffer_backend *bufb, + unsigned long idx, struct lttng_ust_shm_handle *handle) +{ + struct lttng_ust_lib_ring_buffer_backend_counts *counts; + + counts = shmp_index(handle, bufb->buf_cnt, idx); + if 
(!counts) + return; + counts->seq_cnt++; +} + +/** + * lib_ring_buffer_clear_noref - Clear the noref subbuffer flag, called by + * writer. + */ +static inline +void lib_ring_buffer_clear_noref(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_backend *bufb, + unsigned long idx, + struct lttng_ust_shm_handle *handle) +{ + unsigned long id, new_id; + struct lttng_ust_lib_ring_buffer_backend_subbuffer *wsb; + + if (config->mode != RING_BUFFER_OVERWRITE) + return; + + /* + * Performing a volatile access to read the sb_pages, because we want to + * read a coherent version of the pointer and the associated noref flag. + */ + wsb = shmp_index(handle, bufb->buf_wsb, idx); + if (!wsb) + return; + id = CMM_ACCESS_ONCE(wsb->id); + for (;;) { + /* This check is called on the fast path for each record. */ + if (caa_likely(!subbuffer_id_is_noref(config, id))) { + /* + * Store after load dependency ordering the writes to + * the subbuffer after load and test of the noref flag + * matches the memory barrier implied by the cmpxchg() + * in update_read_sb_index(). + */ + return; /* Already writing to this buffer */ + } + new_id = id; + subbuffer_id_clear_noref(config, &new_id); + new_id = uatomic_cmpxchg(&wsb->id, id, new_id); + if (caa_likely(new_id == id)) + break; + id = new_id; + } +} + +/** + * lib_ring_buffer_set_noref_offset - Set the noref subbuffer flag and offset, + * called by writer. + */ +static inline +void lib_ring_buffer_set_noref_offset(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_backend *bufb, + unsigned long idx, unsigned long offset, + struct lttng_ust_shm_handle *handle) +{ + struct lttng_ust_lib_ring_buffer_backend_subbuffer *wsb; + struct lttng_ust_lib_ring_buffer_channel *chan; + + if (config->mode != RING_BUFFER_OVERWRITE) + return; + + wsb = shmp_index(handle, bufb->buf_wsb, idx); + if (!wsb) + return; + /* + * Because ring_buffer_set_noref() is only called by a single thread + * (the one which updated the cc_sb value), there are no concurrent + * updates to take care of: other writers have not updated cc_sb, so + * they cannot set the noref flag, and concurrent readers cannot modify + * the pointer because the noref flag is not set yet. + * The smp_wmb() in ring_buffer_commit() takes care of ordering writes + * to the subbuffer before this set noref operation. + * subbuffer_set_noref() uses a volatile store to deal with concurrent + * readers of the noref flag. + */ + chan = shmp(handle, bufb->chan); + if (!chan) + return; + CHAN_WARN_ON(chan, subbuffer_id_is_noref(config, wsb->id)); + /* + * Memory barrier that ensures counter stores are ordered before set + * noref and offset. + */ + cmm_smp_mb(); + subbuffer_id_set_noref_offset(config, &wsb->id, offset); +} + +/** + * update_read_sb_index - Read-side subbuffer index update. 
+ */ +static inline +int update_read_sb_index(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_backend *bufb, + struct channel_backend *chanb __attribute__((unused)), + unsigned long consumed_idx, + unsigned long consumed_count, + struct lttng_ust_shm_handle *handle) +{ + struct lttng_ust_lib_ring_buffer_backend_subbuffer *wsb; + unsigned long old_id, new_id; + + wsb = shmp_index(handle, bufb->buf_wsb, consumed_idx); + if (caa_unlikely(!wsb)) + return -EPERM; + + if (config->mode == RING_BUFFER_OVERWRITE) { + struct lttng_ust_lib_ring_buffer_channel *chan; + + /* + * Exchange the target writer subbuffer with our own unused + * subbuffer. No need to use CMM_ACCESS_ONCE() here to read the + * old_wpage, because the value read will be confirmed by the + * following cmpxchg(). + */ + old_id = wsb->id; + if (caa_unlikely(!subbuffer_id_is_noref(config, old_id))) + return -EAGAIN; + /* + * Make sure the offset count we are expecting matches the one + * indicated by the writer. + */ + if (caa_unlikely(!subbuffer_id_compare_offset(config, old_id, + consumed_count))) + return -EAGAIN; + chan = shmp(handle, bufb->chan); + if (caa_unlikely(!chan)) + return -EPERM; + CHAN_WARN_ON(chan, !subbuffer_id_is_noref(config, bufb->buf_rsb.id)); + subbuffer_id_set_noref_offset(config, &bufb->buf_rsb.id, + consumed_count); + new_id = uatomic_cmpxchg(&wsb->id, old_id, bufb->buf_rsb.id); + if (caa_unlikely(old_id != new_id)) + return -EAGAIN; + bufb->buf_rsb.id = new_id; + } else { + /* No page exchange, use the writer page directly */ + bufb->buf_rsb.id = wsb->id; + } + return 0; +} + +#ifndef inline_memcpy +#define inline_memcpy(dest, src, n) memcpy(dest, src, n) +#endif + +static inline +void lttng_inline_memcpy(void *dest, const void *src, + unsigned long len) + __attribute__((always_inline)); +static inline +void lttng_inline_memcpy(void *dest, const void *src, + unsigned long len) +{ + switch (len) { + case 1: + *(uint8_t *) dest = *(const uint8_t *) src; + break; + case 2: + *(uint16_t *) dest = *(const uint16_t *) src; + break; + case 4: + *(uint32_t *) dest = *(const uint32_t *) src; + break; + case 8: + *(uint64_t *) dest = *(const uint64_t *) src; + break; + default: + inline_memcpy(dest, src, len); + } +} + +/* + * Use the architecture-specific memcpy implementation for constant-sized + * inputs, but rely on an inline memcpy for length statically unknown. + * The function call to memcpy is just way too expensive for a fast path. 
+ */ +#define lib_ring_buffer_do_copy(config, dest, src, len) \ +do { \ + size_t __len = (len); \ + if (__builtin_constant_p(len)) \ + memcpy(dest, src, __len); \ + else \ + lttng_inline_memcpy(dest, src, __len); \ +} while (0) + +/* + * write len bytes to dest with c + */ +static inline +void lib_ring_buffer_do_memset(char *dest, char c, unsigned long len) +{ + unsigned long i; + + for (i = 0; i < len; i++) + dest[i] = c; +} + +/* arch-agnostic implementation */ + +static inline int lttng_ust_fls(unsigned int x) +{ + int r = 32; + + if (!x) + return 0; + if (!(x & 0xFFFF0000U)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xFF000000U)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xF0000000U)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xC0000000U)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000U)) { + /* No need to bit shift on last operation */ + r -= 1; + } + return r; +} + +static inline int get_count_order(unsigned int count) +{ + int order; + + order = lttng_ust_fls(count) - 1; + if (count & (count - 1)) + order++; + return order; +} + +#endif /* _LTTNG_RING_BUFFER_BACKEND_INTERNAL_H */ diff --git a/src/common/ringbuffer/backend_types.h b/src/common/ringbuffer/backend_types.h new file mode 100644 index 00000000..19d6513d --- /dev/null +++ b/src/common/ringbuffer/backend_types.h @@ -0,0 +1,98 @@ +/* + * SPDX-License-Identifier: LGPL-2.1-only + * + * Copyright (C) 2008-2012 Mathieu Desnoyers + * + * Ring buffer backend (types). + */ + +#ifndef _LTTNG_RING_BUFFER_BACKEND_TYPES_H +#define _LTTNG_RING_BUFFER_BACKEND_TYPES_H + +#include +#include +#include "shm_internal.h" +#include "vatomic.h" + +#define RB_BACKEND_PAGES_PADDING 16 +struct lttng_ust_lib_ring_buffer_backend_pages { + unsigned long mmap_offset; /* offset of the subbuffer in mmap */ + union v_atomic records_commit; /* current records committed count */ + union v_atomic records_unread; /* records to read */ + unsigned long data_size; /* Amount of data to read from subbuf */ + DECLARE_SHMP(char, p); /* Backing memory map */ + char padding[RB_BACKEND_PAGES_PADDING]; +}; + +struct lttng_ust_lib_ring_buffer_backend_subbuffer { + /* Identifier for subbuf backend pages. Exchanged atomically. */ + unsigned long id; /* backend subbuffer identifier */ +}; + +struct lttng_ust_lib_ring_buffer_backend_counts { + /* + * Counter specific to the sub-buffer location within the ring buffer. + * The actual sequence number of the packet within the entire ring + * buffer can be derived from the formula nr_subbuffers * seq_cnt + + * subbuf_idx. + */ + uint64_t seq_cnt; /* packet sequence number */ +}; + +/* + * Forward declaration of frontend-specific channel and ring_buffer. + */ +struct lttng_ust_lib_ring_buffer_channel; +struct lttng_ust_lib_ring_buffer; + +struct lttng_ust_lib_ring_buffer_backend_pages_shmp { + DECLARE_SHMP(struct lttng_ust_lib_ring_buffer_backend_pages, shmp); +}; + +#define RB_BACKEND_RING_BUFFER_PADDING 64 +struct lttng_ust_lib_ring_buffer_backend { + /* Array of ring_buffer_backend_subbuffer for writer */ + DECLARE_SHMP(struct lttng_ust_lib_ring_buffer_backend_subbuffer, buf_wsb); + /* ring_buffer_backend_subbuffer for reader */ + struct lttng_ust_lib_ring_buffer_backend_subbuffer buf_rsb; + /* Array of lib_ring_buffer_backend_counts for the packet counter */ + DECLARE_SHMP(struct lttng_ust_lib_ring_buffer_backend_counts, buf_cnt); + /* + * Pointer array of backend pages, for whole buffer. + * Indexed by ring_buffer_backend_subbuffer identifier (id) index. 
+	 */
+	DECLARE_SHMP(struct lttng_ust_lib_ring_buffer_backend_pages_shmp, array);
+	DECLARE_SHMP(char, memory_map); /* memory mapping */
+
+	DECLARE_SHMP(struct lttng_ust_lib_ring_buffer_channel, chan); /* Associated channel */
+	int cpu; /* This buffer's cpu. -1 if global. */
+	union v_atomic records_read; /* Number of records read */
+	unsigned int allocated:1; /* is buffer allocated ? */
+	char padding[RB_BACKEND_RING_BUFFER_PADDING];
+};
+
+struct lttng_ust_lib_ring_buffer_shmp {
+	DECLARE_SHMP(struct lttng_ust_lib_ring_buffer, shmp); /* Channel per-cpu buffers */
+};
+
+#define RB_BACKEND_CHANNEL_PADDING 64
+struct channel_backend {
+	unsigned long buf_size; /* Size of the buffer */
+	unsigned long subbuf_size; /* Sub-buffer size */
+	unsigned int subbuf_size_order; /* Order of sub-buffer size */
+	unsigned int num_subbuf_order; /*
+					* Order of number of sub-buffers/buffer
+					* for writer.
+					*/
+	unsigned int buf_size_order; /* Order of buffer size */
+	unsigned int extra_reader_sb:1; /* has extra reader subbuffer ? */
+	unsigned long num_subbuf; /* Number of sub-buffers for writer */
+	uint64_t start_tsc; /* Channel creation TSC value */
+	DECLARE_SHMP(void *, priv_data);/* Client-specific information */
+	struct lttng_ust_lib_ring_buffer_config config; /* Ring buffer configuration */
+	char name[NAME_MAX]; /* Channel name */
+	char padding[RB_BACKEND_CHANNEL_PADDING];
+	struct lttng_ust_lib_ring_buffer_shmp buf[];
+};
+
+#endif /* _LTTNG_RING_BUFFER_BACKEND_TYPES_H */
diff --git a/src/common/ringbuffer/frontend.h b/src/common/ringbuffer/frontend.h
new file mode 100644
index 00000000..12e21005
--- /dev/null
+++ b/src/common/ringbuffer/frontend.h
@@ -0,0 +1,297 @@
+/*
+ * SPDX-License-Identifier: LGPL-2.1-only
+ *
+ * Copyright (C) 2005-2012 Mathieu Desnoyers
+ *
+ * Ring Buffer Library Synchronization Header (API).
+ *
+ * See ring_buffer_frontend.c for more information on wait-free algorithms.
+ */
+
+#ifndef _LTTNG_RING_BUFFER_FRONTEND_H
+#define _LTTNG_RING_BUFFER_FRONTEND_H
+
+#include
+#include
+
+#include
+#include
+
+#include "smp.h"
+
+/* Internal helpers */
+#include "frontend_internal.h"
+
+/* Buffer creation/removal and setup operations */
+
+/*
+ * switch_timer_interval is the time interval (in us) to fill sub-buffers with
+ * padding to let readers get those sub-buffers. Used for live streaming.
+ *
+ * read_timer_interval is the time interval (in us) to wake up pending readers.
+ *
+ * buf_addr is a pointer to the beginning of the preallocated buffer contiguous
+ * address mapping. It is used only by RING_BUFFER_STATIC configuration. It can
+ * be set to NULL for other backends.
+ *
+ * private data is a memory area for configuration data. This memory is
+ * managed by lib ring buffer. priv_data_align is the alignment required
+ * for the private data area.
+ */
+
+extern
+struct lttng_ust_shm_handle *channel_create(const struct lttng_ust_lib_ring_buffer_config *config,
+				const char *name,
+				size_t priv_data_align,
+				size_t priv_data_size,
+				void *priv_data_init,
+				void *priv,
+				void *buf_addr,
+				size_t subbuf_size, size_t num_subbuf,
+				unsigned int switch_timer_interval,
+				unsigned int read_timer_interval,
+				const int *stream_fds, int nr_stream_fds,
+				int64_t blocking_timeout)
+	__attribute__((visibility("hidden")));
+
+/*
+ * channel_destroy finalizes all channel's buffers, waits for readers to
+ * release all references, and destroys the channel.
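+ *
+ * A sketch of the expected create/destroy pairing (how the channel pointer is
+ * looked up from the returned handle is omitted here; argument names are
+ * placeholders):
+ *
+ *   handle = channel_create(config, "chan_name", priv_data_align,
+ *                           priv_data_size, priv_data_init, priv, NULL,
+ *                           subbuf_size, num_subbuf, switch_timer_interval,
+ *                           read_timer_interval, stream_fds, nr_stream_fds,
+ *                           blocking_timeout);
+ *   ...
+ *   channel_destroy(chan, handle, consumer);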
+ */ +void channel_destroy(struct lttng_ust_lib_ring_buffer_channel *chan, + struct lttng_ust_shm_handle *handle, + int consumer) + __attribute__((visibility("hidden"))); + + +/* Buffer read operations */ + +/* + * Iteration on channel cpumask needs to issue a read barrier to match the write + * barrier in cpu hotplug. It orders the cpumask read before read of per-cpu + * buffer data. The per-cpu buffer is never removed by cpu hotplug; teardown is + * only performed at channel destruction. + */ +#define for_each_channel_cpu(cpu, chan) \ + for_each_possible_cpu(cpu) + +extern struct lttng_ust_lib_ring_buffer *channel_get_ring_buffer( + const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_channel *chan, int cpu, + struct lttng_ust_shm_handle *handle, + int *shm_fd, int *wait_fd, + int *wakeup_fd, + uint64_t *memory_map_size) + __attribute__((visibility("hidden"))); + +extern +int ring_buffer_channel_close_wait_fd(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_channel *chan, + struct lttng_ust_shm_handle *handle) + __attribute__((visibility("hidden"))); + +extern +int ring_buffer_channel_close_wakeup_fd(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_channel *chan, + struct lttng_ust_shm_handle *handle) + __attribute__((visibility("hidden"))); + +extern +int ring_buffer_stream_close_wait_fd(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_channel *chan, + struct lttng_ust_shm_handle *handle, + int cpu) + __attribute__((visibility("hidden"))); + +extern +int ring_buffer_stream_close_wakeup_fd(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_channel *chan, + struct lttng_ust_shm_handle *handle, + int cpu) + __attribute__((visibility("hidden"))); + +extern int lib_ring_buffer_open_read(struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_shm_handle *handle) + __attribute__((visibility("hidden"))); + +extern void lib_ring_buffer_release_read(struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_shm_handle *handle) + __attribute__((visibility("hidden"))); + +/* + * Initialize signals for ring buffer. Should be called early e.g. by + * main() in the program to affect all threads. + */ +void lib_ringbuffer_signal_init(void) + __attribute__((visibility("hidden"))); + +/* + * Read sequence: snapshot, many get_subbuf/put_subbuf, move_consumer. 
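+ *
+ * Using the sequential helpers declared below, a minimal reader loop could
+ * look like this (sketch only; error handling and open/release of the read
+ * side are elided):
+ *
+ *   while (!lib_ring_buffer_get_next_subbuf(buf, handle)) {
+ *           ... consume the sub-buffer, e.g. via lib_ring_buffer_read() ...
+ *           lib_ring_buffer_put_next_subbuf(buf, handle);
+ *   }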
+ */ +extern int lib_ring_buffer_snapshot(struct lttng_ust_lib_ring_buffer *buf, + unsigned long *consumed, + unsigned long *produced, + struct lttng_ust_shm_handle *handle) + __attribute__((visibility("hidden"))); + +extern int lib_ring_buffer_snapshot_sample_positions( + struct lttng_ust_lib_ring_buffer *buf, + unsigned long *consumed, + unsigned long *produced, + struct lttng_ust_shm_handle *handle) + __attribute__((visibility("hidden"))); + +extern void lib_ring_buffer_move_consumer(struct lttng_ust_lib_ring_buffer *buf, + unsigned long consumed_new, + struct lttng_ust_shm_handle *handle) + __attribute__((visibility("hidden"))); + +extern int lib_ring_buffer_get_subbuf(struct lttng_ust_lib_ring_buffer *buf, + unsigned long consumed, + struct lttng_ust_shm_handle *handle) + __attribute__((visibility("hidden"))); + +extern void lib_ring_buffer_put_subbuf(struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_shm_handle *handle) + __attribute__((visibility("hidden"))); + +/* + * lib_ring_buffer_get_next_subbuf/lib_ring_buffer_put_next_subbuf are helpers + * to read sub-buffers sequentially. + */ +static inline int lib_ring_buffer_get_next_subbuf(struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_shm_handle *handle) +{ + int ret; + + ret = lib_ring_buffer_snapshot(buf, &buf->cons_snapshot, + &buf->prod_snapshot, handle); + if (ret) + return ret; + ret = lib_ring_buffer_get_subbuf(buf, buf->cons_snapshot, handle); + return ret; +} + +static inline +void lib_ring_buffer_put_next_subbuf(struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_shm_handle *handle) +{ + struct lttng_ust_lib_ring_buffer_channel *chan; + + chan = shmp(handle, buf->backend.chan); + if (!chan) + return; + lib_ring_buffer_put_subbuf(buf, handle); + lib_ring_buffer_move_consumer(buf, subbuf_align(buf->cons_snapshot, chan), + handle); +} + +extern void channel_reset(struct lttng_ust_lib_ring_buffer_channel *chan) + __attribute__((visibility("hidden"))); + +extern void lib_ring_buffer_reset(struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_shm_handle *handle) + __attribute__((visibility("hidden"))); + +static inline +unsigned long lib_ring_buffer_get_offset(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer *buf) +{ + return v_read(config, &buf->offset); +} + +static inline +unsigned long lib_ring_buffer_get_consumed( + const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), + struct lttng_ust_lib_ring_buffer *buf) +{ + return uatomic_read(&buf->consumed); +} + +/* + * Must call lib_ring_buffer_is_finalized before reading counters (memory + * ordering enforced with respect to trace teardown). + */ +static inline +int lib_ring_buffer_is_finalized( + const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), + struct lttng_ust_lib_ring_buffer *buf) +{ + int finalized = CMM_ACCESS_ONCE(buf->finalized); + /* + * Read finalized before counters. 
+	 */
+	cmm_smp_rmb();
+	return finalized;
+}
+
+static inline
+int lib_ring_buffer_channel_is_finalized(const struct lttng_ust_lib_ring_buffer_channel *chan)
+{
+	return chan->finalized;
+}
+
+static inline
+int lib_ring_buffer_channel_is_disabled(const struct lttng_ust_lib_ring_buffer_channel *chan)
+{
+	return uatomic_read(&chan->record_disabled);
+}
+
+static inline
+unsigned long lib_ring_buffer_get_read_data_size(
+				const struct lttng_ust_lib_ring_buffer_config *config,
+				struct lttng_ust_lib_ring_buffer *buf,
+				struct lttng_ust_shm_handle *handle)
+{
+	return subbuffer_get_read_data_size(config, &buf->backend, handle);
+}
+
+static inline
+unsigned long lib_ring_buffer_get_records_count(
+				const struct lttng_ust_lib_ring_buffer_config *config,
+				struct lttng_ust_lib_ring_buffer *buf)
+{
+	return v_read(config, &buf->records_count);
+}
+
+static inline
+unsigned long lib_ring_buffer_get_records_overrun(
+				const struct lttng_ust_lib_ring_buffer_config *config,
+				struct lttng_ust_lib_ring_buffer *buf)
+{
+	return v_read(config, &buf->records_overrun);
+}
+
+static inline
+unsigned long lib_ring_buffer_get_records_lost_full(
+				const struct lttng_ust_lib_ring_buffer_config *config,
+				struct lttng_ust_lib_ring_buffer *buf)
+{
+	return v_read(config, &buf->records_lost_full);
+}
+
+static inline
+unsigned long lib_ring_buffer_get_records_lost_wrap(
+				const struct lttng_ust_lib_ring_buffer_config *config,
+				struct lttng_ust_lib_ring_buffer *buf)
+{
+	return v_read(config, &buf->records_lost_wrap);
+}
+
+static inline
+unsigned long lib_ring_buffer_get_records_lost_big(
+				const struct lttng_ust_lib_ring_buffer_config *config,
+				struct lttng_ust_lib_ring_buffer *buf)
+{
+	return v_read(config, &buf->records_lost_big);
+}
+
+static inline
+unsigned long lib_ring_buffer_get_records_read(
+				const struct lttng_ust_lib_ring_buffer_config *config,
+				struct lttng_ust_lib_ring_buffer *buf)
+{
+	return v_read(config, &buf->backend.records_read);
+}
+
+#endif /* _LTTNG_RING_BUFFER_FRONTEND_H */
diff --git a/src/common/ringbuffer/frontend_api.h b/src/common/ringbuffer/frontend_api.h
new file mode 100644
index 00000000..ac5e31bf
--- /dev/null
+++ b/src/common/ringbuffer/frontend_api.h
@@ -0,0 +1,370 @@
+/*
+ * SPDX-License-Identifier: LGPL-2.1-only
+ *
+ * Copyright (C) 2005-2012 Mathieu Desnoyers
+ *
+ * See ring_buffer_frontend.c for more information on wait-free
+ * algorithms.
+ * See frontend.h for channel allocation and read-side API.
+ */
+
+#ifndef _LTTNG_RING_BUFFER_FRONTEND_API_H
+#define _LTTNG_RING_BUFFER_FRONTEND_API_H
+
+#include
+
+#include
+
+#include "frontend.h"
+
+/**
+ * lib_ring_buffer_nesting_inc - Ring buffer recursive use protection.
+ *
+ * The ring buffer nesting count is a safety net to ensure tracer
+ * client code will never trigger an endless recursion.
+ * Returns a nesting level >= 0 on success, -EPERM on failure (nesting
+ * count too high).
+ *
+ * asm volatile and "memory" clobber prevent the compiler from moving
+ * instructions out of the code section protected by the ring buffer nesting
+ * count. This is required to ensure that probe side-effects which can cause
+ * recursion (e.g. unforeseen traps, divisions by 0, ...) are triggered within
+ * the incremented nesting count section.
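+ *
+ * A typical fast path would bracket its work as follows (sketch only; the
+ * reserve/write/commit step is a placeholder for client code):
+ *
+ *   if (lib_ring_buffer_nesting_inc(config) < 0)
+ *           return;
+ *   ... reserve, write and commit the record ...
+ *   lib_ring_buffer_nesting_dec(config);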
+ */ +static inline +int lib_ring_buffer_nesting_inc( + const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused))) +{ + int nesting; + + nesting = ++URCU_TLS(lib_ring_buffer_nesting); + cmm_barrier(); + if (caa_unlikely(nesting >= LIB_RING_BUFFER_MAX_NESTING)) { + WARN_ON_ONCE(1); + URCU_TLS(lib_ring_buffer_nesting)--; + return -EPERM; + } + return nesting - 1; +} + +static inline +int lib_ring_buffer_nesting_count( + const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused))) +{ + return URCU_TLS(lib_ring_buffer_nesting); +} + +static inline +void lib_ring_buffer_nesting_dec( + const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused))) +{ + cmm_barrier(); + URCU_TLS(lib_ring_buffer_nesting)--; /* TLS */ +} + +/* + * lib_ring_buffer_try_reserve is called by lib_ring_buffer_reserve(). It is not + * part of the API per se. + * + * returns 0 if reserve ok, or 1 if the slow path must be taken. + */ +static inline +int lib_ring_buffer_try_reserve(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_ctx *ctx, + void *client_ctx, + unsigned long *o_begin, unsigned long *o_end, + unsigned long *o_old, size_t *before_hdr_pad) +{ + struct lttng_ust_lib_ring_buffer_ctx_private *ctx_private = ctx->priv; + struct lttng_ust_lib_ring_buffer_channel *chan = ctx_private->chan; + struct lttng_ust_lib_ring_buffer *buf = ctx_private->buf; + *o_begin = v_read(config, &buf->offset); + *o_old = *o_begin; + + ctx_private->tsc = lib_ring_buffer_clock_read(chan); + if ((int64_t) ctx_private->tsc == -EIO) + return 1; + + /* + * Prefetch cacheline for read because we have to read the previous + * commit counter to increment it and commit seq value to compare it to + * the commit counter. + */ + //prefetch(&buf->commit_hot[subbuf_index(*o_begin, chan)]); + + if (last_tsc_overflow(config, buf, ctx_private->tsc)) + ctx_private->rflags |= RING_BUFFER_RFLAG_FULL_TSC; + + if (caa_unlikely(subbuf_offset(*o_begin, chan) == 0)) + return 1; + + ctx_private->slot_size = record_header_size(config, chan, *o_begin, + before_hdr_pad, ctx, client_ctx); + ctx_private->slot_size += + lttng_ust_lib_ring_buffer_align(*o_begin + ctx_private->slot_size, + ctx->largest_align) + ctx->data_size; + if (caa_unlikely((subbuf_offset(*o_begin, chan) + ctx_private->slot_size) + > chan->backend.subbuf_size)) + return 1; + + /* + * Record fits in the current buffer and we are not on a switch + * boundary. It's safe to write. + */ + *o_end = *o_begin + ctx_private->slot_size; + + if (caa_unlikely((subbuf_offset(*o_end, chan)) == 0)) + /* + * The offset_end will fall at the very beginning of the next + * subbuffer. + */ + return 1; + + return 0; +} + +/** + * lib_ring_buffer_reserve - Reserve space in a ring buffer. + * @config: ring buffer instance configuration. + * @ctx: ring buffer context. (input and output) Must be already initialized. + * + * Atomic wait-free slot reservation. The reserved space starts at the context + * "pre_offset". Its length is "slot_size". The associated time-stamp is "tsc". + * + * Return : + * 0 on success. + * -EAGAIN if channel is disabled. + * -ENOSPC if event size is too large for packet. + * -ENOBUFS if there is currently not enough space in buffer for the event. + * -EIO if data cannot be written into the buffer for any other reason. 
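+ *
+ * Typical usage, as a sketch (payload is a placeholder for client data):
+ *
+ *   if (lib_ring_buffer_reserve(config, ctx, client_ctx))
+ *           return;
+ *   lib_ring_buffer_write(config, ctx, &payload, sizeof(payload));
+ *   lib_ring_buffer_commit(config, ctx);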
+ */
+
+static inline
+int lib_ring_buffer_reserve(const struct lttng_ust_lib_ring_buffer_config *config,
+			    struct lttng_ust_lib_ring_buffer_ctx *ctx,
+			    void *client_ctx)
+{
+	struct lttng_ust_lib_ring_buffer_ctx_private *ctx_private = ctx->priv;
+	struct lttng_ust_lib_ring_buffer_channel *chan = ctx_private->chan;
+	struct lttng_ust_shm_handle *handle = chan->handle;
+	struct lttng_ust_lib_ring_buffer *buf;
+	unsigned long o_begin, o_end, o_old;
+	size_t before_hdr_pad = 0;
+
+	if (caa_unlikely(uatomic_read(&chan->record_disabled)))
+		return -EAGAIN;
+
+	if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
+		ctx_private->reserve_cpu = lttng_ust_get_cpu();
+		buf = shmp(handle, chan->backend.buf[ctx_private->reserve_cpu].shmp);
+	} else {
+		buf = shmp(handle, chan->backend.buf[0].shmp);
+	}
+	if (caa_unlikely(!buf))
+		return -EIO;
+	if (caa_unlikely(uatomic_read(&buf->record_disabled)))
+		return -EAGAIN;
+	ctx_private->buf = buf;
+
+	/*
+	 * Perform retryable operations.
+	 */
+	if (caa_unlikely(lib_ring_buffer_try_reserve(config, ctx, client_ctx, &o_begin,
+			&o_end, &o_old, &before_hdr_pad)))
+		goto slow_path;
+
+	if (caa_unlikely(v_cmpxchg(config, &buf->offset, o_old, o_end)
+			!= o_old))
+		goto slow_path;
+
+	/*
+	 * Atomically update last_tsc. This update races against concurrent
+	 * atomic updates, but the race will always cause supplementary full TSC
+	 * record headers, never the opposite (missing a full TSC record header
+	 * when it would be needed).
+	 */
+	save_last_tsc(config, buf, ctx_private->tsc);
+
+	/*
+	 * Push the reader if necessary
+	 */
+	lib_ring_buffer_reserve_push_reader(buf, chan, o_end - 1);
+
+	/*
+	 * Clear noref flag for this subbuffer.
+	 */
+	lib_ring_buffer_clear_noref(config, &buf->backend,
+				subbuf_index(o_end - 1, chan), handle);
+
+	ctx_private->pre_offset = o_begin;
+	ctx_private->buf_offset = o_begin + before_hdr_pad;
+	return 0;
+slow_path:
+	return lib_ring_buffer_reserve_slow(ctx, client_ctx);
+}
+
+/**
+ * lib_ring_buffer_switch - Perform a sub-buffer switch for a per-cpu buffer.
+ * @config: ring buffer instance configuration.
+ * @buf: buffer
+ * @mode: buffer switch mode (SWITCH_ACTIVE or SWITCH_FLUSH)
+ *
+ * This operation is completely reentrant: it can be called while tracing is
+ * active with absolutely no lock held.
+ *
+ * Note, however, that since a v_cmpxchg is used for some atomic operations and
+ * must be executed locally for per-CPU buffers, this function must be called
+ * from the CPU which owns the buffer for an ACTIVE flush, with preemption
+ * disabled, for the RING_BUFFER_SYNC_PER_CPU configuration.
+ */
+static inline
+void lib_ring_buffer_switch(
+		const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)),
+		struct lttng_ust_lib_ring_buffer *buf, enum switch_mode mode,
+		struct lttng_ust_shm_handle *handle)
+{
+	lib_ring_buffer_switch_slow(buf, mode, handle);
+}
+
+/* See ring_buffer_frontend_api.h for lib_ring_buffer_reserve(). */
+
+/**
+ * lib_ring_buffer_commit - Commit a record.
+ * @config: ring buffer instance configuration.
+ * @ctx: ring buffer context. (input arguments only)
+ *
+ * Atomic unordered slot commit. Increments the commit count in the
+ * specified sub-buffer, and delivers it if necessary.
+ */ +static inline +void lib_ring_buffer_commit(const struct lttng_ust_lib_ring_buffer_config *config, + const struct lttng_ust_lib_ring_buffer_ctx *ctx) +{ + struct lttng_ust_lib_ring_buffer_ctx_private *ctx_private = ctx->priv; + struct lttng_ust_lib_ring_buffer_channel *chan = ctx_private->chan; + struct lttng_ust_shm_handle *handle = chan->handle; + struct lttng_ust_lib_ring_buffer *buf = ctx_private->buf; + unsigned long offset_end = ctx_private->buf_offset; + unsigned long endidx = subbuf_index(offset_end - 1, chan); + unsigned long commit_count; + struct commit_counters_hot *cc_hot = shmp_index(handle, + buf->commit_hot, endidx); + + if (caa_unlikely(!cc_hot)) + return; + + /* + * Must count record before incrementing the commit count. + */ + subbuffer_count_record(config, ctx, &buf->backend, endidx, handle); + + /* + * Order all writes to buffer before the commit count update that will + * determine that the subbuffer is full. + */ + cmm_smp_wmb(); + + v_add(config, ctx_private->slot_size, &cc_hot->cc); + + /* + * commit count read can race with concurrent OOO commit count updates. + * This is only needed for lib_ring_buffer_check_deliver (for + * non-polling delivery only) and for + * lib_ring_buffer_write_commit_counter. The race can only cause the + * counter to be read with the same value more than once, which could + * cause : + * - Multiple delivery for the same sub-buffer (which is handled + * gracefully by the reader code) if the value is for a full + * sub-buffer. It's important that we can never miss a sub-buffer + * delivery. Re-reading the value after the v_add ensures this. + * - Reading a commit_count with a higher value that what was actually + * added to it for the lib_ring_buffer_write_commit_counter call + * (again caused by a concurrent committer). It does not matter, + * because this function is interested in the fact that the commit + * count reaches back the reserve offset for a specific sub-buffer, + * which is completely independent of the order. + */ + commit_count = v_read(config, &cc_hot->cc); + + lib_ring_buffer_check_deliver(config, buf, chan, offset_end - 1, + commit_count, endidx, handle, ctx_private->tsc); + /* + * Update used size at each commit. It's needed only for extracting + * ring_buffer buffers from vmcore, after crash. + */ + lib_ring_buffer_write_commit_counter(config, buf, chan, + offset_end, commit_count, handle, cc_hot); +} + +/** + * lib_ring_buffer_try_discard_reserve - Try discarding a record. + * @config: ring buffer instance configuration. + * @ctx: ring buffer context. (input arguments only) + * + * Only succeeds if no other record has been written after the record to + * discard. If discard fails, the record must be committed to the buffer. + * + * Returns 0 upon success, -EPERM if the record cannot be discarded. + */ +static inline +int lib_ring_buffer_try_discard_reserve(const struct lttng_ust_lib_ring_buffer_config *config, + const struct lttng_ust_lib_ring_buffer_ctx *ctx) +{ + struct lttng_ust_lib_ring_buffer_ctx_private *ctx_private = ctx->priv; + struct lttng_ust_lib_ring_buffer *buf = ctx_private->buf; + unsigned long end_offset = ctx_private->pre_offset + ctx_private->slot_size; + + /* + * We need to ensure that if the cmpxchg succeeds and discards the + * record, the next record will record a full TSC, because it cannot + * rely on the last_tsc associated with the discarded record to detect + * overflows. 
The only way to ensure this is to set the last_tsc to 0 + * (assuming no 64-bit TSC overflow), which forces to write a 64-bit + * timestamp in the next record. + * + * Note: if discard fails, we must leave the TSC in the record header. + * It is needed to keep track of TSC overflows for the following + * records. + */ + save_last_tsc(config, buf, 0ULL); + + if (caa_likely(v_cmpxchg(config, &buf->offset, end_offset, ctx_private->pre_offset) + != end_offset)) + return -EPERM; + else + return 0; +} + +static inline +void channel_record_disable( + const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), + struct lttng_ust_lib_ring_buffer_channel *chan) +{ + uatomic_inc(&chan->record_disabled); +} + +static inline +void channel_record_enable( + const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), + struct lttng_ust_lib_ring_buffer_channel *chan) +{ + uatomic_dec(&chan->record_disabled); +} + +static inline +void lib_ring_buffer_record_disable( + const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), + struct lttng_ust_lib_ring_buffer *buf) +{ + uatomic_inc(&buf->record_disabled); +} + +static inline +void lib_ring_buffer_record_enable( + const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), + struct lttng_ust_lib_ring_buffer *buf) +{ + uatomic_dec(&buf->record_disabled); +} + +#endif /* _LTTNG_RING_BUFFER_FRONTEND_API_H */ diff --git a/src/common/ringbuffer/frontend_internal.h b/src/common/ringbuffer/frontend_internal.h new file mode 100644 index 00000000..7d905d4b --- /dev/null +++ b/src/common/ringbuffer/frontend_internal.h @@ -0,0 +1,367 @@ +/* + * SPDX-License-Identifier: (LGPL-2.1-only or GPL-2.0-only) + * + * Copyright (C) 2005-2012 Mathieu Desnoyers + * + * Ring Buffer Library Synchronization Header (internal helpers). + * + * See ring_buffer_frontend.c for more information on wait-free algorithms. + */ + +#ifndef _LTTNG_RING_BUFFER_FRONTEND_INTERNAL_H +#define _LTTNG_RING_BUFFER_FRONTEND_INTERNAL_H + +#include +#include +#include +#include +#include + +#include +#include "ringbuffer-config.h" +#include "backend_types.h" +#include "backend_internal.h" +#include "frontend_types.h" +#include "shm.h" + +/* Buffer offset macros */ + +/* buf_trunc mask selects only the buffer number. */ +static inline +unsigned long buf_trunc(unsigned long offset, + struct lttng_ust_lib_ring_buffer_channel *chan) +{ + return offset & ~(chan->backend.buf_size - 1); + +} + +/* Select the buffer number value (counter). */ +static inline +unsigned long buf_trunc_val(unsigned long offset, + struct lttng_ust_lib_ring_buffer_channel *chan) +{ + return buf_trunc(offset, chan) >> chan->backend.buf_size_order; +} + +/* buf_offset mask selects only the offset within the current buffer. */ +static inline +unsigned long buf_offset(unsigned long offset, + struct lttng_ust_lib_ring_buffer_channel *chan) +{ + return offset & (chan->backend.buf_size - 1); +} + +/* subbuf_offset mask selects the offset within the current subbuffer. */ +static inline +unsigned long subbuf_offset(unsigned long offset, + struct lttng_ust_lib_ring_buffer_channel *chan) +{ + return offset & (chan->backend.subbuf_size - 1); +} + +/* subbuf_trunc mask selects the subbuffer number. */ +static inline +unsigned long subbuf_trunc(unsigned long offset, + struct lttng_ust_lib_ring_buffer_channel *chan) +{ + return offset & ~(chan->backend.subbuf_size - 1); +} + +/* subbuf_align aligns the offset to the next subbuffer. 
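+ *
+ * For instance, with subbuf_size == 4096, subbuf_align(5000, chan)
+ * returns 8192, the start of the next sub-buffer.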
*/ +static inline +unsigned long subbuf_align(unsigned long offset, + struct lttng_ust_lib_ring_buffer_channel *chan) +{ + return (offset + chan->backend.subbuf_size) + & ~(chan->backend.subbuf_size - 1); +} + +/* subbuf_index returns the index of the current subbuffer within the buffer. */ +static inline +unsigned long subbuf_index(unsigned long offset, + struct lttng_ust_lib_ring_buffer_channel *chan) +{ + return buf_offset(offset, chan) >> chan->backend.subbuf_size_order; +} + +/* + * Last TSC comparison functions. Check if the current TSC overflows tsc_bits + * bits from the last TSC read. When overflows are detected, the full 64-bit + * timestamp counter should be written in the record header. Reads and writes + * last_tsc atomically. + */ + +#if (CAA_BITS_PER_LONG == 32) +static inline +void save_last_tsc(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer *buf, uint64_t tsc) +{ + if (config->tsc_bits == 0 || config->tsc_bits == 64) + return; + + /* + * Ensure the compiler performs this update in a single instruction. + */ + v_set(config, &buf->last_tsc, (unsigned long)(tsc >> config->tsc_bits)); +} + +static inline +int last_tsc_overflow(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer *buf, uint64_t tsc) +{ + unsigned long tsc_shifted; + + if (config->tsc_bits == 0 || config->tsc_bits == 64) + return 0; + + tsc_shifted = (unsigned long)(tsc >> config->tsc_bits); + if (caa_unlikely(tsc_shifted + - (unsigned long)v_read(config, &buf->last_tsc))) + return 1; + else + return 0; +} +#else +static inline +void save_last_tsc(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer *buf, uint64_t tsc) +{ + if (config->tsc_bits == 0 || config->tsc_bits == 64) + return; + + v_set(config, &buf->last_tsc, (unsigned long)tsc); +} + +static inline +int last_tsc_overflow(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer *buf, uint64_t tsc) +{ + if (config->tsc_bits == 0 || config->tsc_bits == 64) + return 0; + + if (caa_unlikely((tsc - v_read(config, &buf->last_tsc)) + >> config->tsc_bits)) + return 1; + else + return 0; +} +#endif + +extern +int lib_ring_buffer_reserve_slow(struct lttng_ust_lib_ring_buffer_ctx *ctx, + void *client_ctx) + __attribute__((visibility("hidden"))); + +extern +void lib_ring_buffer_switch_slow(struct lttng_ust_lib_ring_buffer *buf, + enum switch_mode mode, + struct lttng_ust_shm_handle *handle) + __attribute__((visibility("hidden"))); + +void lib_ring_buffer_check_deliver_slow(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_lib_ring_buffer_channel *chan, + unsigned long offset, + unsigned long commit_count, + unsigned long idx, + struct lttng_ust_shm_handle *handle, + uint64_t tsc) + __attribute__((visibility("hidden"))); + +/* Buffer write helpers */ + +static inline +void lib_ring_buffer_reserve_push_reader(struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_lib_ring_buffer_channel *chan, + unsigned long offset) +{ + unsigned long consumed_old, consumed_new; + + do { + consumed_old = uatomic_read(&buf->consumed); + /* + * If buffer is in overwrite mode, push the reader consumed + * count if the write position has reached it and we are not + * at the first iteration (don't push the reader farther than + * the writer). 
This operation can be done concurrently by many + * writers in the same buffer, the writer being at the farthest + * write position sub-buffer index in the buffer being the one + * which will win this loop. + */ + if (caa_unlikely(subbuf_trunc(offset, chan) + - subbuf_trunc(consumed_old, chan) + >= chan->backend.buf_size)) + consumed_new = subbuf_align(consumed_old, chan); + else + return; + } while (caa_unlikely(uatomic_cmpxchg(&buf->consumed, consumed_old, + consumed_new) != consumed_old)); +} + +/* + * Move consumed position to the beginning of subbuffer in which the + * write offset is. Should only be used on ring buffers that are not + * actively being written into, because clear_reader does not take into + * account the commit counters when moving the consumed position, which + * can make concurrent trace producers or consumers observe consumed + * position further than the write offset, which breaks ring buffer + * algorithm guarantees. + */ +static inline +void lib_ring_buffer_clear_reader(struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_shm_handle *handle) +{ + struct lttng_ust_lib_ring_buffer_channel *chan; + const struct lttng_ust_lib_ring_buffer_config *config; + unsigned long offset, consumed_old, consumed_new; + + chan = shmp(handle, buf->backend.chan); + if (!chan) + return; + config = &chan->backend.config; + + do { + offset = v_read(config, &buf->offset); + consumed_old = uatomic_read(&buf->consumed); + CHAN_WARN_ON(chan, (long) (subbuf_trunc(offset, chan) + - subbuf_trunc(consumed_old, chan)) + < 0); + consumed_new = subbuf_trunc(offset, chan); + } while (caa_unlikely(uatomic_cmpxchg(&buf->consumed, consumed_old, + consumed_new) != consumed_old)); +} + +static inline +int lib_ring_buffer_pending_data(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_lib_ring_buffer_channel *chan) +{ + return !!subbuf_offset(v_read(config, &buf->offset), chan); +} + +static inline +unsigned long lib_ring_buffer_get_data_size(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer *buf, + unsigned long idx, + struct lttng_ust_shm_handle *handle) +{ + return subbuffer_get_data_size(config, &buf->backend, idx, handle); +} + +/* + * Check if all space reservation in a buffer have been committed. This helps + * knowing if an execution context is nested (for per-cpu buffers only). + * This is a very specific ftrace use-case, so we keep this as "internal" API. + */ +static inline +int lib_ring_buffer_reserve_committed(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_lib_ring_buffer_channel *chan, + struct lttng_ust_shm_handle *handle) +{ + unsigned long offset, idx, commit_count; + struct commit_counters_hot *cc_hot; + + CHAN_WARN_ON(chan, config->alloc != RING_BUFFER_ALLOC_PER_CPU); + CHAN_WARN_ON(chan, config->sync != RING_BUFFER_SYNC_PER_CPU); + + /* + * Read offset and commit count in a loop so they are both read + * atomically wrt interrupts. By deal with interrupt concurrency by + * restarting both reads if the offset has been pushed. Note that given + * we only have to deal with interrupt concurrency here, an interrupt + * modifying the commit count will also modify "offset", so it is safe + * to only check for offset modifications. 
+ */ + do { + offset = v_read(config, &buf->offset); + idx = subbuf_index(offset, chan); + cc_hot = shmp_index(handle, buf->commit_hot, idx); + if (caa_unlikely(!cc_hot)) + return 0; + commit_count = v_read(config, &cc_hot->cc); + } while (offset != v_read(config, &buf->offset)); + + return ((buf_trunc(offset, chan) >> chan->backend.num_subbuf_order) + - (commit_count & chan->commit_count_mask) == 0); +} + +/* + * Receive end of subbuffer TSC as parameter. It has been read in the + * space reservation loop of either reserve or switch, which ensures it + * progresses monotonically with event records in the buffer. Therefore, + * it ensures that the end timestamp of a subbuffer is <= begin + * timestamp of the following subbuffers. + */ +static inline +void lib_ring_buffer_check_deliver(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_lib_ring_buffer_channel *chan, + unsigned long offset, + unsigned long commit_count, + unsigned long idx, + struct lttng_ust_shm_handle *handle, + uint64_t tsc) +{ + unsigned long old_commit_count = commit_count + - chan->backend.subbuf_size; + + /* Check if all commits have been done */ + if (caa_unlikely((buf_trunc(offset, chan) >> chan->backend.num_subbuf_order) + - (old_commit_count & chan->commit_count_mask) == 0)) + lib_ring_buffer_check_deliver_slow(config, buf, chan, offset, + commit_count, idx, handle, tsc); +} + +/* + * lib_ring_buffer_write_commit_counter + * + * For flight recording. must be called after commit. + * This function increments the subbuffer's commit_seq counter each time the + * commit count reaches back the reserve offset (modulo subbuffer size). It is + * useful for crash dump. + */ +static inline +void lib_ring_buffer_write_commit_counter( + const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer *buf __attribute__((unused)), + struct lttng_ust_lib_ring_buffer_channel *chan, + unsigned long buf_offset, + unsigned long commit_count, + struct lttng_ust_shm_handle *handle __attribute__((unused)), + struct commit_counters_hot *cc_hot) +{ + unsigned long commit_seq_old; + + if (config->oops != RING_BUFFER_OOPS_CONSISTENCY) + return; + + /* + * subbuf_offset includes commit_count_mask. We can simply + * compare the offsets within the subbuffer without caring about + * buffer full/empty mismatch because offset is never zero here + * (subbuffer header and record headers have non-zero length). 
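+ *
+ * For instance, with subbuf_size == 4096, buf_offset == 5120 and
+ * commit_count == 4608, the difference is 512: the commit counter has
+ * not yet reached back the reserve offset, so commit_seq is left
+ * untouched. Once the difference becomes a multiple of the sub-buffer
+ * size, the sequence counter below is allowed to move forward.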
+ */ + if (caa_unlikely(subbuf_offset(buf_offset - commit_count, chan))) + return; + + commit_seq_old = v_read(config, &cc_hot->seq); + if (caa_likely((long) (commit_seq_old - commit_count) < 0)) + v_set(config, &cc_hot->seq, commit_count); +} + +extern int lib_ring_buffer_create(struct lttng_ust_lib_ring_buffer *buf, + struct channel_backend *chanb, int cpu, + struct lttng_ust_shm_handle *handle, + struct shm_object *shmobj) + __attribute__((visibility("hidden"))); + +extern void lib_ring_buffer_free(struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_shm_handle *handle) + __attribute__((visibility("hidden"))); + +/* Keep track of trap nesting inside ring buffer code */ +extern DECLARE_URCU_TLS(unsigned int, lib_ring_buffer_nesting) + __attribute__((visibility("hidden"))); + +#endif /* _LTTNG_RING_BUFFER_FRONTEND_INTERNAL_H */ diff --git a/src/common/ringbuffer/frontend_types.h b/src/common/ringbuffer/frontend_types.h new file mode 100644 index 00000000..467ece73 --- /dev/null +++ b/src/common/ringbuffer/frontend_types.h @@ -0,0 +1,330 @@ +/* + * SPDX-License-Identifier: LGPL-2.1-only + * + * Copyright (C) 2005-2012 Mathieu Desnoyers + * + * Ring Buffer Library Synchronization Header (types). + * + * See ring_buffer_frontend.c for more information on wait-free algorithms. + */ + +#ifndef _LTTNG_RING_BUFFER_FRONTEND_TYPES_H +#define _LTTNG_RING_BUFFER_FRONTEND_TYPES_H + +#include +#include +#include /* for timer_t */ + +#include +#include + +#include +#include "ringbuffer-config.h" +#include "common/logging.h" +#include "backend_types.h" +#include "shm_internal.h" +#include "shm_types.h" +#include "vatomic.h" + +#define LIB_RING_BUFFER_MAX_NESTING 5 + +/* + * A switch is done during tracing or as a final flush after tracing (so it + * won't write in the new sub-buffer). + */ +enum switch_mode { SWITCH_ACTIVE, SWITCH_FLUSH }; + +/* channel: collection of per-cpu ring buffers. */ +#define RB_CHANNEL_PADDING 32 +struct lttng_ust_lib_ring_buffer_channel { + int record_disabled; + unsigned long commit_count_mask; /* + * Commit count mask, removing + * the MSBs corresponding to + * bits used to represent the + * subbuffer index. + */ + + unsigned long switch_timer_interval; /* Buffer flush (us) */ + timer_t switch_timer; + int switch_timer_enabled; + + unsigned long read_timer_interval; /* Reader wakeup (us) */ + timer_t read_timer; + int read_timer_enabled; + + int finalized; /* Has channel been finalized */ + size_t priv_data_offset; /* Offset of private data channel config */ + unsigned int nr_streams; /* Number of streams */ + struct lttng_ust_shm_handle *handle; + /* Extended options. */ + union { + struct { + int32_t blocking_timeout_ms; + void *priv; /* Private data pointer. */ + } s; + char padding[RB_CHANNEL_PADDING]; + } u; + /* + * Associated backend contains a variable-length array. Needs to + * be last member. 
+ */ + struct channel_backend backend; /* Associated backend */ +} __attribute__((aligned(CAA_CACHE_LINE_SIZE))); + +/* Per-subbuffer commit counters used on the hot path */ +#define RB_COMMIT_COUNT_HOT_PADDING 16 +struct commit_counters_hot { + union v_atomic cc; /* Commit counter */ + union v_atomic seq; /* Consecutive commits */ + char padding[RB_COMMIT_COUNT_HOT_PADDING]; +} __attribute__((aligned(CAA_CACHE_LINE_SIZE))); + +/* Per-subbuffer commit counters used only on cold paths */ +#define RB_COMMIT_COUNT_COLD_PADDING 24 +struct commit_counters_cold { + union v_atomic cc_sb; /* Incremented _once_ at sb switch */ + char padding[RB_COMMIT_COUNT_COLD_PADDING]; +} __attribute__((aligned(CAA_CACHE_LINE_SIZE))); + +/* ring buffer state */ +#define RB_CRASH_DUMP_ABI_LEN 256 +#define RB_RING_BUFFER_PADDING 60 + +#define RB_CRASH_DUMP_ABI_MAGIC_LEN 16 + +/* + * The 128-bit magic number is xor'd in the process data so it does not + * cause a false positive when searching for buffers by scanning memory. + * The actual magic number is: + * 0x17, 0x7B, 0xF1, 0x77, 0xBF, 0x17, 0x7B, 0xF1, + * 0x77, 0xBF, 0x17, 0x7B, 0xF1, 0x77, 0xBF, 0x17, + */ +#define RB_CRASH_DUMP_ABI_MAGIC_XOR \ + { \ + 0x17 ^ 0xFF, 0x7B ^ 0xFF, 0xF1 ^ 0xFF, 0x77 ^ 0xFF, \ + 0xBF ^ 0xFF, 0x17 ^ 0xFF, 0x7B ^ 0xFF, 0xF1 ^ 0xFF, \ + 0x77 ^ 0xFF, 0xBF ^ 0xFF, 0x17 ^ 0xFF, 0x7B ^ 0xFF, \ + 0xF1 ^ 0xFF, 0x77 ^ 0xFF, 0xBF ^ 0xFF, 0x17 ^ 0xFF, \ + } + +#define RB_CRASH_ENDIAN 0x1234 + +#define RB_CRASH_DUMP_ABI_MAJOR 0 +#define RB_CRASH_DUMP_ABI_MINOR 0 + +enum lttng_crash_type { + LTTNG_CRASH_TYPE_UST = 0, + LTTNG_CRASH_TYPE_KERNEL = 1, +}; + +struct lttng_crash_abi { + uint8_t magic[RB_CRASH_DUMP_ABI_MAGIC_LEN]; + uint64_t mmap_length; /* Overall lenght of crash record */ + uint16_t endian; /* + * { 0x12, 0x34 }: big endian + * { 0x34, 0x12 }: little endian + */ + uint16_t major; /* Major number. */ + uint16_t minor; /* Minor number. */ + uint8_t word_size; /* Word size (bytes). 
*/ + uint8_t layout_type; /* enum lttng_crash_type */ + + struct { + uint32_t prod_offset; + uint32_t consumed_offset; + uint32_t commit_hot_array; + uint32_t commit_hot_seq; + uint32_t buf_wsb_array; + uint32_t buf_wsb_id; + uint32_t sb_array; + uint32_t sb_array_shmp_offset; + uint32_t sb_backend_p_offset; + uint32_t content_size; + uint32_t packet_size; + } __attribute__((packed)) offset; + struct { + uint8_t prod_offset; + uint8_t consumed_offset; + uint8_t commit_hot_seq; + uint8_t buf_wsb_id; + uint8_t sb_array_shmp_offset; + uint8_t sb_backend_p_offset; + uint8_t content_size; + uint8_t packet_size; + } __attribute__((packed)) length; + struct { + uint32_t commit_hot_array; + uint32_t buf_wsb_array; + uint32_t sb_array; + } __attribute__((packed)) stride; + + uint64_t buf_size; /* Size of the buffer */ + uint64_t subbuf_size; /* Sub-buffer size */ + uint64_t num_subbuf; /* Number of sub-buffers for writer */ + uint32_t mode; /* Buffer mode: 0: overwrite, 1: discard */ +} __attribute__((packed)); + +struct lttng_ust_lib_ring_buffer { + /* First 32 bytes are for the buffer crash dump ABI */ + struct lttng_crash_abi crash_abi; + + /* 32 bytes cache-hot cacheline */ + union v_atomic __attribute__((aligned(32))) offset; + /* Current offset in the buffer */ + DECLARE_SHMP(struct commit_counters_hot, commit_hot); + /* Commit count per sub-buffer */ + long consumed; /* + * Current offset in the buffer + * standard atomic access (shared) + */ + int record_disabled; + /* End of cache-hot 32 bytes cacheline */ + + union v_atomic last_tsc; /* + * Last timestamp written in the buffer. + */ + + struct lttng_ust_lib_ring_buffer_backend backend; + /* Associated backend */ + + DECLARE_SHMP(struct commit_counters_cold, commit_cold); + /* Commit count per sub-buffer */ + DECLARE_SHMP(uint64_t, ts_end); /* + * timestamp_end per sub-buffer. + * Time is sampled by the + * switch_*_end() callbacks + * which are the last space + * reservation performed in the + * sub-buffer before it can be + * fully committed and + * delivered. This time value is + * then read by the deliver + * callback, performed by the + * last commit before the buffer + * becomes readable. + */ + long active_readers; /* + * Active readers count + * standard atomic access (shared) + */ + /* Dropped records */ + union v_atomic records_lost_full; /* Buffer full */ + union v_atomic records_lost_wrap; /* Nested wrap-around */ + union v_atomic records_lost_big; /* Events too big */ + union v_atomic records_count; /* Number of records written */ + union v_atomic records_overrun; /* Number of overwritten records */ + //wait_queue_head_t read_wait; /* reader buffer-level wait queue */ + int finalized; /* buffer has been finalized */ + unsigned long get_subbuf_consumed; /* Read-side consumed */ + unsigned long prod_snapshot; /* Producer count snapshot */ + unsigned long cons_snapshot; /* Consumer count snapshot */ + unsigned int get_subbuf:1; /* Sub-buffer being held by reader */ + /* shmp pointer to self */ + DECLARE_SHMP(struct lttng_ust_lib_ring_buffer, self); + char padding[RB_RING_BUFFER_PADDING]; +} __attribute__((aligned(CAA_CACHE_LINE_SIZE))); + +/* + * ring buffer private context + * + * Private context passed to lib_ring_buffer_reserve(), lib_ring_buffer_commit(), + * lib_ring_buffer_try_discard_reserve(), lttng_ust_lib_ring_buffer_align_ctx() and + * lib_ring_buffer_write(). + * + * This context is allocated on an internal shadow-stack by a successful reserve + * operation, used by align/write, and freed by commit. 
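+ *
+ * Illustrative access pattern (assuming a context previously filled in
+ * by a successful reserve):
+ *
+ *	struct lttng_ust_lib_ring_buffer_ctx_private *priv = ctx->priv;
+ *	size_t reserved_size = priv->slot_size;
+ *	unsigned long record_start = priv->buf_offset;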
+ */ + +struct lttng_ust_lib_ring_buffer_ctx_private { + /* input received by lib_ring_buffer_reserve(). */ + struct lttng_ust_lib_ring_buffer_ctx *pub; + struct lttng_ust_lib_ring_buffer_channel *chan; /* channel */ + + /* output from lib_ring_buffer_reserve() */ + int reserve_cpu; /* processor id updated by the reserve */ + size_t slot_size; /* size of the reserved slot */ + unsigned long buf_offset; /* offset following the record header */ + unsigned long pre_offset; /* + * Initial offset position _before_ + * the record is written. Positioned + * prior to record header alignment + * padding. + */ + uint64_t tsc; /* time-stamp counter value */ + unsigned int rflags; /* reservation flags */ + void *ip; /* caller ip address */ + + struct lttng_ust_lib_ring_buffer *buf; /* + * buffer corresponding to processor id + * for this channel + */ + struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; +}; + +static inline +void *channel_get_private_config(struct lttng_ust_lib_ring_buffer_channel *chan) +{ + return ((char *) chan) + chan->priv_data_offset; +} + +static inline +void *channel_get_private(struct lttng_ust_lib_ring_buffer_channel *chan) +{ + return chan->u.s.priv; +} + +static inline +void channel_set_private(struct lttng_ust_lib_ring_buffer_channel *chan, void *priv) +{ + chan->u.s.priv = priv; +} + +#ifndef __rb_same_type +#define __rb_same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) +#endif + +/* + * Issue warnings and disable channels upon internal error. + * Can receive struct lttng_ust_lib_ring_buffer or struct lttng_ust_lib_ring_buffer_backend + * parameters. + */ +#define CHAN_WARN_ON(c, cond) \ + ({ \ + struct lttng_ust_lib_ring_buffer_channel *__chan; \ + int _____ret = caa_unlikely(cond); \ + if (_____ret) { \ + if (__rb_same_type(*(c), struct channel_backend)) \ + __chan = caa_container_of((void *) (c), \ + struct lttng_ust_lib_ring_buffer_channel, \ + backend); \ + else if (__rb_same_type(*(c), \ + struct lttng_ust_lib_ring_buffer_channel)) \ + __chan = (void *) (c); \ + else \ + BUG_ON(1); \ + uatomic_inc(&__chan->record_disabled); \ + WARN_ON(1); \ + } \ + _____ret = _____ret; /* For clang "unused result". */ \ + }) + +/** + * lttng_ust_lib_ring_buffer_align_ctx - Align context offset on "alignment" + * @ctx: ring buffer context. + */ +static inline +void lttng_ust_lib_ring_buffer_align_ctx(struct lttng_ust_lib_ring_buffer_ctx *ctx, + size_t alignment) + lttng_ust_notrace; +static inline +void lttng_ust_lib_ring_buffer_align_ctx(struct lttng_ust_lib_ring_buffer_ctx *ctx, + size_t alignment) +{ + struct lttng_ust_lib_ring_buffer_ctx_private *ctx_private = ctx->priv; + + ctx_private->buf_offset += lttng_ust_lib_ring_buffer_align(ctx_private->buf_offset, + alignment); +} + +#endif /* _LTTNG_RING_BUFFER_FRONTEND_TYPES_H */ diff --git a/src/common/ringbuffer/getcpu.h b/src/common/ringbuffer/getcpu.h new file mode 100644 index 00000000..52c74413 --- /dev/null +++ b/src/common/ringbuffer/getcpu.h @@ -0,0 +1,104 @@ +/* + * SPDX-License-Identifier: LGPL-2.1-only + * + * Copyright (C) 2011 Mathieu Desnoyers + */ + +#ifndef _LTTNG_GETCPU_H +#define _LTTNG_GETCPU_H + +#include +#include +#include + +void lttng_ust_getcpu_init(void) + __attribute__((visibility("hidden"))); + +extern int (*lttng_get_cpu)(void) + __attribute__((visibility("hidden"))); + +#ifdef LTTNG_UST_DEBUG_VALGRIND + +/* + * Fallback on cpu 0 if liblttng-ust is build with Valgrind support. + * get_cpu() returns the current CPU number. 
It may change due to + * migration, so it is only statistically accurate. + */ +static inline +int lttng_ust_get_cpu_internal(void) +{ + return 0; +} + +#else + +/* + * sched_getcpu. + */ +#ifdef __linux__ + +#if !HAVE_SCHED_GETCPU +#include +#define __getcpu(cpu, node, cache) syscall(__NR_getcpu, cpu, node, cache) +/* + * If getcpu is not implemented in the kernel, use cpu 0 as fallback. + */ +static inline +int lttng_ust_get_cpu_internal(void) +{ + int cpu, ret; + + ret = __getcpu(&cpu, NULL, NULL); + if (caa_unlikely(ret < 0)) + return 0; + return cpu; +} +#else /* HAVE_SCHED_GETCPU */ +#include + +/* + * If getcpu is not implemented in the kernel, use cpu 0 as fallback. + */ +static inline +int lttng_ust_get_cpu_internal(void) +{ + int cpu; + + cpu = sched_getcpu(); + if (caa_unlikely(cpu < 0)) + return 0; + return cpu; +} +#endif /* HAVE_SCHED_GETCPU */ + +#elif (defined(__FreeBSD__) || defined(__CYGWIN__)) + +/* + * FreeBSD and Cygwin do not allow query of CPU ID. Always use CPU + * number 0, with the assocated performance degradation on SMP. + */ +static inline +int lttng_ust_get_cpu_internal(void) +{ + return 0; +} + +#else +#error "Please add support for your OS into liblttng-ust/compat.h." +#endif + +#endif + +static inline +int lttng_ust_get_cpu(void) +{ + int (*getcpu)(void) = CMM_LOAD_SHARED(lttng_get_cpu); + + if (caa_likely(!getcpu)) { + return lttng_ust_get_cpu_internal(); + } else { + return getcpu(); + } +} + +#endif /* _LTTNG_GETCPU_H */ diff --git a/src/common/ringbuffer/mmap.h b/src/common/ringbuffer/mmap.h new file mode 100644 index 00000000..39c06246 --- /dev/null +++ b/src/common/ringbuffer/mmap.h @@ -0,0 +1,18 @@ +/* + * SPDX-License-Identifier: LGPL-2.1-only + * + * Copyright (C) 2019 Jonathan Rajotte + */ + +#ifndef _LTTNG_MMAP_H +#define _LTTNG_MMAP_H + +#include + +#if defined(__linux__) && defined(MAP_POPULATE) +# define LTTNG_MAP_POPULATE MAP_POPULATE +#else +# define LTTNG_MAP_POPULATE 0 +#endif /* __linux__ && MAP_POPULATE */ + +#endif /* _LTTNG_MMAP_H */ diff --git a/src/common/ringbuffer/nohz.h b/src/common/ringbuffer/nohz.h new file mode 100644 index 00000000..5d416780 --- /dev/null +++ b/src/common/ringbuffer/nohz.h @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: LGPL-2.1-only + * + * Copyright (C) 2011-2012 Mathieu Desnoyers + */ + +#ifndef _LTTNG_RING_BUFFER_NOHZ_H +#define _LTTNG_RING_BUFFER_NOHZ_H + +#ifdef CONFIG_LIB_RING_BUFFER +void lib_ring_buffer_tick_nohz_flush(void) + __attribute__((visibility("hidden"))); + +void lib_ring_buffer_tick_nohz_stop(void) + __attribute__((visibility("hidden"))); + +void lib_ring_buffer_tick_nohz_restart(void) + __attribute__((visibility("hidden"))); + +#else + +static inline void lib_ring_buffer_tick_nohz_flush(void) +{ +} + +static inline void lib_ring_buffer_tick_nohz_stop(void) +{ +} + +static inline void lib_ring_buffer_tick_nohz_restart(void) +{ +} +#endif + +#endif /* _LTTNG_RING_BUFFER_NOHZ_H */ diff --git a/src/common/ringbuffer/rb-init.h b/src/common/ringbuffer/rb-init.h new file mode 100644 index 00000000..b3eb7568 --- /dev/null +++ b/src/common/ringbuffer/rb-init.h @@ -0,0 +1,16 @@ +/* + * SPDX-License-Identifier: LGPL-2.1-only + * + * Copyright (C) 2012-2016 Mathieu Desnoyers + */ + +#ifndef _LTTNG_UST_LIB_RINGBUFFER_RB_INIT_H +#define _LTTNG_UST_LIB_RINGBUFFER_RB_INIT_H + +void lttng_fixup_ringbuffer_tls(void) + __attribute__((visibility("hidden"))); + +void lttng_ust_ringbuffer_set_allow_blocking(void) + __attribute__((visibility("hidden"))); + +#endif /* _LTTNG_UST_LIB_RINGBUFFER_RB_INIT_H */ diff 
--git a/src/common/ringbuffer/ring_buffer_backend.c b/src/common/ringbuffer/ring_buffer_backend.c new file mode 100644 index 00000000..b0a7c513 --- /dev/null +++ b/src/common/ringbuffer/ring_buffer_backend.c @@ -0,0 +1,593 @@ +/* + * SPDX-License-Identifier: LGPL-2.1-only + * + * Copyright (C) 2005-2012 Mathieu Desnoyers + */ + +#define _LGPL_SOURCE +#include +#include +#include +#include +#include + +#include +#include + +#include "ringbuffer-config.h" +#include "vatomic.h" +#include "backend.h" +#include "frontend.h" +#include "smp.h" +#include "shm.h" +#include "common/align.h" + +/** + * lib_ring_buffer_backend_allocate - allocate a channel buffer + * @config: ring buffer instance configuration + * @buf: the buffer struct + * @size: total size of the buffer + * @num_subbuf: number of subbuffers + * @extra_reader_sb: need extra subbuffer for reader + */ +static +int lib_ring_buffer_backend_allocate(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_backend *bufb, + size_t size __attribute__((unused)), size_t num_subbuf, + int extra_reader_sb, + struct lttng_ust_shm_handle *handle, + struct shm_object *shmobj) +{ + struct channel_backend *chanb; + unsigned long subbuf_size, mmap_offset = 0; + unsigned long num_subbuf_alloc; + unsigned long i; + long page_size; + + chanb = &shmp(handle, bufb->chan)->backend; + if (!chanb) + return -EINVAL; + + subbuf_size = chanb->subbuf_size; + num_subbuf_alloc = num_subbuf; + + if (extra_reader_sb) + num_subbuf_alloc++; + + page_size = LTTNG_UST_PAGE_SIZE; + if (page_size <= 0) { + goto page_size_error; + } + + align_shm(shmobj, __alignof__(struct lttng_ust_lib_ring_buffer_backend_pages_shmp)); + set_shmp(bufb->array, zalloc_shm(shmobj, + sizeof(struct lttng_ust_lib_ring_buffer_backend_pages_shmp) * num_subbuf_alloc)); + if (caa_unlikely(!shmp(handle, bufb->array))) + goto array_error; + + /* + * This is the largest element (the buffer pages) which needs to + * be aligned on page size. 
+ */ + align_shm(shmobj, page_size); + set_shmp(bufb->memory_map, zalloc_shm(shmobj, + subbuf_size * num_subbuf_alloc)); + if (caa_unlikely(!shmp(handle, bufb->memory_map))) + goto memory_map_error; + + /* Allocate backend pages array elements */ + for (i = 0; i < num_subbuf_alloc; i++) { + align_shm(shmobj, __alignof__(struct lttng_ust_lib_ring_buffer_backend_pages)); + set_shmp(shmp_index(handle, bufb->array, i)->shmp, + zalloc_shm(shmobj, + sizeof(struct lttng_ust_lib_ring_buffer_backend_pages))); + if (!shmp(handle, shmp_index(handle, bufb->array, i)->shmp)) + goto free_array; + } + + /* Allocate write-side subbuffer table */ + align_shm(shmobj, __alignof__(struct lttng_ust_lib_ring_buffer_backend_subbuffer)); + set_shmp(bufb->buf_wsb, zalloc_shm(shmobj, + sizeof(struct lttng_ust_lib_ring_buffer_backend_subbuffer) + * num_subbuf)); + if (caa_unlikely(!shmp(handle, bufb->buf_wsb))) + goto free_array; + + for (i = 0; i < num_subbuf; i++) { + struct lttng_ust_lib_ring_buffer_backend_subbuffer *sb; + + sb = shmp_index(handle, bufb->buf_wsb, i); + if (!sb) + goto free_array; + sb->id = subbuffer_id(config, 0, 1, i); + } + + /* Assign read-side subbuffer table */ + if (extra_reader_sb) + bufb->buf_rsb.id = subbuffer_id(config, 0, 1, + num_subbuf_alloc - 1); + else + bufb->buf_rsb.id = subbuffer_id(config, 0, 1, 0); + + /* Allocate subbuffer packet counter table */ + align_shm(shmobj, __alignof__(struct lttng_ust_lib_ring_buffer_backend_counts)); + set_shmp(bufb->buf_cnt, zalloc_shm(shmobj, + sizeof(struct lttng_ust_lib_ring_buffer_backend_counts) + * num_subbuf)); + if (caa_unlikely(!shmp(handle, bufb->buf_cnt))) + goto free_wsb; + + /* Assign pages to page index */ + for (i = 0; i < num_subbuf_alloc; i++) { + struct lttng_ust_lib_ring_buffer_backend_pages_shmp *sbp; + struct lttng_ust_lib_ring_buffer_backend_pages *pages; + struct shm_ref ref; + + ref.index = bufb->memory_map._ref.index; + ref.offset = bufb->memory_map._ref.offset; + ref.offset += i * subbuf_size; + + sbp = shmp_index(handle, bufb->array, i); + if (!sbp) + goto free_array; + pages = shmp(handle, sbp->shmp); + if (!pages) + goto free_array; + set_shmp(pages->p, ref); + if (config->output == RING_BUFFER_MMAP) { + pages->mmap_offset = mmap_offset; + mmap_offset += subbuf_size; + } + } + return 0; + +free_wsb: + /* bufb->buf_wsb will be freed by shm teardown */ +free_array: + /* bufb->array[i] will be freed by shm teardown */ +memory_map_error: + /* bufb->array will be freed by shm teardown */ +array_error: +page_size_error: + return -ENOMEM; +} + +int lib_ring_buffer_backend_create(struct lttng_ust_lib_ring_buffer_backend *bufb, + struct channel_backend *chanb, int cpu, + struct lttng_ust_shm_handle *handle, + struct shm_object *shmobj) +{ + const struct lttng_ust_lib_ring_buffer_config *config = &chanb->config; + + set_shmp(bufb->chan, handle->chan._ref); + bufb->cpu = cpu; + + return lib_ring_buffer_backend_allocate(config, bufb, chanb->buf_size, + chanb->num_subbuf, + chanb->extra_reader_sb, + handle, shmobj); +} + +void lib_ring_buffer_backend_reset(struct lttng_ust_lib_ring_buffer_backend *bufb, + struct lttng_ust_shm_handle *handle) +{ + struct channel_backend *chanb; + const struct lttng_ust_lib_ring_buffer_config *config; + unsigned long num_subbuf_alloc; + unsigned int i; + + chanb = &shmp(handle, bufb->chan)->backend; + if (!chanb) + return; + config = &chanb->config; + + num_subbuf_alloc = chanb->num_subbuf; + if (chanb->extra_reader_sb) + num_subbuf_alloc++; + + for (i = 0; i < chanb->num_subbuf; i++) { + struct 
lttng_ust_lib_ring_buffer_backend_subbuffer *sb; + + sb = shmp_index(handle, bufb->buf_wsb, i); + if (!sb) + return; + sb->id = subbuffer_id(config, 0, 1, i); + } + if (chanb->extra_reader_sb) + bufb->buf_rsb.id = subbuffer_id(config, 0, 1, + num_subbuf_alloc - 1); + else + bufb->buf_rsb.id = subbuffer_id(config, 0, 1, 0); + + for (i = 0; i < num_subbuf_alloc; i++) { + struct lttng_ust_lib_ring_buffer_backend_pages_shmp *sbp; + struct lttng_ust_lib_ring_buffer_backend_pages *pages; + + sbp = shmp_index(handle, bufb->array, i); + if (!sbp) + return; + pages = shmp(handle, sbp->shmp); + if (!pages) + return; + /* Don't reset mmap_offset */ + v_set(config, &pages->records_commit, 0); + v_set(config, &pages->records_unread, 0); + pages->data_size = 0; + /* Don't reset backend page and virt addresses */ + } + /* Don't reset num_pages_per_subbuf, cpu, allocated */ + v_set(config, &bufb->records_read, 0); +} + +/* + * The frontend is responsible for also calling ring_buffer_backend_reset for + * each buffer when calling channel_backend_reset. + */ +void channel_backend_reset(struct channel_backend *chanb) +{ + struct lttng_ust_lib_ring_buffer_channel *chan = caa_container_of(chanb, + struct lttng_ust_lib_ring_buffer_channel, backend); + const struct lttng_ust_lib_ring_buffer_config *config = &chanb->config; + + /* + * Don't reset buf_size, subbuf_size, subbuf_size_order, + * num_subbuf_order, buf_size_order, extra_reader_sb, num_subbuf, + * priv, notifiers, config, cpumask and name. + */ + chanb->start_tsc = config->cb.ring_buffer_clock_read(chan); +} + +/** + * channel_backend_init - initialize a channel backend + * @chanb: channel backend + * @name: channel name + * @config: client ring buffer configuration + * @parent: dentry of parent directory, %NULL for root directory + * @subbuf_size: size of sub-buffers (> page size, power of 2) + * @num_subbuf: number of sub-buffers (power of 2) + * @lttng_ust_shm_handle: shared memory handle + * @stream_fds: stream file descriptors. + * + * Returns channel pointer if successful, %NULL otherwise. + * + * Creates per-cpu channel buffers using the sizes and attributes + * specified. The created channel buffer files will be named + * name_0...name_N-1. File permissions will be %S_IRUSR. + * + * Called with CPU hotplug disabled. + */ +int channel_backend_init(struct channel_backend *chanb, + const char *name, + const struct lttng_ust_lib_ring_buffer_config *config, + size_t subbuf_size, size_t num_subbuf, + struct lttng_ust_shm_handle *handle, + const int *stream_fds) +{ + struct lttng_ust_lib_ring_buffer_channel *chan = caa_container_of(chanb, + struct lttng_ust_lib_ring_buffer_channel, backend); + unsigned int i; + int ret; + size_t shmsize = 0, num_subbuf_alloc; + long page_size; + + if (!name) + return -EPERM; + + page_size = LTTNG_UST_PAGE_SIZE; + if (page_size <= 0) { + return -ENOMEM; + } + /* Check that the subbuffer size is larger than a page. */ + if (subbuf_size < page_size) + return -EINVAL; + + /* + * Make sure the number of subbuffers and subbuffer size are + * power of 2, and nonzero. + */ + if (!subbuf_size || (subbuf_size & (subbuf_size - 1))) + return -EINVAL; + if (!num_subbuf || (num_subbuf & (num_subbuf - 1))) + return -EINVAL; + /* + * Overwrite mode buffers require at least 2 subbuffers per + * buffer. 
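+ *
+ * For instance, with a 4 kB page size, subbuf_size == 16384 and
+ * num_subbuf == 4 satisfy every constraint above, while num_subbuf == 1
+ * is rejected by the check below for overwrite mode.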
+ */ + if (config->mode == RING_BUFFER_OVERWRITE && num_subbuf < 2) + return -EINVAL; + + ret = subbuffer_id_check_index(config, num_subbuf); + if (ret) + return ret; + + chanb->buf_size = num_subbuf * subbuf_size; + chanb->subbuf_size = subbuf_size; + chanb->buf_size_order = get_count_order(chanb->buf_size); + chanb->subbuf_size_order = get_count_order(subbuf_size); + chanb->num_subbuf_order = get_count_order(num_subbuf); + chanb->extra_reader_sb = + (config->mode == RING_BUFFER_OVERWRITE) ? 1 : 0; + chanb->num_subbuf = num_subbuf; + strncpy(chanb->name, name, NAME_MAX); + chanb->name[NAME_MAX - 1] = '\0'; + memcpy(&chanb->config, config, sizeof(*config)); + + /* Per-cpu buffer size: control (prior to backend) */ + shmsize = lttng_ust_offset_align(shmsize, __alignof__(struct lttng_ust_lib_ring_buffer)); + shmsize += sizeof(struct lttng_ust_lib_ring_buffer); + shmsize += lttng_ust_offset_align(shmsize, __alignof__(struct commit_counters_hot)); + shmsize += sizeof(struct commit_counters_hot) * num_subbuf; + shmsize += lttng_ust_offset_align(shmsize, __alignof__(struct commit_counters_cold)); + shmsize += sizeof(struct commit_counters_cold) * num_subbuf; + /* Sampled timestamp end */ + shmsize += lttng_ust_offset_align(shmsize, __alignof__(uint64_t)); + shmsize += sizeof(uint64_t) * num_subbuf; + + /* Per-cpu buffer size: backend */ + /* num_subbuf + 1 is the worse case */ + num_subbuf_alloc = num_subbuf + 1; + shmsize += lttng_ust_offset_align(shmsize, __alignof__(struct lttng_ust_lib_ring_buffer_backend_pages_shmp)); + shmsize += sizeof(struct lttng_ust_lib_ring_buffer_backend_pages_shmp) * num_subbuf_alloc; + shmsize += lttng_ust_offset_align(shmsize, page_size); + shmsize += subbuf_size * num_subbuf_alloc; + shmsize += lttng_ust_offset_align(shmsize, __alignof__(struct lttng_ust_lib_ring_buffer_backend_pages)); + shmsize += sizeof(struct lttng_ust_lib_ring_buffer_backend_pages) * num_subbuf_alloc; + shmsize += lttng_ust_offset_align(shmsize, __alignof__(struct lttng_ust_lib_ring_buffer_backend_subbuffer)); + shmsize += sizeof(struct lttng_ust_lib_ring_buffer_backend_subbuffer) * num_subbuf; + shmsize += lttng_ust_offset_align(shmsize, __alignof__(struct lttng_ust_lib_ring_buffer_backend_counts)); + shmsize += sizeof(struct lttng_ust_lib_ring_buffer_backend_counts) * num_subbuf; + + if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) { + struct lttng_ust_lib_ring_buffer *buf; + /* + * We need to allocate for all possible cpus. 
+ */ + for_each_possible_cpu(i) { + struct shm_object *shmobj; + + shmobj = shm_object_table_alloc(handle->table, shmsize, + SHM_OBJECT_SHM, stream_fds[i], i); + if (!shmobj) + goto end; + align_shm(shmobj, __alignof__(struct lttng_ust_lib_ring_buffer)); + set_shmp(chanb->buf[i].shmp, zalloc_shm(shmobj, sizeof(struct lttng_ust_lib_ring_buffer))); + buf = shmp(handle, chanb->buf[i].shmp); + if (!buf) + goto end; + set_shmp(buf->self, chanb->buf[i].shmp._ref); + ret = lib_ring_buffer_create(buf, chanb, i, + handle, shmobj); + if (ret) + goto free_bufs; /* cpu hotplug locked */ + } + } else { + struct shm_object *shmobj; + struct lttng_ust_lib_ring_buffer *buf; + + shmobj = shm_object_table_alloc(handle->table, shmsize, + SHM_OBJECT_SHM, stream_fds[0], -1); + if (!shmobj) + goto end; + align_shm(shmobj, __alignof__(struct lttng_ust_lib_ring_buffer)); + set_shmp(chanb->buf[0].shmp, zalloc_shm(shmobj, sizeof(struct lttng_ust_lib_ring_buffer))); + buf = shmp(handle, chanb->buf[0].shmp); + if (!buf) + goto end; + set_shmp(buf->self, chanb->buf[0].shmp._ref); + ret = lib_ring_buffer_create(buf, chanb, -1, + handle, shmobj); + if (ret) + goto free_bufs; + } + chanb->start_tsc = config->cb.ring_buffer_clock_read(chan); + + return 0; + +free_bufs: + /* We only free the buffer data upon shm teardown */ +end: + return -ENOMEM; +} + +/** + * channel_backend_free - destroy the channel + * @chan: the channel + * + * Destroy all channel buffers and frees the channel. + */ +void channel_backend_free(struct channel_backend *chanb __attribute__((unused)), + struct lttng_ust_shm_handle *handle __attribute__((unused))) +{ + /* SHM teardown takes care of everything */ +} + +/** + * lib_ring_buffer_read - read data from ring_buffer_buffer. + * @bufb : buffer backend + * @offset : offset within the buffer + * @dest : destination address + * @len : length to copy to destination + * + * Should be protected by get_subbuf/put_subbuf. + * Returns the length copied. + */ +size_t lib_ring_buffer_read(struct lttng_ust_lib_ring_buffer_backend *bufb, size_t offset, + void *dest, size_t len, struct lttng_ust_shm_handle *handle) +{ + struct channel_backend *chanb; + const struct lttng_ust_lib_ring_buffer_config *config; + ssize_t orig_len; + struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; + struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; + unsigned long sb_bindex, id; + void *src; + + chanb = &shmp(handle, bufb->chan)->backend; + if (!chanb) + return 0; + config = &chanb->config; + orig_len = len; + offset &= chanb->buf_size - 1; + + if (caa_unlikely(!len)) + return 0; + id = bufb->buf_rsb.id; + sb_bindex = subbuffer_id_get_index(config, id); + rpages = shmp_index(handle, bufb->array, sb_bindex); + if (!rpages) + return 0; + /* + * Underlying layer should never ask for reads across + * subbuffers. + */ + CHAN_WARN_ON(chanb, offset >= chanb->buf_size); + CHAN_WARN_ON(chanb, config->mode == RING_BUFFER_OVERWRITE + && subbuffer_id_is_noref(config, id)); + backend_pages = shmp(handle, rpages->shmp); + if (!backend_pages) + return 0; + src = shmp_index(handle, backend_pages->p, offset & (chanb->subbuf_size - 1)); + if (caa_unlikely(!src)) + return 0; + memcpy(dest, src, len); + return orig_len; +} + +/** + * lib_ring_buffer_read_cstr - read a C-style string from ring_buffer. + * @bufb : buffer backend + * @offset : offset within the buffer + * @dest : destination address + * @len : destination's length + * + * Return string's length, or -EINVAL on error. + * Should be protected by get_subbuf/put_subbuf. 
+ * Destination length should be at least 1 to hold '\0'. + */ +int lib_ring_buffer_read_cstr(struct lttng_ust_lib_ring_buffer_backend *bufb, size_t offset, + void *dest, size_t len, struct lttng_ust_shm_handle *handle) +{ + struct channel_backend *chanb; + const struct lttng_ust_lib_ring_buffer_config *config; + ssize_t string_len, orig_offset; + char *str; + struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; + struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; + unsigned long sb_bindex, id; + + chanb = &shmp(handle, bufb->chan)->backend; + if (!chanb) + return -EINVAL; + config = &chanb->config; + if (caa_unlikely(!len)) + return -EINVAL; + offset &= chanb->buf_size - 1; + orig_offset = offset; + id = bufb->buf_rsb.id; + sb_bindex = subbuffer_id_get_index(config, id); + rpages = shmp_index(handle, bufb->array, sb_bindex); + if (!rpages) + return -EINVAL; + /* + * Underlying layer should never ask for reads across + * subbuffers. + */ + CHAN_WARN_ON(chanb, offset >= chanb->buf_size); + CHAN_WARN_ON(chanb, config->mode == RING_BUFFER_OVERWRITE + && subbuffer_id_is_noref(config, id)); + backend_pages = shmp(handle, rpages->shmp); + if (!backend_pages) + return -EINVAL; + str = shmp_index(handle, backend_pages->p, offset & (chanb->subbuf_size - 1)); + if (caa_unlikely(!str)) + return -EINVAL; + string_len = strnlen(str, len); + if (dest && len) { + memcpy(dest, str, string_len); + ((char *)dest)[0] = 0; + } + return offset - orig_offset; +} + +/** + * lib_ring_buffer_read_offset_address - get address of a buffer location + * @bufb : buffer backend + * @offset : offset within the buffer. + * + * Return the address where a given offset is located (for read). + * Should be used to get the current subbuffer header pointer. Given we know + * it's never on a page boundary, it's safe to read/write directly + * from/to this address, as long as the read/write is never bigger than + * a page size. + */ +void *lib_ring_buffer_read_offset_address(struct lttng_ust_lib_ring_buffer_backend *bufb, + size_t offset, + struct lttng_ust_shm_handle *handle) +{ + struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; + struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; + struct channel_backend *chanb; + const struct lttng_ust_lib_ring_buffer_config *config; + unsigned long sb_bindex, id; + + chanb = &shmp(handle, bufb->chan)->backend; + if (!chanb) + return NULL; + config = &chanb->config; + offset &= chanb->buf_size - 1; + id = bufb->buf_rsb.id; + sb_bindex = subbuffer_id_get_index(config, id); + rpages = shmp_index(handle, bufb->array, sb_bindex); + if (!rpages) + return NULL; + CHAN_WARN_ON(chanb, config->mode == RING_BUFFER_OVERWRITE + && subbuffer_id_is_noref(config, id)); + backend_pages = shmp(handle, rpages->shmp); + if (!backend_pages) + return NULL; + return shmp_index(handle, backend_pages->p, offset & (chanb->subbuf_size - 1)); +} + +/** + * lib_ring_buffer_offset_address - get address of a location within the buffer + * @bufb : buffer backend + * @offset : offset within the buffer. + * + * Return the address where a given offset is located. + * Should be used to get the current subbuffer header pointer. Given we know + * it's always at the beginning of a page, it's safe to write directly to this + * address, as long as the write is never bigger than a page size. 
+ */ +void *lib_ring_buffer_offset_address(struct lttng_ust_lib_ring_buffer_backend *bufb, + size_t offset, + struct lttng_ust_shm_handle *handle) +{ + size_t sbidx; + struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; + struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; + struct channel_backend *chanb; + const struct lttng_ust_lib_ring_buffer_config *config; + unsigned long sb_bindex, id; + struct lttng_ust_lib_ring_buffer_backend_subbuffer *sb; + + chanb = &shmp(handle, bufb->chan)->backend; + if (!chanb) + return NULL; + config = &chanb->config; + offset &= chanb->buf_size - 1; + sbidx = offset >> chanb->subbuf_size_order; + sb = shmp_index(handle, bufb->buf_wsb, sbidx); + if (!sb) + return NULL; + id = sb->id; + sb_bindex = subbuffer_id_get_index(config, id); + rpages = shmp_index(handle, bufb->array, sb_bindex); + if (!rpages) + return NULL; + CHAN_WARN_ON(chanb, config->mode == RING_BUFFER_OVERWRITE + && subbuffer_id_is_noref(config, id)); + backend_pages = shmp(handle, rpages->shmp); + if (!backend_pages) + return NULL; + return shmp_index(handle, backend_pages->p, offset & (chanb->subbuf_size - 1)); +} diff --git a/src/common/ringbuffer/ring_buffer_frontend.c b/src/common/ringbuffer/ring_buffer_frontend.c new file mode 100644 index 00000000..9074dbcb --- /dev/null +++ b/src/common/ringbuffer/ring_buffer_frontend.c @@ -0,0 +1,2581 @@ +/* + * SPDX-License-Identifier: LGPL-2.1-only + * + * Copyright (C) 2005-2012 Mathieu Desnoyers + * + * Ring buffer wait-free buffer synchronization. Producer-consumer and flight + * recorder (overwrite) modes. See thesis: + * + * Desnoyers, Mathieu (2009), "Low-Impact Operating System Tracing", Ph.D. + * dissertation, Ecole Polytechnique de Montreal. + * http://www.lttng.org/pub/thesis/desnoyers-dissertation-2009-12.pdf + * + * - Algorithm presentation in Chapter 5: + * "Lockless Multi-Core High-Throughput Buffering". + * - Algorithm formal verification in Section 8.6: + * "Formal verification of LTTng" + * + * Author: + * Mathieu Desnoyers + * + * Inspired from LTT and RelayFS: + * Karim Yaghmour + * Tom Zanussi + * Bob Wisniewski + * And from K42 : + * Bob Wisniewski + * + * Buffer reader semantic : + * + * - get_subbuf_size + * while buffer is not finalized and empty + * - get_subbuf + * - if return value != 0, continue + * - splice one subbuffer worth of data to a pipe + * - splice the data from pipe to disk/network + * - put_subbuf + */ + +#define _LGPL_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "common/macros.h" + +#include +#include + +#include "smp.h" +#include "ringbuffer-config.h" +#include "vatomic.h" +#include "backend.h" +#include "frontend.h" +#include "shm.h" +#include "rb-init.h" +#include "liblttng-ust/compat.h" /* For ENODATA */ + +/* Print DBG() messages about events lost only every 1048576 hits */ +#define DBG_PRINT_NR_LOST (1UL << 20) + +#define LTTNG_UST_RB_SIG_FLUSH SIGRTMIN +#define LTTNG_UST_RB_SIG_READ SIGRTMIN + 1 +#define LTTNG_UST_RB_SIG_TEARDOWN SIGRTMIN + 2 +#define CLOCKID CLOCK_MONOTONIC +#define LTTNG_UST_RING_BUFFER_GET_RETRY 10 +#define LTTNG_UST_RING_BUFFER_RETRY_DELAY_MS 10 +#define RETRY_DELAY_MS 100 /* 100 ms. */ + +/* + * Non-static to ensure the compiler does not optimize away the xor. + */ +uint8_t lttng_crash_magic_xor[] = RB_CRASH_DUMP_ABI_MAGIC_XOR; + +/* + * Use POSIX SHM: shm_open(3) and shm_unlink(3). + * close(2) to close the fd returned by shm_open. 
+ * shm_unlink releases the shared memory object name. + * ftruncate(2) sets the size of the memory object. + * mmap/munmap maps the shared memory obj to a virtual address in the + * calling proceess (should be done both in libust and consumer). + * See shm_overview(7) for details. + * Pass file descriptor returned by shm_open(3) to ltt-sessiond through + * a UNIX socket. + * + * Since we don't need to access the object using its name, we can + * immediately shm_unlink(3) it, and only keep the handle with its file + * descriptor. + */ + +/* + * Internal structure representing offsets to use at a sub-buffer switch. + */ +struct switch_offsets { + unsigned long begin, end, old; + size_t pre_header_padding, size; + unsigned int switch_new_start:1, switch_new_end:1, switch_old_start:1, + switch_old_end:1; +}; + +DEFINE_URCU_TLS(unsigned int, lib_ring_buffer_nesting); + +/* + * wakeup_fd_mutex protects wakeup fd use by timer from concurrent + * close. + */ +static pthread_mutex_t wakeup_fd_mutex = PTHREAD_MUTEX_INITIALIZER; + +static +void lib_ring_buffer_print_errors(struct lttng_ust_lib_ring_buffer_channel *chan, + struct lttng_ust_lib_ring_buffer *buf, int cpu, + struct lttng_ust_shm_handle *handle); + +/* + * Handle timer teardown race wrt memory free of private data by + * ring buffer signals are handled by a single thread, which permits + * a synchronization point between handling of each signal. + * Protected by the lock within the structure. + */ +struct timer_signal_data { + pthread_t tid; /* thread id managing signals */ + int setup_done; + int qs_done; + pthread_mutex_t lock; +}; + +static struct timer_signal_data timer_signal = { + .tid = 0, + .setup_done = 0, + .qs_done = 0, + .lock = PTHREAD_MUTEX_INITIALIZER, +}; + +static bool lttng_ust_allow_blocking; + +void lttng_ust_ringbuffer_set_allow_blocking(void) +{ + lttng_ust_allow_blocking = true; +} + +/* Get blocking timeout, in ms */ +static int lttng_ust_ringbuffer_get_timeout(struct lttng_ust_lib_ring_buffer_channel *chan) +{ + if (!lttng_ust_allow_blocking) + return 0; + return chan->u.s.blocking_timeout_ms; +} + +/** + * lib_ring_buffer_reset - Reset ring buffer to initial values. + * @buf: Ring buffer. + * + * Effectively empty the ring buffer. Should be called when the buffer is not + * used for writing. The ring buffer can be opened for reading, but the reader + * should not be using the iterator concurrently with reset. The previous + * current iterator record is reset. + */ +void lib_ring_buffer_reset(struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_shm_handle *handle) +{ + struct lttng_ust_lib_ring_buffer_channel *chan; + const struct lttng_ust_lib_ring_buffer_config *config; + unsigned int i; + + chan = shmp(handle, buf->backend.chan); + if (!chan) + return; + config = &chan->backend.config; + /* + * Reset iterator first. It will put the subbuffer if it currently holds + * it. 
+ */ + v_set(config, &buf->offset, 0); + for (i = 0; i < chan->backend.num_subbuf; i++) { + struct commit_counters_hot *cc_hot; + struct commit_counters_cold *cc_cold; + uint64_t *ts_end; + + cc_hot = shmp_index(handle, buf->commit_hot, i); + if (!cc_hot) + return; + cc_cold = shmp_index(handle, buf->commit_cold, i); + if (!cc_cold) + return; + ts_end = shmp_index(handle, buf->ts_end, i); + if (!ts_end) + return; + v_set(config, &cc_hot->cc, 0); + v_set(config, &cc_hot->seq, 0); + v_set(config, &cc_cold->cc_sb, 0); + *ts_end = 0; + } + uatomic_set(&buf->consumed, 0); + uatomic_set(&buf->record_disabled, 0); + v_set(config, &buf->last_tsc, 0); + lib_ring_buffer_backend_reset(&buf->backend, handle); + /* Don't reset number of active readers */ + v_set(config, &buf->records_lost_full, 0); + v_set(config, &buf->records_lost_wrap, 0); + v_set(config, &buf->records_lost_big, 0); + v_set(config, &buf->records_count, 0); + v_set(config, &buf->records_overrun, 0); + buf->finalized = 0; +} + +/** + * channel_reset - Reset channel to initial values. + * @chan: Channel. + * + * Effectively empty the channel. Should be called when the channel is not used + * for writing. The channel can be opened for reading, but the reader should not + * be using the iterator concurrently with reset. The previous current iterator + * record is reset. + */ +void channel_reset(struct lttng_ust_lib_ring_buffer_channel *chan) +{ + /* + * Reset iterators first. Will put the subbuffer if held for reading. + */ + uatomic_set(&chan->record_disabled, 0); + /* Don't reset commit_count_mask, still valid */ + channel_backend_reset(&chan->backend); + /* Don't reset switch/read timer interval */ + /* Don't reset notifiers and notifier enable bits */ + /* Don't reset reader reference count */ +} + +static +void init_crash_abi(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_crash_abi *crash_abi, + struct lttng_ust_lib_ring_buffer *buf, + struct channel_backend *chanb, + struct shm_object *shmobj, + struct lttng_ust_shm_handle *handle) +{ + int i; + + for (i = 0; i < RB_CRASH_DUMP_ABI_MAGIC_LEN; i++) + crash_abi->magic[i] = lttng_crash_magic_xor[i] ^ 0xFF; + crash_abi->mmap_length = shmobj->memory_map_size; + crash_abi->endian = RB_CRASH_ENDIAN; + crash_abi->major = RB_CRASH_DUMP_ABI_MAJOR; + crash_abi->minor = RB_CRASH_DUMP_ABI_MINOR; + crash_abi->word_size = sizeof(unsigned long); + crash_abi->layout_type = LTTNG_CRASH_TYPE_UST; + + /* Offset of fields */ + crash_abi->offset.prod_offset = + (uint32_t) ((char *) &buf->offset - (char *) buf); + crash_abi->offset.consumed_offset = + (uint32_t) ((char *) &buf->consumed - (char *) buf); + crash_abi->offset.commit_hot_array = + (uint32_t) ((char *) shmp(handle, buf->commit_hot) - (char *) buf); + crash_abi->offset.commit_hot_seq = + offsetof(struct commit_counters_hot, seq); + crash_abi->offset.buf_wsb_array = + (uint32_t) ((char *) shmp(handle, buf->backend.buf_wsb) - (char *) buf); + crash_abi->offset.buf_wsb_id = + offsetof(struct lttng_ust_lib_ring_buffer_backend_subbuffer, id); + crash_abi->offset.sb_array = + (uint32_t) ((char *) shmp(handle, buf->backend.array) - (char *) buf); + crash_abi->offset.sb_array_shmp_offset = + offsetof(struct lttng_ust_lib_ring_buffer_backend_pages_shmp, + shmp._ref.offset); + crash_abi->offset.sb_backend_p_offset = + offsetof(struct lttng_ust_lib_ring_buffer_backend_pages, + p._ref.offset); + + /* Field length */ + crash_abi->length.prod_offset = sizeof(buf->offset); + crash_abi->length.consumed_offset = sizeof(buf->consumed); + 
crash_abi->length.commit_hot_seq = + sizeof(((struct commit_counters_hot *) NULL)->seq); + crash_abi->length.buf_wsb_id = + sizeof(((struct lttng_ust_lib_ring_buffer_backend_subbuffer *) NULL)->id); + crash_abi->length.sb_array_shmp_offset = + sizeof(((struct lttng_ust_lib_ring_buffer_backend_pages_shmp *) NULL)->shmp._ref.offset); + crash_abi->length.sb_backend_p_offset = + sizeof(((struct lttng_ust_lib_ring_buffer_backend_pages *) NULL)->p._ref.offset); + + /* Array stride */ + crash_abi->stride.commit_hot_array = + sizeof(struct commit_counters_hot); + crash_abi->stride.buf_wsb_array = + sizeof(struct lttng_ust_lib_ring_buffer_backend_subbuffer); + crash_abi->stride.sb_array = + sizeof(struct lttng_ust_lib_ring_buffer_backend_pages_shmp); + + /* Buffer constants */ + crash_abi->buf_size = chanb->buf_size; + crash_abi->subbuf_size = chanb->subbuf_size; + crash_abi->num_subbuf = chanb->num_subbuf; + crash_abi->mode = (uint32_t) chanb->config.mode; + + if (config->cb.content_size_field) { + size_t offset, length; + + config->cb.content_size_field(config, &offset, &length); + crash_abi->offset.content_size = offset; + crash_abi->length.content_size = length; + } else { + crash_abi->offset.content_size = 0; + crash_abi->length.content_size = 0; + } + if (config->cb.packet_size_field) { + size_t offset, length; + + config->cb.packet_size_field(config, &offset, &length); + crash_abi->offset.packet_size = offset; + crash_abi->length.packet_size = length; + } else { + crash_abi->offset.packet_size = 0; + crash_abi->length.packet_size = 0; + } +} + +/* + * Must be called under cpu hotplug protection. + */ +int lib_ring_buffer_create(struct lttng_ust_lib_ring_buffer *buf, + struct channel_backend *chanb, int cpu, + struct lttng_ust_shm_handle *handle, + struct shm_object *shmobj) +{ + const struct lttng_ust_lib_ring_buffer_config *config = &chanb->config; + struct lttng_ust_lib_ring_buffer_channel *chan = caa_container_of(chanb, + struct lttng_ust_lib_ring_buffer_channel, backend); + struct lttng_ust_lib_ring_buffer_backend_subbuffer *wsb; + struct lttng_ust_lib_ring_buffer_channel *shmp_chan; + struct commit_counters_hot *cc_hot; + void *priv = channel_get_private_config(chan); + size_t subbuf_header_size; + uint64_t tsc; + int ret; + + /* Test for cpu hotplug */ + if (buf->backend.allocated) + return 0; + + align_shm(shmobj, __alignof__(struct commit_counters_hot)); + set_shmp(buf->commit_hot, + zalloc_shm(shmobj, + sizeof(struct commit_counters_hot) * chan->backend.num_subbuf)); + if (!shmp(handle, buf->commit_hot)) { + return -ENOMEM; + } + + align_shm(shmobj, __alignof__(struct commit_counters_cold)); + set_shmp(buf->commit_cold, + zalloc_shm(shmobj, + sizeof(struct commit_counters_cold) * chan->backend.num_subbuf)); + if (!shmp(handle, buf->commit_cold)) { + ret = -ENOMEM; + goto free_commit; + } + + align_shm(shmobj, __alignof__(uint64_t)); + set_shmp(buf->ts_end, + zalloc_shm(shmobj, + sizeof(uint64_t) * chan->backend.num_subbuf)); + if (!shmp(handle, buf->ts_end)) { + ret = -ENOMEM; + goto free_commit_cold; + } + + + ret = lib_ring_buffer_backend_create(&buf->backend, &chan->backend, + cpu, handle, shmobj); + if (ret) { + goto free_init; + } + + /* + * Write the subbuffer header for first subbuffer so we know the total + * duration of data gathering. 
+ */ + subbuf_header_size = config->cb.subbuffer_header_size(); + v_set(config, &buf->offset, subbuf_header_size); + wsb = shmp_index(handle, buf->backend.buf_wsb, 0); + if (!wsb) { + ret = -EPERM; + goto free_chanbuf; + } + subbuffer_id_clear_noref(config, &wsb->id); + shmp_chan = shmp(handle, buf->backend.chan); + if (!shmp_chan) { + ret = -EPERM; + goto free_chanbuf; + } + tsc = config->cb.ring_buffer_clock_read(shmp_chan); + config->cb.buffer_begin(buf, tsc, 0, handle); + cc_hot = shmp_index(handle, buf->commit_hot, 0); + if (!cc_hot) { + ret = -EPERM; + goto free_chanbuf; + } + v_add(config, subbuf_header_size, &cc_hot->cc); + v_add(config, subbuf_header_size, &cc_hot->seq); + + if (config->cb.buffer_create) { + ret = config->cb.buffer_create(buf, priv, cpu, chanb->name, handle); + if (ret) + goto free_chanbuf; + } + + init_crash_abi(config, &buf->crash_abi, buf, chanb, shmobj, handle); + + buf->backend.allocated = 1; + return 0; + + /* Error handling */ +free_init: + /* ts_end will be freed by shm teardown */ +free_commit_cold: + /* commit_cold will be freed by shm teardown */ +free_commit: + /* commit_hot will be freed by shm teardown */ +free_chanbuf: + return ret; +} + +static +void lib_ring_buffer_channel_switch_timer(int sig __attribute__((unused)), + siginfo_t *si, void *uc __attribute__((unused))) +{ + const struct lttng_ust_lib_ring_buffer_config *config; + struct lttng_ust_shm_handle *handle; + struct lttng_ust_lib_ring_buffer_channel *chan; + int cpu; + + assert(CMM_LOAD_SHARED(timer_signal.tid) == pthread_self()); + + chan = si->si_value.sival_ptr; + handle = chan->handle; + config = &chan->backend.config; + + DBG("Switch timer for channel %p\n", chan); + + /* + * Only flush buffers periodically if readers are active. + */ + pthread_mutex_lock(&wakeup_fd_mutex); + if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) { + for_each_possible_cpu(cpu) { + struct lttng_ust_lib_ring_buffer *buf = + shmp(handle, chan->backend.buf[cpu].shmp); + + if (!buf) + goto end; + if (uatomic_read(&buf->active_readers)) + lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE, + chan->handle); + } + } else { + struct lttng_ust_lib_ring_buffer *buf = + shmp(handle, chan->backend.buf[0].shmp); + + if (!buf) + goto end; + if (uatomic_read(&buf->active_readers)) + lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE, + chan->handle); + } +end: + pthread_mutex_unlock(&wakeup_fd_mutex); + return; +} + +static +int lib_ring_buffer_poll_deliver(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_lib_ring_buffer_channel *chan, + struct lttng_ust_shm_handle *handle) +{ + unsigned long consumed_old, consumed_idx, commit_count, write_offset; + struct commit_counters_cold *cc_cold; + + consumed_old = uatomic_read(&buf->consumed); + consumed_idx = subbuf_index(consumed_old, chan); + cc_cold = shmp_index(handle, buf->commit_cold, consumed_idx); + if (!cc_cold) + return 0; + commit_count = v_read(config, &cc_cold->cc_sb); + /* + * No memory barrier here, since we are only interested + * in a statistically correct polling result. The next poll will + * get the data is we are racing. The mb() that ensures correct + * memory order is in get_subbuf. + */ + write_offset = v_read(config, &buf->offset); + + /* + * Check that the subbuffer we are trying to consume has been + * already fully committed. 
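The delivery checks here and below lean on the subbuf_offset()/subbuf_trunc()/subbuf_align()/subbuf_index() helpers, which are power-of-two bit arithmetic on free-running byte offsets. A stand-alone sketch with toy re-implementations (assumed to mirror what those helpers compute) for a 4 x 4 KiB buffer:

#include <stdio.h>

#define SUBBUF_SIZE_ORDER	12U			/* 4 KiB sub-buffers */
#define SUBBUF_SIZE		(1UL << SUBBUF_SIZE_ORDER)
#define NUM_SUBBUF_ORDER	2U			/* 4 sub-buffers */
#define BUF_SIZE		(SUBBUF_SIZE << NUM_SUBBUF_ORDER)

/* Toy equivalents of the subbuf_*() helpers (assumed semantics). */
static unsigned long subbuf_offset(unsigned long off)	{ return off & (SUBBUF_SIZE - 1); }
static unsigned long subbuf_trunc(unsigned long off)	{ return off & ~(SUBBUF_SIZE - 1); }
static unsigned long subbuf_align(unsigned long off)	{ return (off + SUBBUF_SIZE) & ~(SUBBUF_SIZE - 1); }
static unsigned long subbuf_index(unsigned long off)	{ return (off & (BUF_SIZE - 1)) >> SUBBUF_SIZE_ORDER; }
static unsigned long buf_trunc(unsigned long off)	{ return off & ~(BUF_SIZE - 1); }

int main(void)
{
	/* A free-running write offset that has already lapped the buffer once. */
	unsigned long off = BUF_SIZE + 2 * SUBBUF_SIZE + 100;

	printf("offset        = %lu\n", off);
	printf("subbuf_offset = %lu\n", subbuf_offset(off));	/* 100 bytes into the sub-buffer */
	printf("subbuf_trunc  = %lu\n", subbuf_trunc(off));	/* start of this sub-buffer */
	printf("subbuf_align  = %lu\n", subbuf_align(off));	/* start of the next sub-buffer */
	printf("subbuf_index  = %lu\n", subbuf_index(off));	/* slot 2 of the 4-slot buffer */
	printf("buf_trunc     = %lu\n", buf_trunc(off));	/* truncated to a whole-buffer lap */
	return 0;
}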
+ */ + + if (((commit_count - chan->backend.subbuf_size) + & chan->commit_count_mask) + - (buf_trunc(consumed_old, chan) + >> chan->backend.num_subbuf_order) + != 0) + return 0; + + /* + * Check that we are not about to read the same subbuffer in + * which the writer head is. + */ + if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed_old, chan) + == 0) + return 0; + + return 1; +} + +static +void lib_ring_buffer_wakeup(struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_shm_handle *handle) +{ + int wakeup_fd = shm_get_wakeup_fd(handle, &buf->self._ref); + sigset_t sigpipe_set, pending_set, old_set; + int ret, sigpipe_was_pending = 0; + + if (wakeup_fd < 0) + return; + + /* + * Wake-up the other end by writing a null byte in the pipe + * (non-blocking). Important note: Because writing into the + * pipe is non-blocking (and therefore we allow dropping wakeup + * data, as long as there is wakeup data present in the pipe + * buffer to wake up the consumer), the consumer should perform + * the following sequence for waiting: + * 1) empty the pipe (reads). + * 2) check if there is data in the buffer. + * 3) wait on the pipe (poll). + * + * Discard the SIGPIPE from write(), not disturbing any SIGPIPE + * that might be already pending. If a bogus SIGPIPE is sent to + * the entire process concurrently by a malicious user, it may + * be simply discarded. + */ + ret = sigemptyset(&pending_set); + assert(!ret); + /* + * sigpending returns the mask of signals that are _both_ + * blocked for the thread _and_ pending for either the thread or + * the entire process. + */ + ret = sigpending(&pending_set); + assert(!ret); + sigpipe_was_pending = sigismember(&pending_set, SIGPIPE); + /* + * If sigpipe was pending, it means it was already blocked, so + * no need to block it. + */ + if (!sigpipe_was_pending) { + ret = sigemptyset(&sigpipe_set); + assert(!ret); + ret = sigaddset(&sigpipe_set, SIGPIPE); + assert(!ret); + ret = pthread_sigmask(SIG_BLOCK, &sigpipe_set, &old_set); + assert(!ret); + } + do { + ret = write(wakeup_fd, "", 1); + } while (ret == -1L && errno == EINTR); + if (ret == -1L && errno == EPIPE && !sigpipe_was_pending) { + struct timespec timeout = { 0, 0 }; + do { + ret = sigtimedwait(&sigpipe_set, NULL, + &timeout); + } while (ret == -1L && errno == EINTR); + } + if (!sigpipe_was_pending) { + ret = pthread_sigmask(SIG_SETMASK, &old_set, NULL); + assert(!ret); + } +} + +static +void lib_ring_buffer_channel_do_read(struct lttng_ust_lib_ring_buffer_channel *chan) +{ + const struct lttng_ust_lib_ring_buffer_config *config; + struct lttng_ust_shm_handle *handle; + int cpu; + + handle = chan->handle; + config = &chan->backend.config; + + /* + * Only flush buffers periodically if readers are active. 
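lib_ring_buffer_wakeup() above relies on the consumer following a strict order: drain the wakeup pipe, check for data, and only then block in poll(). A self-contained sketch of that consumer-side wait loop; have_data/data_available() stand in for the real "is a sub-buffer deliverable" check:

#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static volatile bool have_data;	/* stand-in for "a sub-buffer is deliverable" */

static bool data_available(void)
{
	return have_data;
}

/*
 * Consumer-side wait loop following the ordering documented in
 * lib_ring_buffer_wakeup(): 1) drain the pipe, 2) check for data,
 * 3) block in poll(). Because the producer publishes data before
 * writing the wakeup byte, this ordering cannot miss a wakeup.
 */
static void wait_for_data(int wakeup_rfd)
{
	struct pollfd pfd = { .fd = wakeup_rfd, .events = POLLIN };
	char scratch[64];

	for (;;) {
		ssize_t len;

		/* 1) Empty the pipe (read end is O_NONBLOCK). */
		do {
			len = read(wakeup_rfd, scratch, sizeof(scratch));
		} while (len > 0 || (len < 0 && errno == EINTR));

		/* 2) Check whether data is already present. */
		if (data_available())
			return;

		/* 3) Wait for the next wakeup byte. */
		if (poll(&pfd, 1, -1) < 0 && errno != EINTR)
			return;
	}
}

int main(void)
{
	int fds[2];

	if (pipe(fds) < 0)
		return 1;
	fcntl(fds[0], F_SETFL, O_NONBLOCK);

	/* Producer side: publish the data first, then poke the pipe. */
	have_data = true;
	if (write(fds[1], "", 1) < 0)
		perror("write");

	wait_for_data(fds[0]);
	puts("woken up, data available");
	return 0;
}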
+ */ + pthread_mutex_lock(&wakeup_fd_mutex); + if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) { + for_each_possible_cpu(cpu) { + struct lttng_ust_lib_ring_buffer *buf = + shmp(handle, chan->backend.buf[cpu].shmp); + + if (!buf) + goto end; + if (uatomic_read(&buf->active_readers) + && lib_ring_buffer_poll_deliver(config, buf, + chan, handle)) { + lib_ring_buffer_wakeup(buf, handle); + } + } + } else { + struct lttng_ust_lib_ring_buffer *buf = + shmp(handle, chan->backend.buf[0].shmp); + + if (!buf) + goto end; + if (uatomic_read(&buf->active_readers) + && lib_ring_buffer_poll_deliver(config, buf, + chan, handle)) { + lib_ring_buffer_wakeup(buf, handle); + } + } +end: + pthread_mutex_unlock(&wakeup_fd_mutex); +} + +static +void lib_ring_buffer_channel_read_timer(int sig __attribute__((unused)), + siginfo_t *si, void *uc __attribute__((unused))) +{ + struct lttng_ust_lib_ring_buffer_channel *chan; + + assert(CMM_LOAD_SHARED(timer_signal.tid) == pthread_self()); + chan = si->si_value.sival_ptr; + DBG("Read timer for channel %p\n", chan); + lib_ring_buffer_channel_do_read(chan); + return; +} + +static +void rb_setmask(sigset_t *mask) +{ + int ret; + + ret = sigemptyset(mask); + if (ret) { + PERROR("sigemptyset"); + } + ret = sigaddset(mask, LTTNG_UST_RB_SIG_FLUSH); + if (ret) { + PERROR("sigaddset"); + } + ret = sigaddset(mask, LTTNG_UST_RB_SIG_READ); + if (ret) { + PERROR("sigaddset"); + } + ret = sigaddset(mask, LTTNG_UST_RB_SIG_TEARDOWN); + if (ret) { + PERROR("sigaddset"); + } +} + +static +void *sig_thread(void *arg __attribute__((unused))) +{ + sigset_t mask; + siginfo_t info; + int signr; + + /* Only self thread will receive signal mask. */ + rb_setmask(&mask); + CMM_STORE_SHARED(timer_signal.tid, pthread_self()); + + for (;;) { + signr = sigwaitinfo(&mask, &info); + if (signr == -1) { + if (errno != EINTR) + PERROR("sigwaitinfo"); + continue; + } + if (signr == LTTNG_UST_RB_SIG_FLUSH) { + lib_ring_buffer_channel_switch_timer(info.si_signo, + &info, NULL); + } else if (signr == LTTNG_UST_RB_SIG_READ) { + lib_ring_buffer_channel_read_timer(info.si_signo, + &info, NULL); + } else if (signr == LTTNG_UST_RB_SIG_TEARDOWN) { + cmm_smp_mb(); + CMM_STORE_SHARED(timer_signal.qs_done, 1); + cmm_smp_mb(); + } else { + ERR("Unexptected signal %d\n", info.si_signo); + } + } + return NULL; +} + +/* + * Ensure only a single thread listens on the timer signal. + */ +static +void lib_ring_buffer_setup_timer_thread(void) +{ + pthread_t thread; + int ret; + + pthread_mutex_lock(&timer_signal.lock); + if (timer_signal.setup_done) + goto end; + + ret = pthread_create(&thread, NULL, &sig_thread, NULL); + if (ret) { + errno = ret; + PERROR("pthread_create"); + } + ret = pthread_detach(thread); + if (ret) { + errno = ret; + PERROR("pthread_detach"); + } + timer_signal.setup_done = 1; +end: + pthread_mutex_unlock(&timer_signal.lock); +} + +/* + * Wait for signal-handling thread quiescent state. + */ +static +void lib_ring_buffer_wait_signal_thread_qs(unsigned int signr) +{ + sigset_t pending_set; + int ret; + + /* + * We need to be the only thread interacting with the thread + * that manages signals for teardown synchronization. + */ + pthread_mutex_lock(&timer_signal.lock); + + /* + * Ensure we don't have any signal queued for this channel. 
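sig_thread() above is the standard pattern for handling POSIX timer signals synchronously: block the signals in every thread, let one dedicated thread collect them with sigwaitinfo(), and have timer_create() post them with a context pointer in sival_ptr. A reduced sketch of the same pattern; the signal number, tick_ctx structure and 100 ms period are placeholders (build with -pthread, plus -lrt on older glibc):

#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#define TICK_SIG	(SIGRTMIN + 4)		/* placeholder real-time signal */

struct tick_ctx {				/* stand-in for the channel pointer */
	const char *name;
};

static void *tick_thread(void *arg __attribute__((unused)))
{
	sigset_t mask;
	siginfo_t info;

	sigemptyset(&mask);
	sigaddset(&mask, TICK_SIG);
	for (;;) {
		/* Collect the signal synchronously; no async handler runs. */
		if (sigwaitinfo(&mask, &info) == -1)
			continue;
		struct tick_ctx *ctx = info.si_value.sival_ptr;
		printf("tick for %s\n", ctx->name);
	}
	return NULL;
}

int main(void)
{
	static struct tick_ctx ctx = { .name = "demo-channel" };
	struct sigevent sev;
	struct itimerspec its;
	sigset_t mask;
	pthread_t thread;
	timer_t timer;

	/*
	 * Block the signal before creating the thread; the new thread
	 * inherits the mask, so only sigwaitinfo() ever sees it.
	 */
	sigemptyset(&mask);
	sigaddset(&mask, TICK_SIG);
	pthread_sigmask(SIG_BLOCK, &mask, NULL);

	pthread_create(&thread, NULL, tick_thread, NULL);

	/* Deliver TICK_SIG to the process with the context as payload. */
	memset(&sev, 0, sizeof(sev));
	sev.sigev_notify = SIGEV_SIGNAL;
	sev.sigev_signo = TICK_SIG;
	sev.sigev_value.sival_ptr = &ctx;
	timer_create(CLOCK_MONOTONIC, &sev, &timer);

	its.it_value.tv_sec = 0;
	its.it_value.tv_nsec = 100 * 1000 * 1000;	/* first expiry: 100 ms */
	its.it_interval = its.it_value;			/* then every 100 ms */
	timer_settime(timer, 0, &its, NULL);

	sleep(1);					/* let a few ticks print */
	timer_delete(timer);
	return 0;
}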
+ */ + for (;;) { + ret = sigemptyset(&pending_set); + if (ret == -1) { + PERROR("sigemptyset"); + } + ret = sigpending(&pending_set); + if (ret == -1) { + PERROR("sigpending"); + } + if (!sigismember(&pending_set, signr)) + break; + caa_cpu_relax(); + } + + /* + * From this point, no new signal handler will be fired that + * would try to access "chan". However, we still need to wait + * for any currently executing handler to complete. + */ + cmm_smp_mb(); + CMM_STORE_SHARED(timer_signal.qs_done, 0); + cmm_smp_mb(); + + /* + * Kill with LTTNG_UST_RB_SIG_TEARDOWN, so signal management + * thread wakes up. + */ + kill(getpid(), LTTNG_UST_RB_SIG_TEARDOWN); + + while (!CMM_LOAD_SHARED(timer_signal.qs_done)) + caa_cpu_relax(); + cmm_smp_mb(); + + pthread_mutex_unlock(&timer_signal.lock); +} + +static +void lib_ring_buffer_channel_switch_timer_start(struct lttng_ust_lib_ring_buffer_channel *chan) +{ + struct sigevent sev; + struct itimerspec its; + int ret; + + if (!chan->switch_timer_interval || chan->switch_timer_enabled) + return; + + chan->switch_timer_enabled = 1; + + lib_ring_buffer_setup_timer_thread(); + + memset(&sev, 0, sizeof(sev)); + sev.sigev_notify = SIGEV_SIGNAL; + sev.sigev_signo = LTTNG_UST_RB_SIG_FLUSH; + sev.sigev_value.sival_ptr = chan; + ret = timer_create(CLOCKID, &sev, &chan->switch_timer); + if (ret == -1) { + PERROR("timer_create"); + } + + its.it_value.tv_sec = chan->switch_timer_interval / 1000000; + its.it_value.tv_nsec = (chan->switch_timer_interval % 1000000) * 1000; + its.it_interval.tv_sec = its.it_value.tv_sec; + its.it_interval.tv_nsec = its.it_value.tv_nsec; + + ret = timer_settime(chan->switch_timer, 0, &its, NULL); + if (ret == -1) { + PERROR("timer_settime"); + } +} + +static +void lib_ring_buffer_channel_switch_timer_stop(struct lttng_ust_lib_ring_buffer_channel *chan) +{ + int ret; + + if (!chan->switch_timer_interval || !chan->switch_timer_enabled) + return; + + ret = timer_delete(chan->switch_timer); + if (ret == -1) { + PERROR("timer_delete"); + } + + lib_ring_buffer_wait_signal_thread_qs(LTTNG_UST_RB_SIG_FLUSH); + + chan->switch_timer = 0; + chan->switch_timer_enabled = 0; +} + +static +void lib_ring_buffer_channel_read_timer_start(struct lttng_ust_lib_ring_buffer_channel *chan) +{ + const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; + struct sigevent sev; + struct itimerspec its; + int ret; + + if (config->wakeup != RING_BUFFER_WAKEUP_BY_TIMER + || !chan->read_timer_interval || chan->read_timer_enabled) + return; + + chan->read_timer_enabled = 1; + + lib_ring_buffer_setup_timer_thread(); + + sev.sigev_notify = SIGEV_SIGNAL; + sev.sigev_signo = LTTNG_UST_RB_SIG_READ; + sev.sigev_value.sival_ptr = chan; + ret = timer_create(CLOCKID, &sev, &chan->read_timer); + if (ret == -1) { + PERROR("timer_create"); + } + + its.it_value.tv_sec = chan->read_timer_interval / 1000000; + its.it_value.tv_nsec = (chan->read_timer_interval % 1000000) * 1000; + its.it_interval.tv_sec = its.it_value.tv_sec; + its.it_interval.tv_nsec = its.it_value.tv_nsec; + + ret = timer_settime(chan->read_timer, 0, &its, NULL); + if (ret == -1) { + PERROR("timer_settime"); + } +} + +static +void lib_ring_buffer_channel_read_timer_stop(struct lttng_ust_lib_ring_buffer_channel *chan) +{ + const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; + int ret; + + if (config->wakeup != RING_BUFFER_WAKEUP_BY_TIMER + || !chan->read_timer_interval || !chan->read_timer_enabled) + return; + + ret = timer_delete(chan->read_timer); + if (ret == -1) { + 
PERROR("timer_delete"); + } + + /* + * do one more check to catch data that has been written in the last + * timer period. + */ + lib_ring_buffer_channel_do_read(chan); + + lib_ring_buffer_wait_signal_thread_qs(LTTNG_UST_RB_SIG_READ); + + chan->read_timer = 0; + chan->read_timer_enabled = 0; +} + +static void channel_unregister_notifiers(struct lttng_ust_lib_ring_buffer_channel *chan, + struct lttng_ust_shm_handle *handle __attribute__((unused))) +{ + lib_ring_buffer_channel_switch_timer_stop(chan); + lib_ring_buffer_channel_read_timer_stop(chan); +} + +static void channel_print_errors(struct lttng_ust_lib_ring_buffer_channel *chan, + struct lttng_ust_shm_handle *handle) +{ + const struct lttng_ust_lib_ring_buffer_config *config = + &chan->backend.config; + int cpu; + + if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) { + for_each_possible_cpu(cpu) { + struct lttng_ust_lib_ring_buffer *buf = + shmp(handle, chan->backend.buf[cpu].shmp); + if (buf) + lib_ring_buffer_print_errors(chan, buf, cpu, handle); + } + } else { + struct lttng_ust_lib_ring_buffer *buf = + shmp(handle, chan->backend.buf[0].shmp); + + if (buf) + lib_ring_buffer_print_errors(chan, buf, -1, handle); + } +} + +static void channel_free(struct lttng_ust_lib_ring_buffer_channel *chan, + struct lttng_ust_shm_handle *handle, + int consumer) +{ + channel_backend_free(&chan->backend, handle); + /* chan is freed by shm teardown */ + shm_object_table_destroy(handle->table, consumer); + free(handle); +} + +/** + * channel_create - Create channel. + * @config: ring buffer instance configuration + * @name: name of the channel + * @priv_data_align: alignment, in bytes, of the private data area. (config) + * @priv_data_size: length, in bytes, of the private data area. (config) + * @priv_data_init: initialization data for private data. (config) + * @priv: local private data (memory owner by caller) + * @buf_addr: pointer the the beginning of the preallocated buffer contiguous + * address mapping. It is used only by RING_BUFFER_STATIC + * configuration. It can be set to NULL for other backends. + * @subbuf_size: subbuffer size + * @num_subbuf: number of subbuffers + * @switch_timer_interval: Time interval (in us) to fill sub-buffers with + * padding to let readers get those sub-buffers. + * Used for live streaming. + * @read_timer_interval: Time interval (in us) to wake up pending readers. + * @stream_fds: array of stream file descriptors. + * @nr_stream_fds: number of file descriptors in array. + * + * Holds cpu hotplug. + * Returns NULL on failure. 
+ */ +struct lttng_ust_shm_handle *channel_create(const struct lttng_ust_lib_ring_buffer_config *config, + const char *name, + size_t priv_data_align, + size_t priv_data_size, + void *priv_data_init, + void *priv, + void *buf_addr __attribute__((unused)), size_t subbuf_size, + size_t num_subbuf, unsigned int switch_timer_interval, + unsigned int read_timer_interval, + const int *stream_fds, int nr_stream_fds, + int64_t blocking_timeout) +{ + int ret; + size_t shmsize, chansize; + struct lttng_ust_lib_ring_buffer_channel *chan; + struct lttng_ust_shm_handle *handle; + struct shm_object *shmobj; + unsigned int nr_streams; + int64_t blocking_timeout_ms; + + if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) + nr_streams = num_possible_cpus(); + else + nr_streams = 1; + + if (nr_stream_fds != nr_streams) + return NULL; + + if (blocking_timeout < -1) { + return NULL; + } + /* usec to msec */ + if (blocking_timeout == -1) { + blocking_timeout_ms = -1; + } else { + blocking_timeout_ms = blocking_timeout / 1000; + if (blocking_timeout_ms != (int32_t) blocking_timeout_ms) { + return NULL; + } + } + + if (lib_ring_buffer_check_config(config, switch_timer_interval, + read_timer_interval)) + return NULL; + + handle = zmalloc(sizeof(struct lttng_ust_shm_handle)); + if (!handle) + return NULL; + + /* Allocate table for channel + per-cpu buffers */ + handle->table = shm_object_table_create(1 + num_possible_cpus()); + if (!handle->table) + goto error_table_alloc; + + /* Calculate the shm allocation layout */ + shmsize = sizeof(struct lttng_ust_lib_ring_buffer_channel); + shmsize += lttng_ust_offset_align(shmsize, __alignof__(struct lttng_ust_lib_ring_buffer_shmp)); + shmsize += sizeof(struct lttng_ust_lib_ring_buffer_shmp) * nr_streams; + chansize = shmsize; + if (priv_data_align) + shmsize += lttng_ust_offset_align(shmsize, priv_data_align); + shmsize += priv_data_size; + + /* Allocate normal memory for channel (not shared) */ + shmobj = shm_object_table_alloc(handle->table, shmsize, SHM_OBJECT_MEM, + -1, -1); + if (!shmobj) + goto error_append; + /* struct lttng_ust_lib_ring_buffer_channel is at object 0, offset 0 (hardcoded) */ + set_shmp(handle->chan, zalloc_shm(shmobj, chansize)); + assert(handle->chan._ref.index == 0); + assert(handle->chan._ref.offset == 0); + chan = shmp(handle, handle->chan); + if (!chan) + goto error_append; + chan->nr_streams = nr_streams; + + /* space for private data */ + if (priv_data_size) { + void *priv_config; + + DECLARE_SHMP(void, priv_data_alloc); + + align_shm(shmobj, priv_data_align); + chan->priv_data_offset = shmobj->allocated_len; + set_shmp(priv_data_alloc, zalloc_shm(shmobj, priv_data_size)); + if (!shmp(handle, priv_data_alloc)) + goto error_append; + priv_config = channel_get_private_config(chan); + memcpy(priv_config, priv_data_init, priv_data_size); + } else { + chan->priv_data_offset = -1; + } + + chan->u.s.blocking_timeout_ms = (int32_t) blocking_timeout_ms; + + channel_set_private(chan, priv); + + ret = channel_backend_init(&chan->backend, name, config, + subbuf_size, num_subbuf, handle, + stream_fds); + if (ret) + goto error_backend_init; + + chan->handle = handle; + chan->commit_count_mask = (~0UL >> chan->backend.num_subbuf_order); + + chan->switch_timer_interval = switch_timer_interval; + chan->read_timer_interval = read_timer_interval; + lib_ring_buffer_channel_switch_timer_start(chan); + lib_ring_buffer_channel_read_timer_start(chan); + + return handle; + +error_backend_init: +error_append: + shm_object_table_destroy(handle->table, 1); 
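The size computation in channel_create() above packs the channel header, the per-stream shmp array and the optional private data area into one object by repeatedly rounding the running size up to the next field's alignment. A small sketch of that accumulate-and-align bookkeeping; offset_align() here is a local helper returning the padding to add, which is how lttng_ust_offset_align() is used above:

#include <stdalign.h>
#include <stddef.h>
#include <stdio.h>

struct chan_hdr { long a; int b; };		/* stand-in structures */
struct stream_ref { void *p; size_t off; };

/* Padding needed to bring "offset" up to "align" (a power of two). */
static size_t offset_align(size_t offset, size_t align)
{
	return (align - offset) & (align - 1);
}

int main(void)
{
	size_t nr_streams = 4;
	size_t priv_size = 100, priv_align = 64;
	size_t shmsize = 0, chansize, priv_offset;

	shmsize += sizeof(struct chan_hdr);
	shmsize += offset_align(shmsize, alignof(struct stream_ref));
	shmsize += sizeof(struct stream_ref) * nr_streams;
	chansize = shmsize;			/* header + stream table */

	shmsize += offset_align(shmsize, priv_align);
	priv_offset = shmsize;			/* aligned private data area */
	shmsize += priv_size;

	printf("chansize=%zu priv_offset=%zu total=%zu\n",
		chansize, priv_offset, shmsize);
	return 0;
}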
+error_table_alloc: + free(handle); + return NULL; +} + +struct lttng_ust_shm_handle *channel_handle_create(void *data, + uint64_t memory_map_size, + int wakeup_fd) +{ + struct lttng_ust_shm_handle *handle; + struct shm_object *object; + + handle = zmalloc(sizeof(struct lttng_ust_shm_handle)); + if (!handle) + return NULL; + + /* Allocate table for channel + per-cpu buffers */ + handle->table = shm_object_table_create(1 + num_possible_cpus()); + if (!handle->table) + goto error_table_alloc; + /* Add channel object */ + object = shm_object_table_append_mem(handle->table, data, + memory_map_size, wakeup_fd); + if (!object) + goto error_table_object; + /* struct lttng_ust_lib_ring_buffer_channel is at object 0, offset 0 (hardcoded) */ + handle->chan._ref.index = 0; + handle->chan._ref.offset = 0; + return handle; + +error_table_object: + shm_object_table_destroy(handle->table, 0); +error_table_alloc: + free(handle); + return NULL; +} + +int channel_handle_add_stream(struct lttng_ust_shm_handle *handle, + int shm_fd, int wakeup_fd, uint32_t stream_nr, + uint64_t memory_map_size) +{ + struct shm_object *object; + + /* Add stream object */ + object = shm_object_table_append_shm(handle->table, + shm_fd, wakeup_fd, stream_nr, + memory_map_size); + if (!object) + return -EINVAL; + return 0; +} + +unsigned int channel_handle_get_nr_streams(struct lttng_ust_shm_handle *handle) +{ + assert(handle->table); + return handle->table->allocated_len - 1; +} + +static +void channel_release(struct lttng_ust_lib_ring_buffer_channel *chan, struct lttng_ust_shm_handle *handle, + int consumer) +{ + channel_free(chan, handle, consumer); +} + +/** + * channel_destroy - Finalize, wait for q.s. and destroy channel. + * @chan: channel to destroy + * + * Holds cpu hotplug. + * Call "destroy" callback, finalize channels, decrement the channel + * reference count. Note that when readers have completed data + * consumption of finalized channels, get_subbuf() will return -ENODATA. + * They should release their handle at that point. + */ +void channel_destroy(struct lttng_ust_lib_ring_buffer_channel *chan, struct lttng_ust_shm_handle *handle, + int consumer) +{ + if (consumer) { + /* + * Note: the consumer takes care of finalizing and + * switching the buffers. + */ + channel_unregister_notifiers(chan, handle); + /* + * The consumer prints errors. + */ + channel_print_errors(chan, handle); + } + + /* + * sessiond/consumer are keeping a reference on the shm file + * descriptor directly. No need to refcount. 
+ */ + channel_release(chan, handle, consumer); + return; +} + +struct lttng_ust_lib_ring_buffer *channel_get_ring_buffer( + const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_channel *chan, int cpu, + struct lttng_ust_shm_handle *handle, + int *shm_fd, int *wait_fd, + int *wakeup_fd, + uint64_t *memory_map_size) +{ + struct shm_ref *ref; + + if (config->alloc == RING_BUFFER_ALLOC_GLOBAL) { + cpu = 0; + } else { + if (cpu >= num_possible_cpus()) + return NULL; + } + ref = &chan->backend.buf[cpu].shmp._ref; + *shm_fd = shm_get_shm_fd(handle, ref); + *wait_fd = shm_get_wait_fd(handle, ref); + *wakeup_fd = shm_get_wakeup_fd(handle, ref); + if (shm_get_shm_size(handle, ref, memory_map_size)) + return NULL; + return shmp(handle, chan->backend.buf[cpu].shmp); +} + +int ring_buffer_channel_close_wait_fd( + const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), + struct lttng_ust_lib_ring_buffer_channel *chan __attribute__((unused)), + struct lttng_ust_shm_handle *handle) +{ + struct shm_ref *ref; + + ref = &handle->chan._ref; + return shm_close_wait_fd(handle, ref); +} + +int ring_buffer_channel_close_wakeup_fd( + const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), + struct lttng_ust_lib_ring_buffer_channel *chan __attribute__((unused)), + struct lttng_ust_shm_handle *handle) +{ + struct shm_ref *ref; + + ref = &handle->chan._ref; + return shm_close_wakeup_fd(handle, ref); +} + +int ring_buffer_stream_close_wait_fd(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_channel *chan, + struct lttng_ust_shm_handle *handle, + int cpu) +{ + struct shm_ref *ref; + + if (config->alloc == RING_BUFFER_ALLOC_GLOBAL) { + cpu = 0; + } else { + if (cpu >= num_possible_cpus()) + return -EINVAL; + } + ref = &chan->backend.buf[cpu].shmp._ref; + return shm_close_wait_fd(handle, ref); +} + +int ring_buffer_stream_close_wakeup_fd(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_channel *chan, + struct lttng_ust_shm_handle *handle, + int cpu) +{ + struct shm_ref *ref; + int ret; + + if (config->alloc == RING_BUFFER_ALLOC_GLOBAL) { + cpu = 0; + } else { + if (cpu >= num_possible_cpus()) + return -EINVAL; + } + ref = &chan->backend.buf[cpu].shmp._ref; + pthread_mutex_lock(&wakeup_fd_mutex); + ret = shm_close_wakeup_fd(handle, ref); + pthread_mutex_unlock(&wakeup_fd_mutex); + return ret; +} + +int lib_ring_buffer_open_read(struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_shm_handle *handle __attribute__((unused))) +{ + if (uatomic_cmpxchg(&buf->active_readers, 0, 1) != 0) + return -EBUSY; + cmm_smp_mb(); + return 0; +} + +void lib_ring_buffer_release_read(struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_shm_handle *handle) +{ + struct lttng_ust_lib_ring_buffer_channel *chan = shmp(handle, buf->backend.chan); + + if (!chan) + return; + CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1); + cmm_smp_mb(); + uatomic_dec(&buf->active_readers); +} + +/** + * lib_ring_buffer_snapshot - save subbuffer position snapshot (for read) + * @buf: ring buffer + * @consumed: consumed count indicating the position where to read + * @produced: produced count, indicates position when to stop reading + * + * Returns -ENODATA if buffer is finalized, -EAGAIN if there is currently no + * data to read at consumed position, or 0 if the get operation succeeds. 
+ */ + +int lib_ring_buffer_snapshot(struct lttng_ust_lib_ring_buffer *buf, + unsigned long *consumed, unsigned long *produced, + struct lttng_ust_shm_handle *handle) +{ + struct lttng_ust_lib_ring_buffer_channel *chan; + const struct lttng_ust_lib_ring_buffer_config *config; + unsigned long consumed_cur, write_offset; + int finalized; + + chan = shmp(handle, buf->backend.chan); + if (!chan) + return -EPERM; + config = &chan->backend.config; + finalized = CMM_ACCESS_ONCE(buf->finalized); + /* + * Read finalized before counters. + */ + cmm_smp_rmb(); + consumed_cur = uatomic_read(&buf->consumed); + /* + * No need to issue a memory barrier between consumed count read and + * write offset read, because consumed count can only change + * concurrently in overwrite mode, and we keep a sequence counter + * identifier derived from the write offset to check we are getting + * the same sub-buffer we are expecting (the sub-buffers are atomically + * "tagged" upon writes, tags are checked upon read). + */ + write_offset = v_read(config, &buf->offset); + + /* + * Check that we are not about to read the same subbuffer in + * which the writer head is. + */ + if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed_cur, chan) + == 0) + goto nodata; + + *consumed = consumed_cur; + *produced = subbuf_trunc(write_offset, chan); + + return 0; + +nodata: + /* + * The memory barriers __wait_event()/wake_up_interruptible() take care + * of "raw_spin_is_locked" memory ordering. + */ + if (finalized) + return -ENODATA; + else + return -EAGAIN; +} + +/** + * Performs the same function as lib_ring_buffer_snapshot(), but the positions + * are saved regardless of whether the consumed and produced positions are + * in the same subbuffer. + * @buf: ring buffer + * @consumed: consumed byte count indicating the last position read + * @produced: produced byte count indicating the last position written + * + * This function is meant to provide information on the exact producer and + * consumer positions without regard for the "snapshot" feature. + */ +int lib_ring_buffer_snapshot_sample_positions( + struct lttng_ust_lib_ring_buffer *buf, + unsigned long *consumed, unsigned long *produced, + struct lttng_ust_shm_handle *handle) +{ + struct lttng_ust_lib_ring_buffer_channel *chan; + const struct lttng_ust_lib_ring_buffer_config *config; + + chan = shmp(handle, buf->backend.chan); + if (!chan) + return -EPERM; + config = &chan->backend.config; + cmm_smp_rmb(); + *consumed = uatomic_read(&buf->consumed); + /* + * No need to issue a memory barrier between consumed count read and + * write offset read, because consumed count can only change + * concurrently in overwrite mode, and we keep a sequence counter + * identifier derived from the write offset to check we are getting + * the same sub-buffer we are expecting (the sub-buffers are atomically + * "tagged" upon writes, tags are checked upon read). 
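The consumed/produced values returned by the snapshot functions behave as free-running byte counters, so the usual unsigned wrap-around arithmetic applies when comparing them, as the signed-difference tests in this file do. A tiny illustration:

#include <limits.h>
#include <stdio.h>

int main(void)
{
	/* Free-running byte counters: monotonic, wrap-around is harmless. */
	unsigned long consumed = ULONG_MAX - 100;	/* just before the wrap */
	unsigned long produced = consumed + 4096;	/* wraps past zero */

	/* Unsigned subtraction still yields the pending byte count. */
	printf("pending bytes = %lu\n", produced - consumed);	/* 4096 */

	/* The signed-difference ordering test used throughout this file. */
	printf("consumed behind produced: %s\n",
		(long) (consumed - produced) < 0 ? "yes" : "no");
	return 0;
}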
+ */ + *produced = v_read(config, &buf->offset); + return 0; +} + +/** + * lib_ring_buffer_move_consumer - move consumed counter forward + * @buf: ring buffer + * @consumed_new: new consumed count value + */ +void lib_ring_buffer_move_consumer(struct lttng_ust_lib_ring_buffer *buf, + unsigned long consumed_new, + struct lttng_ust_shm_handle *handle) +{ + struct lttng_ust_lib_ring_buffer_backend *bufb = &buf->backend; + struct lttng_ust_lib_ring_buffer_channel *chan; + unsigned long consumed; + + chan = shmp(handle, bufb->chan); + if (!chan) + return; + CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1); + + /* + * Only push the consumed value forward. + * If the consumed cmpxchg fails, this is because we have been pushed by + * the writer in flight recorder mode. + */ + consumed = uatomic_read(&buf->consumed); + while ((long) consumed - (long) consumed_new < 0) + consumed = uatomic_cmpxchg(&buf->consumed, consumed, + consumed_new); +} + +/** + * lib_ring_buffer_get_subbuf - get exclusive access to subbuffer for reading + * @buf: ring buffer + * @consumed: consumed count indicating the position where to read + * + * Returns -ENODATA if buffer is finalized, -EAGAIN if there is currently no + * data to read at consumed position, or 0 if the get operation succeeds. + */ +int lib_ring_buffer_get_subbuf(struct lttng_ust_lib_ring_buffer *buf, + unsigned long consumed, + struct lttng_ust_shm_handle *handle) +{ + struct lttng_ust_lib_ring_buffer_channel *chan; + const struct lttng_ust_lib_ring_buffer_config *config; + unsigned long consumed_cur, consumed_idx, commit_count, write_offset; + int ret, finalized, nr_retry = LTTNG_UST_RING_BUFFER_GET_RETRY; + struct commit_counters_cold *cc_cold; + + chan = shmp(handle, buf->backend.chan); + if (!chan) + return -EPERM; + config = &chan->backend.config; +retry: + finalized = CMM_ACCESS_ONCE(buf->finalized); + /* + * Read finalized before counters. + */ + cmm_smp_rmb(); + consumed_cur = uatomic_read(&buf->consumed); + consumed_idx = subbuf_index(consumed, chan); + cc_cold = shmp_index(handle, buf->commit_cold, consumed_idx); + if (!cc_cold) + return -EPERM; + commit_count = v_read(config, &cc_cold->cc_sb); + /* + * Make sure we read the commit count before reading the buffer + * data and the write offset. Correct consumed offset ordering + * wrt commit count is insured by the use of cmpxchg to update + * the consumed offset. + */ + /* + * Local rmb to match the remote wmb to read the commit count + * before the buffer data and the write offset. + */ + cmm_smp_rmb(); + + write_offset = v_read(config, &buf->offset); + + /* + * Check that the buffer we are getting is after or at consumed_cur + * position. + */ + if ((long) subbuf_trunc(consumed, chan) + - (long) subbuf_trunc(consumed_cur, chan) < 0) + goto nodata; + + /* + * Check that the subbuffer we are trying to consume has been + * already fully committed. There are a few causes that can make + * this unavailability situation occur: + * + * Temporary (short-term) situation: + * - Application is running on a different CPU, between reserve + * and commit ring buffer operations, + * - Application is preempted between reserve and commit ring + * buffer operations, + * + * Long-term situation: + * - Application is stopped (SIGSTOP) between reserve and commit + * ring buffer operations. Could eventually be resumed by + * SIGCONT. + * - Application is killed (SIGTERM, SIGINT, SIGKILL) between + * reserve and commit ring buffer operation. 
+ * + * From a consumer perspective, handling short-term + * unavailability situations is performed by retrying a few + * times after a delay. Handling long-term unavailability + * situations is handled by failing to get the sub-buffer. + * + * In all of those situations, if the application is taking a + * long time to perform its commit after ring buffer space + * reservation, we can end up in a situation where the producer + * will fill the ring buffer and try to write into the same + * sub-buffer again (which has a missing commit). This is + * handled by the producer in the sub-buffer switch handling + * code of the reserve routine by detecting unbalanced + * reserve/commit counters and discarding all further events + * until the situation is resolved in those situations. Two + * scenarios can occur: + * + * 1) The application causing the reserve/commit counters to be + * unbalanced has been terminated. In this situation, all + * further events will be discarded in the buffers, and no + * further buffer data will be readable by the consumer + * daemon. Tearing down the UST tracing session and starting + * anew is a work-around for those situations. Note that this + * only affects per-UID tracing. In per-PID tracing, the + * application vanishes with the termination, and therefore + * no more data needs to be written to the buffers. + * 2) The application causing the unbalance has been delayed for + * a long time, but will eventually try to increment the + * commit counter after eventually writing to the sub-buffer. + * This situation can cause events to be discarded until the + * application resumes its operations. + */ + if (((commit_count - chan->backend.subbuf_size) + & chan->commit_count_mask) + - (buf_trunc(consumed, chan) + >> chan->backend.num_subbuf_order) + != 0) { + if (nr_retry-- > 0) { + if (nr_retry <= (LTTNG_UST_RING_BUFFER_GET_RETRY >> 1)) + (void) poll(NULL, 0, LTTNG_UST_RING_BUFFER_RETRY_DELAY_MS); + goto retry; + } else { + goto nodata; + } + } + + /* + * Check that we are not about to read the same subbuffer in + * which the writer head is. + */ + if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed, chan) + == 0) + goto nodata; + + /* + * Failure to get the subbuffer causes a busy-loop retry without going + * to a wait queue. These are caused by short-lived race windows where + * the writer is getting access to a subbuffer we were trying to get + * access to. Also checks that the "consumed" buffer count we are + * looking for matches the one contained in the subbuffer id. + * + * The short-lived race window described here can be affected by + * application signals and preemption, thus requiring to bound + * the loop to a maximum number of retry. + */ + ret = update_read_sb_index(config, &buf->backend, &chan->backend, + consumed_idx, buf_trunc_val(consumed, chan), + handle); + if (ret) { + if (nr_retry-- > 0) { + if (nr_retry <= (LTTNG_UST_RING_BUFFER_GET_RETRY >> 1)) + (void) poll(NULL, 0, LTTNG_UST_RING_BUFFER_RETRY_DELAY_MS); + goto retry; + } else { + goto nodata; + } + } + subbuffer_id_clear_noref(config, &buf->backend.buf_rsb.id); + + buf->get_subbuf_consumed = consumed; + buf->get_subbuf = 1; + + return 0; + +nodata: + /* + * The memory barriers __wait_event()/wake_up_interruptible() take care + * of "raw_spin_is_locked" memory ordering. 
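Taken together, lib_ring_buffer_snapshot(), lib_ring_buffer_get_subbuf()/lib_ring_buffer_put_subbuf() and lib_ring_buffer_move_consumer() form the read-side loop a consumer drives. A sketch of that loop, compilable on its own against forward declarations copied from this file; read_subbuffer_payload() is a hypothetical stand-in for copying out the mapped data, and the real consumer logic lives in lttng-tools, not here:

/* Forward declarations copied from the definitions in this file. */
struct lttng_ust_lib_ring_buffer;
struct lttng_ust_shm_handle;

extern int lib_ring_buffer_snapshot(struct lttng_ust_lib_ring_buffer *buf,
		unsigned long *consumed, unsigned long *produced,
		struct lttng_ust_shm_handle *handle);
extern int lib_ring_buffer_get_subbuf(struct lttng_ust_lib_ring_buffer *buf,
		unsigned long consumed, struct lttng_ust_shm_handle *handle);
extern void lib_ring_buffer_put_subbuf(struct lttng_ust_lib_ring_buffer *buf,
		struct lttng_ust_shm_handle *handle);
extern void lib_ring_buffer_move_consumer(struct lttng_ust_lib_ring_buffer *buf,
		unsigned long consumed_new, struct lttng_ust_shm_handle *handle);

/*
 * Hypothetical: copy out the sub-buffer at "consumed", e.g. via the
 * backend read API or the mapped pages.
 */
extern void read_subbuffer_payload(struct lttng_ust_lib_ring_buffer *buf,
		unsigned long consumed, struct lttng_ust_shm_handle *handle);

static int drain_buffer(struct lttng_ust_lib_ring_buffer *buf,
		struct lttng_ust_shm_handle *handle,
		unsigned long subbuf_size)
{
	unsigned long consumed, produced;
	int ret;

	ret = lib_ring_buffer_snapshot(buf, &consumed, &produced, handle);
	if (ret)
		return ret;		/* -EAGAIN: no full sub-buffer yet */

	while ((long) (produced - consumed) > 0) {
		ret = lib_ring_buffer_get_subbuf(buf, consumed, handle);
		if (ret)
			return ret;	/* -EAGAIN/-ENODATA, or lost a race */
		read_subbuffer_payload(buf, consumed, handle);
		lib_ring_buffer_put_subbuf(buf, handle);
		/* Release the sub-buffer back to the writer and move on. */
		consumed += subbuf_size;
		lib_ring_buffer_move_consumer(buf, consumed, handle);
	}
	return 0;
}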
+ */ + if (finalized) + return -ENODATA; + else + return -EAGAIN; +} + +/** + * lib_ring_buffer_put_subbuf - release exclusive subbuffer access + * @buf: ring buffer + */ +void lib_ring_buffer_put_subbuf(struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_shm_handle *handle) +{ + struct lttng_ust_lib_ring_buffer_backend *bufb = &buf->backend; + struct lttng_ust_lib_ring_buffer_channel *chan; + const struct lttng_ust_lib_ring_buffer_config *config; + unsigned long sb_bindex, consumed_idx, consumed; + struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; + struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; + + chan = shmp(handle, bufb->chan); + if (!chan) + return; + config = &chan->backend.config; + CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1); + + if (!buf->get_subbuf) { + /* + * Reader puts a subbuffer it did not get. + */ + CHAN_WARN_ON(chan, 1); + return; + } + consumed = buf->get_subbuf_consumed; + buf->get_subbuf = 0; + + /* + * Clear the records_unread counter. (overruns counter) + * Can still be non-zero if a file reader simply grabbed the data + * without using iterators. + * Can be below zero if an iterator is used on a snapshot more than + * once. + */ + sb_bindex = subbuffer_id_get_index(config, bufb->buf_rsb.id); + rpages = shmp_index(handle, bufb->array, sb_bindex); + if (!rpages) + return; + backend_pages = shmp(handle, rpages->shmp); + if (!backend_pages) + return; + v_add(config, v_read(config, &backend_pages->records_unread), + &bufb->records_read); + v_set(config, &backend_pages->records_unread, 0); + CHAN_WARN_ON(chan, config->mode == RING_BUFFER_OVERWRITE + && subbuffer_id_is_noref(config, bufb->buf_rsb.id)); + subbuffer_id_set_noref(config, &bufb->buf_rsb.id); + + /* + * Exchange the reader subbuffer with the one we put in its place in the + * writer subbuffer table. Expect the original consumed count. If + * update_read_sb_index fails, this is because the writer updated the + * subbuffer concurrently. We should therefore keep the subbuffer we + * currently have: it has become invalid to try reading this sub-buffer + * consumed count value anyway. + */ + consumed_idx = subbuf_index(consumed, chan); + update_read_sb_index(config, &buf->backend, &chan->backend, + consumed_idx, buf_trunc_val(consumed, chan), + handle); + /* + * update_read_sb_index return value ignored. Don't exchange sub-buffer + * if the writer concurrently updated it. + */ +} + +/* + * cons_offset is an iterator on all subbuffer offsets between the reader + * position and the writer position. 
(inclusive) + */ +static +void lib_ring_buffer_print_subbuffer_errors(struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_lib_ring_buffer_channel *chan, + unsigned long cons_offset, + int cpu, + struct lttng_ust_shm_handle *handle) +{ + const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; + unsigned long cons_idx, commit_count, commit_count_sb; + struct commit_counters_hot *cc_hot; + struct commit_counters_cold *cc_cold; + + cons_idx = subbuf_index(cons_offset, chan); + cc_hot = shmp_index(handle, buf->commit_hot, cons_idx); + if (!cc_hot) + return; + cc_cold = shmp_index(handle, buf->commit_cold, cons_idx); + if (!cc_cold) + return; + commit_count = v_read(config, &cc_hot->cc); + commit_count_sb = v_read(config, &cc_cold->cc_sb); + + if (subbuf_offset(commit_count, chan) != 0) + DBG("ring buffer %s, cpu %d: " + "commit count in subbuffer %lu,\n" + "expecting multiples of %lu bytes\n" + " [ %lu bytes committed, %lu bytes reader-visible ]\n", + chan->backend.name, cpu, cons_idx, + chan->backend.subbuf_size, + commit_count, commit_count_sb); + + DBG("ring buffer: %s, cpu %d: %lu bytes committed\n", + chan->backend.name, cpu, commit_count); +} + +static +void lib_ring_buffer_print_buffer_errors(struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_lib_ring_buffer_channel *chan, + int cpu, struct lttng_ust_shm_handle *handle) +{ + const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; + unsigned long write_offset, cons_offset; + + /* + * No need to order commit_count, write_offset and cons_offset reads + * because we execute at teardown when no more writer nor reader + * references are left. + */ + write_offset = v_read(config, &buf->offset); + cons_offset = uatomic_read(&buf->consumed); + if (write_offset != cons_offset) + DBG("ring buffer %s, cpu %d: " + "non-consumed data\n" + " [ %lu bytes written, %lu bytes read ]\n", + chan->backend.name, cpu, write_offset, cons_offset); + + for (cons_offset = uatomic_read(&buf->consumed); + (long) (subbuf_trunc((unsigned long) v_read(config, &buf->offset), + chan) + - cons_offset) > 0; + cons_offset = subbuf_align(cons_offset, chan)) + lib_ring_buffer_print_subbuffer_errors(buf, chan, cons_offset, + cpu, handle); +} + +static +void lib_ring_buffer_print_errors(struct lttng_ust_lib_ring_buffer_channel *chan, + struct lttng_ust_lib_ring_buffer *buf, int cpu, + struct lttng_ust_shm_handle *handle) +{ + const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; + + if (!strcmp(chan->backend.name, "relay-metadata-mmap")) { + DBG("ring buffer %s: %lu records written, " + "%lu records overrun\n", + chan->backend.name, + v_read(config, &buf->records_count), + v_read(config, &buf->records_overrun)); + } else { + DBG("ring buffer %s, cpu %d: %lu records written, " + "%lu records overrun\n", + chan->backend.name, cpu, + v_read(config, &buf->records_count), + v_read(config, &buf->records_overrun)); + + if (v_read(config, &buf->records_lost_full) + || v_read(config, &buf->records_lost_wrap) + || v_read(config, &buf->records_lost_big)) + DBG("ring buffer %s, cpu %d: records were lost. Caused by:\n" + " [ %lu buffer full, %lu nest buffer wrap-around, " + "%lu event too big ]\n", + chan->backend.name, cpu, + v_read(config, &buf->records_lost_full), + v_read(config, &buf->records_lost_wrap), + v_read(config, &buf->records_lost_big)); + } + lib_ring_buffer_print_buffer_errors(buf, chan, cpu, handle); +} + +/* + * lib_ring_buffer_switch_old_start: Populate old subbuffer header. 
+ * + * Only executed by SWITCH_FLUSH, which can be issued while tracing is + * active or at buffer finalization (destroy). + */ +static +void lib_ring_buffer_switch_old_start(struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_lib_ring_buffer_channel *chan, + struct switch_offsets *offsets, + uint64_t tsc, + struct lttng_ust_shm_handle *handle) +{ + const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; + unsigned long oldidx = subbuf_index(offsets->old, chan); + unsigned long commit_count; + struct commit_counters_hot *cc_hot; + + config->cb.buffer_begin(buf, tsc, oldidx, handle); + + /* + * Order all writes to buffer before the commit count update that will + * determine that the subbuffer is full. + */ + cmm_smp_wmb(); + cc_hot = shmp_index(handle, buf->commit_hot, oldidx); + if (!cc_hot) + return; + v_add(config, config->cb.subbuffer_header_size(), + &cc_hot->cc); + commit_count = v_read(config, &cc_hot->cc); + /* Check if the written buffer has to be delivered */ + lib_ring_buffer_check_deliver(config, buf, chan, offsets->old, + commit_count, oldidx, handle, tsc); + lib_ring_buffer_write_commit_counter(config, buf, chan, + offsets->old + config->cb.subbuffer_header_size(), + commit_count, handle, cc_hot); +} + +/* + * lib_ring_buffer_switch_old_end: switch old subbuffer + * + * Note : offset_old should never be 0 here. It is ok, because we never perform + * buffer switch on an empty subbuffer in SWITCH_ACTIVE mode. The caller + * increments the offset_old value when doing a SWITCH_FLUSH on an empty + * subbuffer. + */ +static +void lib_ring_buffer_switch_old_end(struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_lib_ring_buffer_channel *chan, + struct switch_offsets *offsets, + uint64_t tsc, + struct lttng_ust_shm_handle *handle) +{ + const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; + unsigned long oldidx = subbuf_index(offsets->old - 1, chan); + unsigned long commit_count, padding_size, data_size; + struct commit_counters_hot *cc_hot; + uint64_t *ts_end; + + data_size = subbuf_offset(offsets->old - 1, chan) + 1; + padding_size = chan->backend.subbuf_size - data_size; + subbuffer_set_data_size(config, &buf->backend, oldidx, data_size, + handle); + + ts_end = shmp_index(handle, buf->ts_end, oldidx); + if (!ts_end) + return; + /* + * This is the last space reservation in that sub-buffer before + * it gets delivered. This provides exclusive access to write to + * this sub-buffer's ts_end. There are also no concurrent + * readers of that ts_end because delivery of that sub-buffer is + * postponed until the commit counter is incremented for the + * current space reservation. + */ + *ts_end = tsc; + + /* + * Order all writes to buffer and store to ts_end before the commit + * count update that will determine that the subbuffer is full. + */ + cmm_smp_wmb(); + cc_hot = shmp_index(handle, buf->commit_hot, oldidx); + if (!cc_hot) + return; + v_add(config, padding_size, &cc_hot->cc); + commit_count = v_read(config, &cc_hot->cc); + lib_ring_buffer_check_deliver(config, buf, chan, offsets->old - 1, + commit_count, oldidx, handle, tsc); + lib_ring_buffer_write_commit_counter(config, buf, chan, + offsets->old + padding_size, commit_count, handle, + cc_hot); +} + +/* + * lib_ring_buffer_switch_new_start: Populate new subbuffer. + * + * This code can be executed unordered : writers may already have written to the + * sub-buffer before this code gets executed, caution. 
The commit makes sure + * that this code is executed before the deliver of this sub-buffer. + */ +static +void lib_ring_buffer_switch_new_start(struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_lib_ring_buffer_channel *chan, + struct switch_offsets *offsets, + uint64_t tsc, + struct lttng_ust_shm_handle *handle) +{ + const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; + unsigned long beginidx = subbuf_index(offsets->begin, chan); + unsigned long commit_count; + struct commit_counters_hot *cc_hot; + + config->cb.buffer_begin(buf, tsc, beginidx, handle); + + /* + * Order all writes to buffer before the commit count update that will + * determine that the subbuffer is full. + */ + cmm_smp_wmb(); + cc_hot = shmp_index(handle, buf->commit_hot, beginidx); + if (!cc_hot) + return; + v_add(config, config->cb.subbuffer_header_size(), &cc_hot->cc); + commit_count = v_read(config, &cc_hot->cc); + /* Check if the written buffer has to be delivered */ + lib_ring_buffer_check_deliver(config, buf, chan, offsets->begin, + commit_count, beginidx, handle, tsc); + lib_ring_buffer_write_commit_counter(config, buf, chan, + offsets->begin + config->cb.subbuffer_header_size(), + commit_count, handle, cc_hot); +} + +/* + * lib_ring_buffer_switch_new_end: finish switching current subbuffer + * + * Calls subbuffer_set_data_size() to set the data size of the current + * sub-buffer. We do not need to perform check_deliver nor commit here, + * since this task will be done by the "commit" of the event for which + * we are currently doing the space reservation. + */ +static +void lib_ring_buffer_switch_new_end(struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_lib_ring_buffer_channel *chan, + struct switch_offsets *offsets, + uint64_t tsc, + struct lttng_ust_shm_handle *handle) +{ + const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; + unsigned long endidx, data_size; + uint64_t *ts_end; + + endidx = subbuf_index(offsets->end - 1, chan); + data_size = subbuf_offset(offsets->end - 1, chan) + 1; + subbuffer_set_data_size(config, &buf->backend, endidx, data_size, + handle); + ts_end = shmp_index(handle, buf->ts_end, endidx); + if (!ts_end) + return; + /* + * This is the last space reservation in that sub-buffer before + * it gets delivered. This provides exclusive access to write to + * this sub-buffer's ts_end. There are also no concurrent + * readers of that ts_end because delivery of that sub-buffer is + * postponed until the commit counter is incremented for the + * current space reservation. + */ + *ts_end = tsc; +} + +/* + * Returns : + * 0 if ok + * !0 if execution must be aborted. + */ +static +int lib_ring_buffer_try_switch_slow(enum switch_mode mode, + struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_lib_ring_buffer_channel *chan, + struct switch_offsets *offsets, + uint64_t *tsc, + struct lttng_ust_shm_handle *handle) +{ + const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; + unsigned long off, reserve_commit_diff; + + offsets->begin = v_read(config, &buf->offset); + offsets->old = offsets->begin; + offsets->switch_old_start = 0; + off = subbuf_offset(offsets->begin, chan); + + *tsc = config->cb.ring_buffer_clock_read(chan); + + /* + * Ensure we flush the header of an empty subbuffer when doing the + * finalize (SWITCH_FLUSH). This ensures that we end up knowing the + * total data gathering duration even if there were no records saved + * after the last buffer switch. 
+ * In SWITCH_ACTIVE mode, switch the buffer when it contains events. + * SWITCH_ACTIVE only flushes the current subbuffer, dealing with end of + * subbuffer header as appropriate. + * The next record that reserves space will be responsible for + * populating the following subbuffer header. We choose not to populate + * the next subbuffer header here because we want to be able to use + * SWITCH_ACTIVE for periodical buffer flush, which must + * guarantee that all the buffer content (records and header + * timestamps) are visible to the reader. This is required for + * quiescence guarantees for the fusion merge. + */ + if (mode != SWITCH_FLUSH && !off) + return -1; /* we do not have to switch : buffer is empty */ + + if (caa_unlikely(off == 0)) { + unsigned long sb_index, commit_count; + struct commit_counters_cold *cc_cold; + + /* + * We are performing a SWITCH_FLUSH. There may be concurrent + * writes into the buffer if e.g. invoked while performing a + * snapshot on an active trace. + * + * If the client does not save any header information + * (sub-buffer header size == 0), don't switch empty subbuffer + * on finalize, because it is invalid to deliver a completely + * empty subbuffer. + */ + if (!config->cb.subbuffer_header_size()) + return -1; + + /* Test new buffer integrity */ + sb_index = subbuf_index(offsets->begin, chan); + cc_cold = shmp_index(handle, buf->commit_cold, sb_index); + if (!cc_cold) + return -1; + commit_count = v_read(config, &cc_cold->cc_sb); + reserve_commit_diff = + (buf_trunc(offsets->begin, chan) + >> chan->backend.num_subbuf_order) + - (commit_count & chan->commit_count_mask); + if (caa_likely(reserve_commit_diff == 0)) { + /* Next subbuffer not being written to. */ + if (caa_unlikely(config->mode != RING_BUFFER_OVERWRITE && + subbuf_trunc(offsets->begin, chan) + - subbuf_trunc((unsigned long) + uatomic_read(&buf->consumed), chan) + >= chan->backend.buf_size)) { + /* + * We do not overwrite non consumed buffers + * and we are full : don't switch. + */ + return -1; + } else { + /* + * Next subbuffer not being written to, and we + * are either in overwrite mode or the buffer is + * not full. It's safe to write in this new + * subbuffer. + */ + } + } else { + /* + * Next subbuffer reserve offset does not match the + * commit offset. Don't perform switch in + * producer-consumer and overwrite mode. Caused by + * either a writer OOPS or too many nested writes over a + * reserve/commit pair. + */ + return -1; + } + + /* + * Need to write the subbuffer start header on finalize. + */ + offsets->switch_old_start = 1; + } + offsets->begin = subbuf_align(offsets->begin, chan); + /* Note: old points to the next subbuf at offset 0 */ + offsets->end = offsets->begin; + return 0; +} + +/* + * Force a sub-buffer switch. This operation is completely reentrant : can be + * called while tracing is active with absolutely no lock held. + * + * For RING_BUFFER_SYNC_PER_CPU ring buffers, as a v_cmpxchg is used for + * some atomic operations, this function must be called from the CPU + * which owns the buffer for a ACTIVE flush. However, for + * RING_BUFFER_SYNC_GLOBAL ring buffers, this function can be called + * from any CPU. 
+ */ +void lib_ring_buffer_switch_slow(struct lttng_ust_lib_ring_buffer *buf, enum switch_mode mode, + struct lttng_ust_shm_handle *handle) +{ + struct lttng_ust_lib_ring_buffer_channel *chan; + const struct lttng_ust_lib_ring_buffer_config *config; + struct switch_offsets offsets; + unsigned long oldidx; + uint64_t tsc; + + chan = shmp(handle, buf->backend.chan); + if (!chan) + return; + config = &chan->backend.config; + + offsets.size = 0; + + /* + * Perform retryable operations. + */ + do { + if (lib_ring_buffer_try_switch_slow(mode, buf, chan, &offsets, + &tsc, handle)) + return; /* Switch not needed */ + } while (v_cmpxchg(config, &buf->offset, offsets.old, offsets.end) + != offsets.old); + + /* + * Atomically update last_tsc. This update races against concurrent + * atomic updates, but the race will always cause supplementary full TSC + * records, never the opposite (missing a full TSC record when it would + * be needed). + */ + save_last_tsc(config, buf, tsc); + + /* + * Push the reader if necessary + */ + lib_ring_buffer_reserve_push_reader(buf, chan, offsets.old); + + oldidx = subbuf_index(offsets.old, chan); + lib_ring_buffer_clear_noref(config, &buf->backend, oldidx, handle); + + /* + * May need to populate header start on SWITCH_FLUSH. + */ + if (offsets.switch_old_start) { + lib_ring_buffer_switch_old_start(buf, chan, &offsets, tsc, handle); + offsets.old += config->cb.subbuffer_header_size(); + } + + /* + * Switch old subbuffer. + */ + lib_ring_buffer_switch_old_end(buf, chan, &offsets, tsc, handle); +} + +static +bool handle_blocking_retry(int *timeout_left_ms) +{ + int timeout = *timeout_left_ms, delay; + + if (caa_likely(!timeout)) + return false; /* Do not retry, discard event. */ + if (timeout < 0) /* Wait forever. */ + delay = RETRY_DELAY_MS; + else + delay = min_t(int, timeout, RETRY_DELAY_MS); + (void) poll(NULL, 0, delay); + if (timeout > 0) + *timeout_left_ms -= delay; + return true; /* Retry. */ +} + +/* + * Returns : + * 0 if ok + * -ENOSPC if event size is too large for packet. + * -ENOBUFS if there is currently not enough space in buffer for the event. + * -EIO if data cannot be written into the buffer for any other reason. 
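handle_blocking_retry() above uses poll(NULL, 0, delay) as a signal-tolerant millisecond sleep and decrements the caller's remaining budget so a blocking reservation eventually gives up. A stand-alone version of that retry helper with a small driver (the 100 ms delay and the function names are placeholders):

#include <poll.h>
#include <stdbool.h>
#include <stdio.h>

#define RETRY_DELAY_MS	100	/* placeholder; the real value is tuning-dependent */

static int min_int(int a, int b)
{
	return a < b ? a : b;
}

/*
 * Returns true if the caller should retry, false when the budget is
 * exhausted. A budget of -1 means "wait forever", 0 means "never block".
 */
static bool blocking_retry(int *timeout_left_ms)
{
	int timeout = *timeout_left_ms, delay;

	if (!timeout)
		return false;			/* no budget: fail immediately */
	if (timeout < 0)
		delay = RETRY_DELAY_MS;		/* unbounded wait */
	else
		delay = min_int(timeout, RETRY_DELAY_MS);
	(void) poll(NULL, 0, delay);		/* portable millisecond sleep */
	if (timeout > 0)
		*timeout_left_ms -= delay;
	return true;
}

int main(void)
{
	int budget_ms = 250, attempts = 0;

	while (blocking_retry(&budget_ms))
		attempts++;
	printf("gave up after %d delays, %d ms left\n", attempts, budget_ms);
	return 0;
}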
+ */ +static +int lib_ring_buffer_try_reserve_slow(struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_lib_ring_buffer_channel *chan, + struct switch_offsets *offsets, + struct lttng_ust_lib_ring_buffer_ctx *ctx, + void *client_ctx) +{ + struct lttng_ust_lib_ring_buffer_ctx_private *ctx_private = ctx->priv; + const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; + struct lttng_ust_shm_handle *handle = chan->handle; + unsigned long reserve_commit_diff, offset_cmp; + int timeout_left_ms = lttng_ust_ringbuffer_get_timeout(chan); + +retry: + offsets->begin = offset_cmp = v_read(config, &buf->offset); + offsets->old = offsets->begin; + offsets->switch_new_start = 0; + offsets->switch_new_end = 0; + offsets->switch_old_end = 0; + offsets->pre_header_padding = 0; + + ctx_private->tsc = config->cb.ring_buffer_clock_read(chan); + if ((int64_t) ctx_private->tsc == -EIO) + return -EIO; + + if (last_tsc_overflow(config, buf, ctx_private->tsc)) + ctx_private->rflags |= RING_BUFFER_RFLAG_FULL_TSC; + + if (caa_unlikely(subbuf_offset(offsets->begin, chan) == 0)) { + offsets->switch_new_start = 1; /* For offsets->begin */ + } else { + offsets->size = config->cb.record_header_size(config, chan, + offsets->begin, + &offsets->pre_header_padding, + ctx, client_ctx); + offsets->size += + lttng_ust_lib_ring_buffer_align(offsets->begin + offsets->size, + ctx->largest_align) + + ctx->data_size; + if (caa_unlikely(subbuf_offset(offsets->begin, chan) + + offsets->size > chan->backend.subbuf_size)) { + offsets->switch_old_end = 1; /* For offsets->old */ + offsets->switch_new_start = 1; /* For offsets->begin */ + } + } + if (caa_unlikely(offsets->switch_new_start)) { + unsigned long sb_index, commit_count; + struct commit_counters_cold *cc_cold; + + /* + * We are typically not filling the previous buffer completely. + */ + if (caa_likely(offsets->switch_old_end)) + offsets->begin = subbuf_align(offsets->begin, chan); + offsets->begin = offsets->begin + + config->cb.subbuffer_header_size(); + /* Test new buffer integrity */ + sb_index = subbuf_index(offsets->begin, chan); + /* + * Read buf->offset before buf->commit_cold[sb_index].cc_sb. + * lib_ring_buffer_check_deliver() has the matching + * memory barriers required around commit_cold cc_sb + * updates to ensure reserve and commit counter updates + * are not seen reordered when updated by another CPU. + */ + cmm_smp_rmb(); + cc_cold = shmp_index(handle, buf->commit_cold, sb_index); + if (!cc_cold) + return -1; + commit_count = v_read(config, &cc_cold->cc_sb); + /* Read buf->commit_cold[sb_index].cc_sb before buf->offset. */ + cmm_smp_rmb(); + if (caa_unlikely(offset_cmp != v_read(config, &buf->offset))) { + /* + * The reserve counter have been concurrently updated + * while we read the commit counter. This means the + * commit counter we read might not match buf->offset + * due to concurrent update. We therefore need to retry. + */ + goto retry; + } + reserve_commit_diff = + (buf_trunc(offsets->begin, chan) + >> chan->backend.num_subbuf_order) + - (commit_count & chan->commit_count_mask); + if (caa_likely(reserve_commit_diff == 0)) { + /* Next subbuffer not being written to. 
*/ + if (caa_unlikely(config->mode != RING_BUFFER_OVERWRITE && + subbuf_trunc(offsets->begin, chan) + - subbuf_trunc((unsigned long) + uatomic_read(&buf->consumed), chan) + >= chan->backend.buf_size)) { + unsigned long nr_lost; + + if (handle_blocking_retry(&timeout_left_ms)) + goto retry; + + /* + * We do not overwrite non consumed buffers + * and we are full : record is lost. + */ + nr_lost = v_read(config, &buf->records_lost_full); + v_inc(config, &buf->records_lost_full); + if ((nr_lost & (DBG_PRINT_NR_LOST - 1)) == 0) { + DBG("%lu or more records lost in (%s:%d) (buffer full)\n", + nr_lost + 1, chan->backend.name, + buf->backend.cpu); + } + return -ENOBUFS; + } else { + /* + * Next subbuffer not being written to, and we + * are either in overwrite mode or the buffer is + * not full. It's safe to write in this new + * subbuffer. + */ + } + } else { + unsigned long nr_lost; + + /* + * Next subbuffer reserve offset does not match the + * commit offset, and this did not involve update to the + * reserve counter. Drop record in producer-consumer and + * overwrite mode. Caused by either a writer OOPS or too + * many nested writes over a reserve/commit pair. + */ + nr_lost = v_read(config, &buf->records_lost_wrap); + v_inc(config, &buf->records_lost_wrap); + if ((nr_lost & (DBG_PRINT_NR_LOST - 1)) == 0) { + DBG("%lu or more records lost in (%s:%d) (wrap-around)\n", + nr_lost + 1, chan->backend.name, + buf->backend.cpu); + } + return -EIO; + } + offsets->size = + config->cb.record_header_size(config, chan, + offsets->begin, + &offsets->pre_header_padding, + ctx, client_ctx); + offsets->size += + lttng_ust_lib_ring_buffer_align(offsets->begin + offsets->size, + ctx->largest_align) + + ctx->data_size; + if (caa_unlikely(subbuf_offset(offsets->begin, chan) + + offsets->size > chan->backend.subbuf_size)) { + unsigned long nr_lost; + + /* + * Record too big for subbuffers, report error, don't + * complete the sub-buffer switch. + */ + nr_lost = v_read(config, &buf->records_lost_big); + v_inc(config, &buf->records_lost_big); + if ((nr_lost & (DBG_PRINT_NR_LOST - 1)) == 0) { + DBG("%lu or more records lost in (%s:%d) record size " + " of %zu bytes is too large for buffer\n", + nr_lost + 1, chan->backend.name, + buf->backend.cpu, offsets->size); + } + return -ENOSPC; + } else { + /* + * We just made a successful buffer switch and the + * record fits in the new subbuffer. Let's write. + */ + } + } else { + /* + * Record fits in the current buffer and we are not on a switch + * boundary. It's safe to write. + */ + } + offsets->end = offsets->begin + offsets->size; + + if (caa_unlikely(subbuf_offset(offsets->end, chan) == 0)) { + /* + * The offset_end will fall at the very beginning of the next + * subbuffer. + */ + offsets->switch_new_end = 1; /* For offsets->begin */ + } + return 0; +} + +/** + * lib_ring_buffer_reserve_slow - Atomic slot reservation in a buffer. + * @ctx: ring buffer context. + * + * Return : -NOBUFS if not enough space, -ENOSPC if event size too large, + * -EIO for other errors, else returns 0. + * It will take care of sub-buffer switching. 
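+ *
+ * This is the slow path behind the client reserve fast path. Assuming the
+ * lib_ring_buffer_reserve()/lib_ring_buffer_commit() static inlines from
+ * frontend_api.h, a client record write is roughly (sketch only):
+ *
+ *	if (caa_unlikely(lib_ring_buffer_reserve(config, &ctx, client_ctx)))
+ *		return;
+ *	lib_ring_buffer_write(config, &ctx, payload, len);
+ *	lib_ring_buffer_commit(config, &ctx);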
+ */ +int lib_ring_buffer_reserve_slow(struct lttng_ust_lib_ring_buffer_ctx *ctx, + void *client_ctx) +{ + struct lttng_ust_lib_ring_buffer_ctx_private *ctx_private = ctx->priv; + struct lttng_ust_lib_ring_buffer_channel *chan = ctx_private->chan; + struct lttng_ust_shm_handle *handle = chan->handle; + const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; + struct lttng_ust_lib_ring_buffer *buf; + struct switch_offsets offsets; + int ret; + + if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) + buf = shmp(handle, chan->backend.buf[ctx_private->reserve_cpu].shmp); + else + buf = shmp(handle, chan->backend.buf[0].shmp); + if (!buf) + return -EIO; + ctx_private->buf = buf; + + offsets.size = 0; + + do { + ret = lib_ring_buffer_try_reserve_slow(buf, chan, &offsets, + ctx, client_ctx); + if (caa_unlikely(ret)) + return ret; + } while (caa_unlikely(v_cmpxchg(config, &buf->offset, offsets.old, + offsets.end) + != offsets.old)); + + /* + * Atomically update last_tsc. This update races against concurrent + * atomic updates, but the race will always cause supplementary full TSC + * records, never the opposite (missing a full TSC record when it would + * be needed). + */ + save_last_tsc(config, buf, ctx_private->tsc); + + /* + * Push the reader if necessary + */ + lib_ring_buffer_reserve_push_reader(buf, chan, offsets.end - 1); + + /* + * Clear noref flag for this subbuffer. + */ + lib_ring_buffer_clear_noref(config, &buf->backend, + subbuf_index(offsets.end - 1, chan), + handle); + + /* + * Switch old subbuffer if needed. + */ + if (caa_unlikely(offsets.switch_old_end)) { + lib_ring_buffer_clear_noref(config, &buf->backend, + subbuf_index(offsets.old - 1, chan), + handle); + lib_ring_buffer_switch_old_end(buf, chan, &offsets, ctx_private->tsc, handle); + } + + /* + * Populate new subbuffer. + */ + if (caa_unlikely(offsets.switch_new_start)) + lib_ring_buffer_switch_new_start(buf, chan, &offsets, ctx_private->tsc, handle); + + if (caa_unlikely(offsets.switch_new_end)) + lib_ring_buffer_switch_new_end(buf, chan, &offsets, ctx_private->tsc, handle); + + ctx_private->slot_size = offsets.size; + ctx_private->pre_offset = offsets.begin; + ctx_private->buf_offset = offsets.begin + offsets.pre_header_padding; + return 0; +} + +static +void lib_ring_buffer_vmcore_check_deliver(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer *buf, + unsigned long commit_count, + unsigned long idx, + struct lttng_ust_shm_handle *handle) +{ + struct commit_counters_hot *cc_hot; + + if (config->oops != RING_BUFFER_OOPS_CONSISTENCY) + return; + cc_hot = shmp_index(handle, buf->commit_hot, idx); + if (!cc_hot) + return; + v_set(config, &cc_hot->seq, commit_count); +} + +/* + * The ring buffer can count events recorded and overwritten per buffer, + * but it is disabled by default due to its performance overhead. 
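+ * Counting can be enabled at build time by defining
+ * LTTNG_RING_BUFFER_COUNT_EVENTS, e.g. (illustrative):
+ *
+ *	./configure CPPFLAGS="-DLTTNG_RING_BUFFER_COUNT_EVENTS"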
+ */ +#ifdef LTTNG_RING_BUFFER_COUNT_EVENTS +static +void deliver_count_events(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer *buf, + unsigned long idx, + struct lttng_ust_shm_handle *handle) +{ + v_add(config, subbuffer_get_records_count(config, + &buf->backend, idx, handle), + &buf->records_count); + v_add(config, subbuffer_count_records_overrun(config, + &buf->backend, idx, handle), + &buf->records_overrun); +} +#else /* LTTNG_RING_BUFFER_COUNT_EVENTS */ +static +void deliver_count_events( + const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), + struct lttng_ust_lib_ring_buffer *buf __attribute__((unused)), + unsigned long idx __attribute__((unused)), + struct lttng_ust_shm_handle *handle __attribute__((unused))) +{ +} +#endif /* #else LTTNG_RING_BUFFER_COUNT_EVENTS */ + +void lib_ring_buffer_check_deliver_slow(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_lib_ring_buffer_channel *chan, + unsigned long offset, + unsigned long commit_count, + unsigned long idx, + struct lttng_ust_shm_handle *handle, + uint64_t tsc __attribute__((unused))) +{ + unsigned long old_commit_count = commit_count + - chan->backend.subbuf_size; + struct commit_counters_cold *cc_cold; + + /* + * If we succeeded at updating cc_sb below, we are the subbuffer + * writer delivering the subbuffer. Deals with concurrent + * updates of the "cc" value without adding a add_return atomic + * operation to the fast path. + * + * We are doing the delivery in two steps: + * - First, we cmpxchg() cc_sb to the new value + * old_commit_count + 1. This ensures that we are the only + * subbuffer user successfully filling the subbuffer, but we + * do _not_ set the cc_sb value to "commit_count" yet. + * Therefore, other writers that would wrap around the ring + * buffer and try to start writing to our subbuffer would + * have to drop records, because it would appear as + * non-filled. + * We therefore have exclusive access to the subbuffer control + * structures. This mutual exclusion with other writers is + * crucially important to perform record overruns count in + * flight recorder mode locklessly. + * - When we are ready to release the subbuffer (either for + * reading or for overrun by other writers), we simply set the + * cc_sb value to "commit_count" and perform delivery. + * + * The subbuffer size is least 2 bytes (minimum size: 1 page). + * This guarantees that old_commit_count + 1 != commit_count. + */ + + /* + * Order prior updates to reserve count prior to the + * commit_cold cc_sb update. + */ + cmm_smp_wmb(); + cc_cold = shmp_index(handle, buf->commit_cold, idx); + if (!cc_cold) + return; + if (caa_likely(v_cmpxchg(config, &cc_cold->cc_sb, + old_commit_count, old_commit_count + 1) + == old_commit_count)) { + uint64_t *ts_end; + + /* + * Start of exclusive subbuffer access. We are + * guaranteed to be the last writer in this subbuffer + * and any other writer trying to access this subbuffer + * in this state is required to drop records. + * + * We can read the ts_end for the current sub-buffer + * which has been saved by the very last space + * reservation for the current sub-buffer. + * + * Order increment of commit counter before reading ts_end. 
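+		 *
+		 * Restating the exclusive-access window implemented by the
+		 * code below (no extra semantics intended):
+		 *
+		 *	v_cmpxchg(cc_sb, old, old + 1)   acquire exclusivity
+		 *	cmm_smp_mb()
+		 *	read *ts_end, deliver sub-buffer
+		 *	cmm_smp_mb()
+		 *	v_set(cc_sb, commit_count)       release exclusivity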
+ */ + cmm_smp_mb(); + ts_end = shmp_index(handle, buf->ts_end, idx); + if (!ts_end) + return; + deliver_count_events(config, buf, idx, handle); + config->cb.buffer_end(buf, *ts_end, idx, + lib_ring_buffer_get_data_size(config, + buf, + idx, + handle), + handle); + + /* + * Increment the packet counter while we have exclusive + * access. + */ + subbuffer_inc_packet_count(config, &buf->backend, idx, handle); + + /* + * Set noref flag and offset for this subbuffer id. + * Contains a memory barrier that ensures counter stores + * are ordered before set noref and offset. + */ + lib_ring_buffer_set_noref_offset(config, &buf->backend, idx, + buf_trunc_val(offset, chan), handle); + + /* + * Order set_noref and record counter updates before the + * end of subbuffer exclusive access. Orders with + * respect to writers coming into the subbuffer after + * wrap around, and also order wrt concurrent readers. + */ + cmm_smp_mb(); + /* End of exclusive subbuffer access */ + v_set(config, &cc_cold->cc_sb, commit_count); + /* + * Order later updates to reserve count after + * the commit cold cc_sb update. + */ + cmm_smp_wmb(); + lib_ring_buffer_vmcore_check_deliver(config, buf, + commit_count, idx, handle); + + /* + * RING_BUFFER_WAKEUP_BY_WRITER wakeup is not lock-free. + */ + if (config->wakeup == RING_BUFFER_WAKEUP_BY_WRITER + && uatomic_read(&buf->active_readers) + && lib_ring_buffer_poll_deliver(config, buf, chan, handle)) { + lib_ring_buffer_wakeup(buf, handle); + } + } +} + +/* + * Force a read (imply TLS fixup for dlopen) of TLS variables. + */ +void lttng_fixup_ringbuffer_tls(void) +{ + asm volatile ("" : : "m" (URCU_TLS(lib_ring_buffer_nesting))); +} + +void lib_ringbuffer_signal_init(void) +{ + sigset_t mask; + int ret; + + /* + * Block signal for entire process, so only our thread processes + * it. + */ + rb_setmask(&mask); + ret = pthread_sigmask(SIG_BLOCK, &mask, NULL); + if (ret) { + errno = ret; + PERROR("pthread_sigmask"); + } +} diff --git a/src/common/ringbuffer/ringbuffer-config.h b/src/common/ringbuffer/ringbuffer-config.h new file mode 100644 index 00000000..2e10de03 --- /dev/null +++ b/src/common/ringbuffer/ringbuffer-config.h @@ -0,0 +1,242 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright (C) 2010-2021 Mathieu Desnoyers + * + * Ring buffer configuration header. Note: after declaring the standard inline + * functions, clients should also include linux/ringbuffer/api.h. + */ + +#ifndef _LTTNG_RING_BUFFER_CONFIG_H +#define _LTTNG_RING_BUFFER_CONFIG_H + +#include +#include +#include +#include +#include + +#include +#include +#include + +struct lttng_ust_lib_ring_buffer; +struct lttng_ust_lib_ring_buffer_channel; +struct lttng_ust_lib_ring_buffer_config; +struct lttng_ust_lib_ring_buffer_ctx_private; +struct lttng_ust_shm_handle; + +/* + * Ring buffer client callbacks. Only used by slow path, never on fast path. + * For the fast path, record_header_size(), ring_buffer_clock_read() should be + * provided as inline functions too. These may simply return 0 if not used by + * the client. 
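+ *
+ * A client typically provides the callbacks through designated
+ * initializers, along the lines of this sketch (the client_* names are
+ * placeholders, not part of the API):
+ *
+ *	static const struct lttng_ust_lib_ring_buffer_client_cb client_cb = {
+ *		.ring_buffer_clock_read = client_ring_buffer_clock_read,
+ *		.record_header_size = client_record_header_size,
+ *		.subbuffer_header_size = client_packet_header_size,
+ *		.buffer_begin = client_buffer_begin,
+ *		.buffer_end = client_buffer_end,
+ *		.buffer_create = client_buffer_create,
+ *		.buffer_finalize = client_buffer_finalize,
+ *	};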
+ */ +struct lttng_ust_lib_ring_buffer_client_cb { + /* Mandatory callbacks */ + + /* A static inline version is also required for fast path */ + uint64_t (*ring_buffer_clock_read) (struct lttng_ust_lib_ring_buffer_channel *chan); + size_t (*record_header_size) (const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_channel *chan, + size_t offset, + size_t *pre_header_padding, + struct lttng_ust_lib_ring_buffer_ctx *ctx, + void *client_ctx); + + /* Slow path only, at subbuffer switch */ + size_t (*subbuffer_header_size) (void); + void (*buffer_begin) (struct lttng_ust_lib_ring_buffer *buf, uint64_t tsc, + unsigned int subbuf_idx, + struct lttng_ust_shm_handle *handle); + void (*buffer_end) (struct lttng_ust_lib_ring_buffer *buf, uint64_t tsc, + unsigned int subbuf_idx, unsigned long data_size, + struct lttng_ust_shm_handle *handle); + + /* Optional callbacks (can be set to NULL) */ + + /* Called at buffer creation/finalize */ + int (*buffer_create) (struct lttng_ust_lib_ring_buffer *buf, void *priv, + int cpu, const char *name, + struct lttng_ust_shm_handle *handle); + /* + * Clients should guarantee that no new reader handle can be opened + * after finalize. + */ + void (*buffer_finalize) (struct lttng_ust_lib_ring_buffer *buf, + void *priv, int cpu, + struct lttng_ust_shm_handle *handle); + + /* + * Extract header length, payload length and timestamp from event + * record. Used by buffer iterators. Timestamp is only used by channel + * iterator. + */ + void (*record_get) (const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer_channel *chan, + struct lttng_ust_lib_ring_buffer *buf, + size_t offset, size_t *header_len, + size_t *payload_len, uint64_t *timestamp, + struct lttng_ust_shm_handle *handle); + /* + * Offset and size of content size field in client. + */ + void (*content_size_field) (const struct lttng_ust_lib_ring_buffer_config *config, + size_t *offset, size_t *length); + void (*packet_size_field) (const struct lttng_ust_lib_ring_buffer_config *config, + size_t *offset, size_t *length); +}; + +/* + * Ring buffer instance configuration. + * + * Declare as "static const" within the client object to ensure the inline fast + * paths can be optimized. + * + * alloc/sync pairs: + * + * RING_BUFFER_ALLOC_PER_CPU and RING_BUFFER_SYNC_PER_CPU : + * Per-cpu buffers with per-cpu synchronization. + * + * RING_BUFFER_ALLOC_PER_CPU and RING_BUFFER_SYNC_GLOBAL : + * Per-cpu buffer with global synchronization. Tracing can be performed with + * preemption enabled, statistically stays on the local buffers. + * + * RING_BUFFER_ALLOC_GLOBAL and RING_BUFFER_SYNC_PER_CPU : + * Should only be used for buffers belonging to a single thread or protected + * by mutual exclusion by the client. Note that periodical sub-buffer switch + * should be disabled in this kind of configuration. + * + * RING_BUFFER_ALLOC_GLOBAL and RING_BUFFER_SYNC_GLOBAL : + * Global shared buffer with global synchronization. + * + * wakeup: + * + * RING_BUFFER_WAKEUP_BY_TIMER uses per-cpu deferrable timers to poll the + * buffers and wake up readers if data is ready. Mainly useful for tracers which + * don't want to call into the wakeup code on the tracing path. Use in + * combination with "read_timer_interval" channel_create() argument. + * + * RING_BUFFER_WAKEUP_BY_WRITER directly wakes up readers when a subbuffer is + * ready to read. Lower latencies before the reader is woken up. Mainly suitable + * for drivers. 
+ * + * RING_BUFFER_WAKEUP_NONE does not perform any wakeup whatsoever. The client + * has the responsibility to perform wakeups. + */ +#define LTTNG_UST_RING_BUFFER_CONFIG_PADDING 20 + +enum lttng_ust_lib_ring_buffer_alloc_types { + RING_BUFFER_ALLOC_PER_CPU, + RING_BUFFER_ALLOC_GLOBAL, +}; + +enum lttng_ust_lib_ring_buffer_sync_types { + RING_BUFFER_SYNC_PER_CPU, /* Wait-free */ + RING_BUFFER_SYNC_GLOBAL, /* Lock-free */ +}; + +enum lttng_ust_lib_ring_buffer_mode_types { + RING_BUFFER_OVERWRITE = 0, /* Overwrite when buffer full */ + RING_BUFFER_DISCARD = 1, /* Discard when buffer full */ +}; + +enum lttng_ust_lib_ring_buffer_output_types { + RING_BUFFER_SPLICE, + RING_BUFFER_MMAP, + RING_BUFFER_READ, /* TODO */ + RING_BUFFER_ITERATOR, + RING_BUFFER_NONE, +}; + +enum lttng_ust_lib_ring_buffer_backend_types { + RING_BUFFER_PAGE, + RING_BUFFER_VMAP, /* TODO */ + RING_BUFFER_STATIC, /* TODO */ +}; + +enum lttng_ust_lib_ring_buffer_oops_types { + RING_BUFFER_NO_OOPS_CONSISTENCY, + RING_BUFFER_OOPS_CONSISTENCY, +}; + +enum lttng_ust_lib_ring_buffer_ipi_types { + RING_BUFFER_IPI_BARRIER, + RING_BUFFER_NO_IPI_BARRIER, +}; + +enum lttng_ust_lib_ring_buffer_wakeup_types { + RING_BUFFER_WAKEUP_BY_TIMER, /* wake up performed by timer */ + RING_BUFFER_WAKEUP_BY_WRITER, /* + * writer wakes up reader, + * not lock-free + * (takes spinlock). + */ +}; + +struct lttng_ust_lib_ring_buffer_config { + enum lttng_ust_lib_ring_buffer_alloc_types alloc; + enum lttng_ust_lib_ring_buffer_sync_types sync; + enum lttng_ust_lib_ring_buffer_mode_types mode; + enum lttng_ust_lib_ring_buffer_output_types output; + enum lttng_ust_lib_ring_buffer_backend_types backend; + enum lttng_ust_lib_ring_buffer_oops_types oops; + enum lttng_ust_lib_ring_buffer_ipi_types ipi; + enum lttng_ust_lib_ring_buffer_wakeup_types wakeup; + /* + * tsc_bits: timestamp bits saved at each record. + * 0 and 64 disable the timestamp compression scheme. + */ + unsigned int tsc_bits; + struct lttng_ust_lib_ring_buffer_client_cb cb; + /* + * client_type is used by the consumer process (which is in a + * different address space) to lookup the appropriate client + * callbacks and update the cb pointers. + */ + int client_type; + int _unused1; + const struct lttng_ust_lib_ring_buffer_client_cb *cb_ptr; + char padding[LTTNG_UST_RING_BUFFER_CONFIG_PADDING]; +}; + +/* + * Reservation flags. + * + * RING_BUFFER_RFLAG_FULL_TSC + * + * This flag is passed to record_header_size() and to the primitive used to + * write the record header. It indicates that the full 64-bit time value is + * needed in the record header. If this flag is not set, the record header needs + * only to contain "tsc_bits" bit of time value. + * + * Reservation flags can be added by the client, starting from + * "(RING_BUFFER_FLAGS_END << 0)". It can be used to pass information from + * record_header_size() to lib_ring_buffer_write_record_header(). + */ +#define RING_BUFFER_RFLAG_FULL_TSC (1U << 0) +#define RING_BUFFER_RFLAG_END (1U << 1) + +/* + * lib_ring_buffer_check_config() returns 0 on success. + * Used internally to check for valid configurations at channel creation. 
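+ *
+ * For example (illustrative), the check below rejects a global-alloc,
+ * per-CPU-sync configuration as soon as a switch timer is requested:
+ *
+ *	config.alloc = RING_BUFFER_ALLOC_GLOBAL;
+ *	config.sync = RING_BUFFER_SYNC_PER_CPU;
+ *	ret = lib_ring_buffer_check_config(&config, 1000, 0);
+ *
+ * which returns -EINVAL because a non-zero switch_timer_interval is not
+ * supported with this alloc/sync combination.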
+ */ +static inline +int lib_ring_buffer_check_config(const struct lttng_ust_lib_ring_buffer_config *config, + unsigned int switch_timer_interval, + unsigned int read_timer_interval) + lttng_ust_notrace; + +static inline +int lib_ring_buffer_check_config(const struct lttng_ust_lib_ring_buffer_config *config, + unsigned int switch_timer_interval, + unsigned int read_timer_interval __attribute__((unused))) +{ + if (config->alloc == RING_BUFFER_ALLOC_GLOBAL + && config->sync == RING_BUFFER_SYNC_PER_CPU + && switch_timer_interval) + return -EINVAL; + return 0; +} + +#endif /* _LTTNG_RING_BUFFER_CONFIG_H */ diff --git a/src/common/ringbuffer/shm.c b/src/common/ringbuffer/shm.c new file mode 100644 index 00000000..a5de019c --- /dev/null +++ b/src/common/ringbuffer/shm.c @@ -0,0 +1,522 @@ +/* + * SPDX-License-Identifier: LGPL-2.1-only + * + * Copyright (C) 2005-2012 Mathieu Desnoyers + */ + +#define _LGPL_SOURCE +#include "shm.h" +#include +#include +#include +#include +#include /* For mode constants */ +#include /* For O_* constants */ +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_LIBNUMA +#include +#include +#endif + +#include + +#include "common/macros.h" +#include "common/ust-fd.h" +#include "mmap.h" + +/* + * Ensure we have the required amount of space available by writing 0 + * into the entire buffer. Not doing so can trigger SIGBUS when going + * beyond the available shm space. + */ +static +int zero_file(int fd, size_t len) +{ + ssize_t retlen; + size_t written = 0; + char *zeropage; + long pagelen; + int ret; + + pagelen = sysconf(_SC_PAGESIZE); + if (pagelen < 0) + return (int) pagelen; + zeropage = calloc(pagelen, 1); + if (!zeropage) + return -ENOMEM; + + while (len > written) { + do { + retlen = write(fd, zeropage, + min_t(size_t, pagelen, len - written)); + } while (retlen == -1UL && errno == EINTR); + if (retlen < 0) { + ret = (int) retlen; + goto error; + } + written += retlen; + } + ret = 0; +error: + free(zeropage); + return ret; +} + +struct shm_object_table *shm_object_table_create(size_t max_nb_obj) +{ + struct shm_object_table *table; + + table = zmalloc(sizeof(struct shm_object_table) + + max_nb_obj * sizeof(table->objects[0])); + if (!table) + return NULL; + table->size = max_nb_obj; + return table; +} + +static +struct shm_object *_shm_object_table_alloc_shm(struct shm_object_table *table, + size_t memory_map_size, + int stream_fd) +{ + int shmfd, waitfd[2], ret, i; + struct shm_object *obj; + char *memory_map; + + if (stream_fd < 0) + return NULL; + if (table->allocated_len >= table->size) + return NULL; + obj = &table->objects[table->allocated_len]; + + /* wait_fd: create pipe */ + ret = pipe(waitfd); + if (ret < 0) { + PERROR("pipe"); + goto error_pipe; + } + for (i = 0; i < 2; i++) { + ret = fcntl(waitfd[i], F_SETFD, FD_CLOEXEC); + if (ret < 0) { + PERROR("fcntl"); + goto error_fcntl; + } + } + /* The write end of the pipe needs to be non-blocking */ + ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK); + if (ret < 0) { + PERROR("fcntl"); + goto error_fcntl; + } + memcpy(obj->wait_fd, waitfd, sizeof(waitfd)); + + /* + * Set POSIX shared memory object size + * + * First, use ftruncate() to set its size, some implementations won't + * allow writes past the size set by ftruncate. + * Then, use write() to fill it with zeros, this allows us to fully + * allocate it and detect a shortage of shm space without dealing with + * a SIGBUS. 
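+	 *
+	 * Without the zero-fill, a later store through the mapping, e.g.
+	 *
+	 *	memory_map[memory_map_size - 1] = 0;
+	 *
+	 * could raise SIGBUS if the shm backing store runs out of space,
+	 * instead of being reported cleanly as a write error by zero_file().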
+ */ + + shmfd = stream_fd; + ret = ftruncate(shmfd, memory_map_size); + if (ret) { + PERROR("ftruncate"); + goto error_ftruncate; + } + ret = zero_file(shmfd, memory_map_size); + if (ret) { + PERROR("zero_file"); + goto error_zero_file; + } + + /* + * Also ensure the file metadata is synced with the storage by using + * fsync(2). Some platforms don't allow fsync on POSIX shm fds, ignore + * EINVAL accordingly. + */ + ret = fsync(shmfd); + if (ret && errno != EINVAL) { + PERROR("fsync"); + goto error_fsync; + } + obj->shm_fd_ownership = 0; + obj->shm_fd = shmfd; + + /* memory_map: mmap */ + memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE, + MAP_SHARED | LTTNG_MAP_POPULATE, shmfd, 0); + if (memory_map == MAP_FAILED) { + PERROR("mmap"); + goto error_mmap; + } + obj->type = SHM_OBJECT_SHM; + obj->memory_map = memory_map; + obj->memory_map_size = memory_map_size; + obj->allocated_len = 0; + obj->index = table->allocated_len++; + + return obj; + +error_mmap: +error_fsync: +error_ftruncate: +error_zero_file: +error_fcntl: + for (i = 0; i < 2; i++) { + ret = close(waitfd[i]); + if (ret) { + PERROR("close"); + assert(0); + } + } +error_pipe: + return NULL; +} + +static +struct shm_object *_shm_object_table_alloc_mem(struct shm_object_table *table, + size_t memory_map_size) +{ + struct shm_object *obj; + void *memory_map; + int waitfd[2], i, ret; + + if (table->allocated_len >= table->size) + return NULL; + obj = &table->objects[table->allocated_len]; + + memory_map = zmalloc(memory_map_size); + if (!memory_map) + goto alloc_error; + + /* wait_fd: create pipe */ + ret = pipe(waitfd); + if (ret < 0) { + PERROR("pipe"); + goto error_pipe; + } + for (i = 0; i < 2; i++) { + ret = fcntl(waitfd[i], F_SETFD, FD_CLOEXEC); + if (ret < 0) { + PERROR("fcntl"); + goto error_fcntl; + } + } + /* The write end of the pipe needs to be non-blocking */ + ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK); + if (ret < 0) { + PERROR("fcntl"); + goto error_fcntl; + } + memcpy(obj->wait_fd, waitfd, sizeof(waitfd)); + + /* no shm_fd */ + obj->shm_fd = -1; + obj->shm_fd_ownership = 0; + + obj->type = SHM_OBJECT_MEM; + obj->memory_map = memory_map; + obj->memory_map_size = memory_map_size; + obj->allocated_len = 0; + obj->index = table->allocated_len++; + + return obj; + +error_fcntl: + for (i = 0; i < 2; i++) { + ret = close(waitfd[i]); + if (ret) { + PERROR("close"); + assert(0); + } + } +error_pipe: + free(memory_map); +alloc_error: + return NULL; +} + +/* + * libnuma prints errors on the console even for numa_available(). + * Work-around this limitation by using get_mempolicy() directly to + * check whether the kernel supports mempolicy. 
+ */ +#ifdef HAVE_LIBNUMA +static bool lttng_is_numa_available(void) +{ + int ret; + + ret = get_mempolicy(NULL, NULL, 0, NULL, 0); + if (ret && errno == ENOSYS) { + return false; + } + return numa_available() > 0; +} +#endif + +struct shm_object *shm_object_table_alloc(struct shm_object_table *table, + size_t memory_map_size, + enum shm_object_type type, + int stream_fd, + int cpu) +{ + struct shm_object *shm_object; +#ifdef HAVE_LIBNUMA + int oldnode = 0, node; + bool numa_avail; + + numa_avail = lttng_is_numa_available(); + if (numa_avail) { + oldnode = numa_preferred(); + if (cpu >= 0) { + node = numa_node_of_cpu(cpu); + if (node >= 0) + numa_set_preferred(node); + } + if (cpu < 0 || node < 0) + numa_set_localalloc(); + } +#endif /* HAVE_LIBNUMA */ + switch (type) { + case SHM_OBJECT_SHM: + shm_object = _shm_object_table_alloc_shm(table, memory_map_size, + stream_fd); + break; + case SHM_OBJECT_MEM: + shm_object = _shm_object_table_alloc_mem(table, memory_map_size); + break; + default: + assert(0); + } +#ifdef HAVE_LIBNUMA + if (numa_avail) + numa_set_preferred(oldnode); +#endif /* HAVE_LIBNUMA */ + return shm_object; +} + +struct shm_object *shm_object_table_append_shm(struct shm_object_table *table, + int shm_fd, int wakeup_fd, uint32_t stream_nr, + size_t memory_map_size) +{ + struct shm_object *obj; + char *memory_map; + int ret; + + if (table->allocated_len >= table->size) + return NULL; + /* streams _must_ be received in sequential order, else fail. */ + if (stream_nr + 1 != table->allocated_len) + return NULL; + + obj = &table->objects[table->allocated_len]; + + /* wait_fd: set write end of the pipe. */ + obj->wait_fd[0] = -1; /* read end is unset */ + obj->wait_fd[1] = wakeup_fd; + obj->shm_fd = shm_fd; + obj->shm_fd_ownership = 1; + + /* The write end of the pipe needs to be non-blocking */ + ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK); + if (ret < 0) { + PERROR("fcntl"); + goto error_fcntl; + } + + /* memory_map: mmap */ + memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE, + MAP_SHARED | LTTNG_MAP_POPULATE, shm_fd, 0); + if (memory_map == MAP_FAILED) { + PERROR("mmap"); + goto error_mmap; + } + obj->type = SHM_OBJECT_SHM; + obj->memory_map = memory_map; + obj->memory_map_size = memory_map_size; + obj->allocated_len = memory_map_size; + obj->index = table->allocated_len++; + + return obj; + +error_fcntl: +error_mmap: + return NULL; +} + +/* + * Passing ownership of mem to object. 
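+ *
+ * Ownership is only transferred when a non-NULL object is returned; a
+ * caller sketch (illustrative, assuming mem is heap-allocated):
+ *
+ *	obj = shm_object_table_append_mem(table, mem, len, wakeup_fd);
+ *	if (!obj)
+ *		free(mem);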
+ */ +struct shm_object *shm_object_table_append_mem(struct shm_object_table *table, + void *mem, size_t memory_map_size, int wakeup_fd) +{ + struct shm_object *obj; + int ret; + + if (table->allocated_len >= table->size) + return NULL; + obj = &table->objects[table->allocated_len]; + + obj->wait_fd[0] = -1; /* read end is unset */ + obj->wait_fd[1] = wakeup_fd; + obj->shm_fd = -1; + obj->shm_fd_ownership = 0; + + ret = fcntl(obj->wait_fd[1], F_SETFD, FD_CLOEXEC); + if (ret < 0) { + PERROR("fcntl"); + goto error_fcntl; + } + /* The write end of the pipe needs to be non-blocking */ + ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK); + if (ret < 0) { + PERROR("fcntl"); + goto error_fcntl; + } + + obj->type = SHM_OBJECT_MEM; + obj->memory_map = mem; + obj->memory_map_size = memory_map_size; + obj->allocated_len = memory_map_size; + obj->index = table->allocated_len++; + + return obj; + +error_fcntl: + return NULL; +} + +static +void shmp_object_destroy(struct shm_object *obj, int consumer) +{ + switch (obj->type) { + case SHM_OBJECT_SHM: + { + int ret, i; + + ret = munmap(obj->memory_map, obj->memory_map_size); + if (ret) { + PERROR("umnmap"); + assert(0); + } + + if (obj->shm_fd_ownership) { + /* Delete FDs only if called from app (not consumer). */ + if (!consumer) { + lttng_ust_lock_fd_tracker(); + ret = close(obj->shm_fd); + if (!ret) { + lttng_ust_delete_fd_from_tracker(obj->shm_fd); + } else { + PERROR("close"); + assert(0); + } + lttng_ust_unlock_fd_tracker(); + } else { + ret = close(obj->shm_fd); + if (ret) { + PERROR("close"); + assert(0); + } + } + } + for (i = 0; i < 2; i++) { + if (obj->wait_fd[i] < 0) + continue; + if (!consumer) { + lttng_ust_lock_fd_tracker(); + ret = close(obj->wait_fd[i]); + if (!ret) { + lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]); + } else { + PERROR("close"); + assert(0); + } + lttng_ust_unlock_fd_tracker(); + } else { + ret = close(obj->wait_fd[i]); + if (ret) { + PERROR("close"); + assert(0); + } + } + } + break; + } + case SHM_OBJECT_MEM: + { + int ret, i; + + for (i = 0; i < 2; i++) { + if (obj->wait_fd[i] < 0) + continue; + if (!consumer) { + lttng_ust_lock_fd_tracker(); + ret = close(obj->wait_fd[i]); + if (!ret) { + lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]); + } else { + PERROR("close"); + assert(0); + } + lttng_ust_unlock_fd_tracker(); + } else { + ret = close(obj->wait_fd[i]); + if (ret) { + PERROR("close"); + assert(0); + } + } + } + free(obj->memory_map); + break; + } + default: + assert(0); + } +} + +void shm_object_table_destroy(struct shm_object_table *table, int consumer) +{ + int i; + + for (i = 0; i < table->allocated_len; i++) + shmp_object_destroy(&table->objects[i], consumer); + free(table); +} + +/* + * zalloc_shm - allocate memory within a shm object. + * + * Shared memory is already zeroed by shmget. + * *NOT* multithread-safe (should be protected by mutex). + * Returns a -1, -1 tuple on error. 
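+ *
+ * Callers detect failure by checking both fields of the returned tuple,
+ * e.g. (sketch):
+ *
+ *	struct shm_ref ref = zalloc_shm(obj, len);
+ *
+ *	if (ref.index == -1 && ref.offset == -1)
+ *		return -ENOMEM;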
+ */ +struct shm_ref zalloc_shm(struct shm_object *obj, size_t len) +{ + struct shm_ref ref; + struct shm_ref shm_ref_error = { -1, -1 }; + + if (obj->memory_map_size - obj->allocated_len < len) + return shm_ref_error; + ref.index = obj->index; + ref.offset = obj->allocated_len; + obj->allocated_len += len; + return ref; +} + +void align_shm(struct shm_object *obj, size_t align) +{ + size_t offset_len = lttng_ust_offset_align(obj->allocated_len, align); + obj->allocated_len += offset_len; +} diff --git a/src/common/ringbuffer/shm.h b/src/common/ringbuffer/shm.h new file mode 100644 index 00000000..6e4f7f7b --- /dev/null +++ b/src/common/ringbuffer/shm.h @@ -0,0 +1,221 @@ +/* + * SPDX-License-Identifier: LGPL-2.1-only + * + * Copyright (C) 2011-2012 Mathieu Desnoyers + */ + +#ifndef _LIBRINGBUFFER_SHM_H +#define _LIBRINGBUFFER_SHM_H + +#include +#include +#include +#include "common/logging.h" +#include +#include "shm_types.h" + +/* channel_handle_create - for UST. */ +extern +struct lttng_ust_shm_handle *channel_handle_create(void *data, + uint64_t memory_map_size, int wakeup_fd) + __attribute__((visibility("hidden"))); + +/* channel_handle_add_stream - for UST. */ +extern +int channel_handle_add_stream(struct lttng_ust_shm_handle *handle, + int shm_fd, int wakeup_fd, uint32_t stream_nr, + uint64_t memory_map_size) + __attribute__((visibility("hidden"))); + +unsigned int channel_handle_get_nr_streams(struct lttng_ust_shm_handle *handle) + __attribute__((visibility("hidden"))); + +/* + * Pointer dereferencing. We don't trust the shm_ref, so we validate + * both the index and offset with known boundaries. + * + * "shmp" and "shmp_index" guarantee that it's safe to use the pointer + * target type, even in the occurrence of shm_ref modification by an + * untrusted process having write access to the shm_ref. We return a + * NULL pointer if the ranges are invalid. + */ +static inline +char *_shmp_offset(struct shm_object_table *table, struct shm_ref *ref, + size_t idx, size_t elem_size) +{ + struct shm_object *obj; + size_t objindex, ref_offset; + + objindex = (size_t) ref->index; + if (caa_unlikely(objindex >= table->allocated_len)) + return NULL; + obj = &table->objects[objindex]; + ref_offset = (size_t) ref->offset; + ref_offset += idx * elem_size; + /* Check if part of the element returned would exceed the limits. */ + if (caa_unlikely(ref_offset + elem_size > obj->memory_map_size)) + return NULL; + return &obj->memory_map[ref_offset]; +} + +#define shmp_index(handle, ref, index) \ + ((__typeof__((ref)._type)) _shmp_offset((handle)->table, &(ref)._ref, index, sizeof(*((ref)._type)))) + +#define shmp(handle, ref) shmp_index(handle, ref, 0) + +static inline +void _set_shmp(struct shm_ref *ref, struct shm_ref src) +{ + *ref = src; +} + +#define set_shmp(ref, src) _set_shmp(&(ref)._ref, src) + +struct shm_object_table *shm_object_table_create(size_t max_nb_obj) + __attribute__((visibility("hidden"))); + +struct shm_object *shm_object_table_alloc(struct shm_object_table *table, + size_t memory_map_size, + enum shm_object_type type, + const int stream_fd, + int cpu) + __attribute__((visibility("hidden"))); + +struct shm_object *shm_object_table_append_shm(struct shm_object_table *table, + int shm_fd, int wakeup_fd, uint32_t stream_nr, + size_t memory_map_size) + __attribute__((visibility("hidden"))); + +/* mem ownership is passed to shm_object_table_append_mem(). 
*/ +struct shm_object *shm_object_table_append_mem(struct shm_object_table *table, + void *mem, size_t memory_map_size, int wakeup_fd) + __attribute__((visibility("hidden"))); + +void shm_object_table_destroy(struct shm_object_table *table, int consumer) + __attribute__((visibility("hidden"))); + +/* + * zalloc_shm - allocate memory within a shm object. + * + * Shared memory is already zeroed by shmget. + * *NOT* multithread-safe (should be protected by mutex). + * Returns a -1, -1 tuple on error. + */ +struct shm_ref zalloc_shm(struct shm_object *obj, size_t len) + __attribute__((visibility("hidden"))); + +void align_shm(struct shm_object *obj, size_t align) + __attribute__((visibility("hidden"))); + +static inline +int shm_get_wait_fd(struct lttng_ust_shm_handle *handle, struct shm_ref *ref) +{ + struct shm_object_table *table = handle->table; + struct shm_object *obj; + size_t index; + + index = (size_t) ref->index; + if (caa_unlikely(index >= table->allocated_len)) + return -EPERM; + obj = &table->objects[index]; + return obj->wait_fd[0]; +} + +static inline +int shm_get_wakeup_fd(struct lttng_ust_shm_handle *handle, struct shm_ref *ref) +{ + struct shm_object_table *table = handle->table; + struct shm_object *obj; + size_t index; + + index = (size_t) ref->index; + if (caa_unlikely(index >= table->allocated_len)) + return -EPERM; + obj = &table->objects[index]; + return obj->wait_fd[1]; +} + +static inline +int shm_close_wait_fd(struct lttng_ust_shm_handle *handle, + struct shm_ref *ref) +{ + struct shm_object_table *table = handle->table; + struct shm_object *obj; + int wait_fd; + size_t index; + int ret; + + index = (size_t) ref->index; + if (caa_unlikely(index >= table->allocated_len)) + return -EPERM; + obj = &table->objects[index]; + wait_fd = obj->wait_fd[0]; + if (wait_fd < 0) + return -ENOENT; + obj->wait_fd[0] = -1; + ret = close(wait_fd); + if (ret) { + ret = -errno; + return ret; + } + return 0; +} + +static inline +int shm_close_wakeup_fd(struct lttng_ust_shm_handle *handle, + struct shm_ref *ref) +{ + struct shm_object_table *table = handle->table; + struct shm_object *obj; + int wakeup_fd; + size_t index; + int ret; + + index = (size_t) ref->index; + if (caa_unlikely(index >= table->allocated_len)) + return -EPERM; + obj = &table->objects[index]; + wakeup_fd = obj->wait_fd[1]; + if (wakeup_fd < 0) + return -ENOENT; + obj->wait_fd[1] = -1; + ret = close(wakeup_fd); + if (ret) { + ret = -errno; + return ret; + } + return 0; +} + +static inline +int shm_get_shm_fd(struct lttng_ust_shm_handle *handle, struct shm_ref *ref) +{ + struct shm_object_table *table = handle->table; + struct shm_object *obj; + size_t index; + + index = (size_t) ref->index; + if (caa_unlikely(index >= table->allocated_len)) + return -EPERM; + obj = &table->objects[index]; + return obj->shm_fd; +} + + +static inline +int shm_get_shm_size(struct lttng_ust_shm_handle *handle, struct shm_ref *ref, + uint64_t *size) +{ + struct shm_object_table *table = handle->table; + struct shm_object *obj; + size_t index; + + index = (size_t) ref->index; + if (caa_unlikely(index >= table->allocated_len)) + return -EPERM; + obj = &table->objects[index]; + *size = obj->memory_map_size; + return 0; +} + +#endif /* _LIBRINGBUFFER_SHM_H */ diff --git a/src/common/ringbuffer/shm_internal.h b/src/common/ringbuffer/shm_internal.h new file mode 100644 index 00000000..0e92b451 --- /dev/null +++ b/src/common/ringbuffer/shm_internal.h @@ -0,0 +1,21 @@ +/* + * SPDX-License-Identifier: LGPL-2.1-only + * + * Copyright (C) 2011-2012 
Mathieu Desnoyers + */ + +#ifndef _LIBRINGBUFFER_SHM_INTERNAL_H +#define _LIBRINGBUFFER_SHM_INTERNAL_H + +struct shm_ref { + volatile ssize_t index; /* within the object table */ + volatile ssize_t offset; /* within the object */ +}; + +#define DECLARE_SHMP(type, name) \ + union { \ + struct shm_ref _ref; \ + type *_type; \ + } name + +#endif /* _LIBRINGBUFFER_SHM_INTERNAL_H */ diff --git a/src/common/ringbuffer/shm_types.h b/src/common/ringbuffer/shm_types.h new file mode 100644 index 00000000..c1ad7b68 --- /dev/null +++ b/src/common/ringbuffer/shm_types.h @@ -0,0 +1,44 @@ +/* + * SPDX-License-Identifier: LGPL-2.1-only + * + * Copyright (C) 2011-2012 Mathieu Desnoyers + */ + +#ifndef _LIBRINGBUFFER_SHM_TYPES_H +#define _LIBRINGBUFFER_SHM_TYPES_H + +#include +#include +#include +#include "shm_internal.h" + +struct lttng_ust_lib_ring_buffer_channel; + +enum shm_object_type { + SHM_OBJECT_SHM, + SHM_OBJECT_MEM, +}; + +struct shm_object { + enum shm_object_type type; + size_t index; /* within the object table */ + int shm_fd; /* shm fd */ + int wait_fd[2]; /* fd for wait/wakeup */ + char *memory_map; + size_t memory_map_size; + uint64_t allocated_len; + int shm_fd_ownership; +}; + +struct shm_object_table { + size_t size; + size_t allocated_len; + struct shm_object objects[]; +}; + +struct lttng_ust_shm_handle { + struct shm_object_table *table; + DECLARE_SHMP(struct lttng_ust_lib_ring_buffer_channel, chan); +}; + +#endif /* _LIBRINGBUFFER_SHM_TYPES_H */ diff --git a/src/common/ringbuffer/smp.c b/src/common/ringbuffer/smp.c new file mode 100644 index 00000000..39bd5559 --- /dev/null +++ b/src/common/ringbuffer/smp.c @@ -0,0 +1,96 @@ +/* + * SPDX-License-Identifier: LGPL-2.1-only + * + * Copyright (C) 2011-2012 Mathieu Desnoyers + * Copyright (C) 2019 Michael Jeanson + */ + +#define _LGPL_SOURCE +#include +#include +#include "smp.h" + +int __num_possible_cpus; + +#if (defined(__GLIBC__) || defined( __UCLIBC__)) +void _get_num_possible_cpus(void) +{ + int result; + + /* On Linux, when some processors are offline + * _SC_NPROCESSORS_CONF counts the offline + * processors, whereas _SC_NPROCESSORS_ONLN + * does not. If we used _SC_NPROCESSORS_ONLN, + * getcpu() could return a value greater than + * this sysconf, in which case the arrays + * indexed by processor would overflow. + */ + result = sysconf(_SC_NPROCESSORS_CONF); + if (result == -1) + return; + __num_possible_cpus = result; +} + +#else + +/* + * The MUSL libc implementation of the _SC_NPROCESSORS_CONF sysconf does not + * return the number of configured CPUs in the system but relies on the cpu + * affinity mask of the current task. + * + * So instead we use a strategy similar to GLIBC's, counting the cpu + * directories in "/sys/devices/system/cpu" and fallback on the value from + * sysconf if it fails. + */ + +#include +#include +#include +#include +#include + +#define __max(a,b) ((a)>(b)?(a):(b)) + +void _get_num_possible_cpus(void) +{ + int result, count = 0; + DIR *cpudir; + struct dirent *entry; + + cpudir = opendir("/sys/devices/system/cpu"); + if (cpudir == NULL) + goto end; + + /* + * Count the number of directories named "cpu" followed by and + * integer. This is the same strategy as glibc uses. 
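+	 *
+	 * For example, "cpu0" and "cpu23" are counted, while entries such
+	 * as "cpufreq" or "cpuidle" are rejected by the strtoul()/endptr
+	 * check below.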
+ */ + while ((entry = readdir(cpudir))) { + if (entry->d_type == DT_DIR && + strncmp(entry->d_name, "cpu", 3) == 0) { + + char *endptr; + unsigned long cpu_num; + + cpu_num = strtoul(entry->d_name + 3, &endptr, 10); + if ((cpu_num < ULONG_MAX) && (endptr != entry->d_name + 3) + && (*endptr == '\0')) { + count++; + } + } + } + +end: + /* + * Get the sysconf value as a fallback. Keep the highest number. + */ + result = __max(sysconf(_SC_NPROCESSORS_CONF), count); + + /* + * If both methods failed, don't store the value. + */ + if (result < 1) + return; + __num_possible_cpus = result; +} +#endif diff --git a/src/common/ringbuffer/smp.h b/src/common/ringbuffer/smp.h new file mode 100644 index 00000000..028a66f7 --- /dev/null +++ b/src/common/ringbuffer/smp.h @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: LGPL-2.1-only + * + * Copyright (C) 2011-2012 Mathieu Desnoyers + */ + +#ifndef _LIBRINGBUFFER_SMP_H +#define _LIBRINGBUFFER_SMP_H + +#include "getcpu.h" + +/* + * 4kB of per-cpu data available. Enough to hold the control structures, + * but not ring buffers. + */ +#define PER_CPU_MEM_SIZE 4096 + +extern int __num_possible_cpus + __attribute__((visibility("hidden"))); + +extern void _get_num_possible_cpus(void) + __attribute__((visibility("hidden"))); + +static inline +int num_possible_cpus(void) +{ + if (!__num_possible_cpus) + _get_num_possible_cpus(); + return __num_possible_cpus; +} + +#define for_each_possible_cpu(cpu) \ + for ((cpu) = 0; (cpu) < num_possible_cpus(); (cpu)++) + +#endif /* _LIBRINGBUFFER_SMP_H */ diff --git a/src/common/ringbuffer/vatomic.h b/src/common/ringbuffer/vatomic.h new file mode 100644 index 00000000..199d2c77 --- /dev/null +++ b/src/common/ringbuffer/vatomic.h @@ -0,0 +1,72 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright (C) 2010-2012 Mathieu Desnoyers + */ + +#ifndef _LTTNG_RING_BUFFER_VATOMIC_H +#define _LTTNG_RING_BUFFER_VATOMIC_H + +#include +#include + +/* + * Same data type (long) accessed differently depending on configuration. + * v field is for non-atomic access (protected by mutual exclusion). + * In the fast-path, the ring_buffer_config structure is constant, so the + * compiler can statically select the appropriate branch. + * local_t is used for per-cpu and per-thread buffers. + * atomic_long_t is used for globally shared buffers. + */ +union v_atomic { + long a; /* accessed through uatomic */ + long v; +}; + +static inline +long v_read(const struct lttng_ust_lib_ring_buffer_config *config, union v_atomic *v_a) +{ + assert(config->sync != RING_BUFFER_SYNC_PER_CPU); + return uatomic_read(&v_a->a); +} + +static inline +void v_set(const struct lttng_ust_lib_ring_buffer_config *config, union v_atomic *v_a, + long v) +{ + assert(config->sync != RING_BUFFER_SYNC_PER_CPU); + uatomic_set(&v_a->a, v); +} + +static inline +void v_add(const struct lttng_ust_lib_ring_buffer_config *config, long v, union v_atomic *v_a) +{ + assert(config->sync != RING_BUFFER_SYNC_PER_CPU); + uatomic_add(&v_a->a, v); +} + +static inline +void v_inc(const struct lttng_ust_lib_ring_buffer_config *config, union v_atomic *v_a) +{ + assert(config->sync != RING_BUFFER_SYNC_PER_CPU); + uatomic_inc(&v_a->a); +} + +/* + * Non-atomic decrement. Only used by reader, apply to reader-owned subbuffer. 
+ */ +static inline +void _v_dec(const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), union v_atomic *v_a) +{ + --v_a->v; +} + +static inline +long v_cmpxchg(const struct lttng_ust_lib_ring_buffer_config *config, union v_atomic *v_a, + long old, long _new) +{ + assert(config->sync != RING_BUFFER_SYNC_PER_CPU); + return uatomic_cmpxchg(&v_a->a, old, _new); +} + +#endif /* _LTTNG_RING_BUFFER_VATOMIC_H */ diff --git a/src/lib/lttng-ust-ctl/ustctl.c b/src/lib/lttng-ust-ctl/ustctl.c index c89085a1..488148bf 100644 --- a/src/lib/lttng-ust-ctl/ustctl.c +++ b/src/lib/lttng-ust-ctl/ustctl.c @@ -22,8 +22,8 @@ #include "common/macros.h" #include "common/align.h" -#include "libringbuffer/backend.h" -#include "libringbuffer/frontend.h" +#include "common/ringbuffer/backend.h" +#include "common/ringbuffer/frontend.h" #include "liblttng-ust/ust-events-internal.h" #include "liblttng-ust/wait.h" #include "liblttng-ust/lttng-rb-clients.h" diff --git a/src/liblttng-ust/Makefile.am b/src/liblttng-ust/Makefile.am index 89fba09a..78d82f0e 100644 --- a/src/liblttng-ust/Makefile.am +++ b/src/liblttng-ust/Makefile.am @@ -130,7 +130,7 @@ liblttng_ust_la_SOURCES = liblttng_ust_la_LDFLAGS = -no-undefined -version-info $(LTTNG_UST_LIBRARY_VERSION) liblttng_ust_support_la_LIBADD = \ - $(top_builddir)/src/libringbuffer/libringbuffer.la \ + $(top_builddir)/src/common/libringbuffer.la \ $(top_builddir)/src/common/libcounter.la liblttng_ust_la_LIBADD = \ diff --git a/src/liblttng-ust/lttng-context-cpu-id.c b/src/liblttng-ust/lttng-context-cpu-id.c index 358dff98..900b3c2e 100644 --- a/src/liblttng-ust/lttng-context-cpu-id.c +++ b/src/liblttng-ust/lttng-context-cpu-id.c @@ -19,7 +19,7 @@ #include #include #include -#include "../libringbuffer/getcpu.h" +#include "common/ringbuffer/getcpu.h" #include #include "context-internal.h" diff --git a/src/liblttng-ust/lttng-events.c b/src/liblttng-ust/lttng-events.c index 7a4ade01..ccd3dfd9 100644 --- a/src/liblttng-ust/lttng-events.c +++ b/src/liblttng-ust/lttng-events.c @@ -52,9 +52,9 @@ #include "context-internal.h" #include "ust-events-internal.h" #include "wait.h" -#include "../libringbuffer/shm.h" -#include "../libringbuffer/frontend_types.h" -#include "../libringbuffer/frontend.h" +#include "common/ringbuffer/shm.h" +#include "common/ringbuffer/frontend_types.h" +#include "common/ringbuffer/frontend.h" #include "common/counter/counter.h" #include "jhash.h" #include diff --git a/src/liblttng-ust/lttng-getcpu.c b/src/liblttng-ust/lttng-getcpu.c index d4d91da6..8d2c944c 100644 --- a/src/liblttng-ust/lttng-getcpu.c +++ b/src/liblttng-ust/lttng-getcpu.c @@ -14,7 +14,7 @@ #include #include "getenv.h" -#include "../libringbuffer/getcpu.h" +#include "common/ringbuffer/getcpu.h" int (*lttng_get_cpu)(void); diff --git a/src/liblttng-ust/lttng-rb-clients.h b/src/liblttng-ust/lttng-rb-clients.h index 9cec72e8..afdbd057 100644 --- a/src/liblttng-ust/lttng-rb-clients.h +++ b/src/liblttng-ust/lttng-rb-clients.h @@ -8,7 +8,7 @@ #define _LTTNG_RB_CLIENT_H #include -#include "../libringbuffer/ringbuffer-config.h" +#include "common/ringbuffer/ringbuffer-config.h" struct lttng_ust_client_lib_ring_buffer_client_cb { struct lttng_ust_lib_ring_buffer_client_cb parent; diff --git a/src/liblttng-ust/lttng-ring-buffer-client-template.h b/src/liblttng-ust/lttng-ring-buffer-client-template.h index d7f77698..880ae94c 100644 --- a/src/liblttng-ust/lttng-ring-buffer-client-template.h +++ b/src/liblttng-ust/lttng-ring-buffer-client-template.h @@ -17,7 +17,7 @@ #include "clock.h" 
#include "context-internal.h" #include "lttng-tracer.h" -#include "../libringbuffer/frontend_types.h" +#include "common/ringbuffer/frontend_types.h" #include #define LTTNG_COMPACT_EVENT_BITS 5 @@ -195,7 +195,7 @@ size_t record_header_size( return offset - orig_offset; } -#include "../libringbuffer/api.h" +#include "common/ringbuffer/api.h" #include "lttng-rb-clients.h" static diff --git a/src/liblttng-ust/lttng-ring-buffer-metadata-client-template.h b/src/liblttng-ust/lttng-ring-buffer-metadata-client-template.h index 2406567e..954fc020 100644 --- a/src/liblttng-ust/lttng-ring-buffer-metadata-client-template.h +++ b/src/liblttng-ust/lttng-ring-buffer-metadata-client-template.h @@ -14,7 +14,7 @@ #include "common/bitfield.h" #include "common/align.h" #include "lttng-tracer.h" -#include "../libringbuffer/frontend_types.h" +#include "common/ringbuffer/frontend_types.h" #include struct metadata_packet_header { @@ -58,7 +58,7 @@ size_t record_header_size( return 0; } -#include "../libringbuffer/api.h" +#include "common/ringbuffer/api.h" #include "lttng-rb-clients.h" static uint64_t client_ring_buffer_clock_read( diff --git a/src/liblttng-ust/lttng-ust-abi.c b/src/liblttng-ust/lttng-ust-abi.c index 31b6fe22..f1096d06 100644 --- a/src/liblttng-ust/lttng-ust-abi.c +++ b/src/liblttng-ust/lttng-ust-abi.c @@ -39,9 +39,9 @@ #include "common/ust-fd.h" #include "common/logging.h" -#include "../libringbuffer/frontend_types.h" -#include "../libringbuffer/frontend.h" -#include "../libringbuffer/shm.h" +#include "common/ringbuffer/frontend_types.h" +#include "common/ringbuffer/frontend.h" +#include "common/ringbuffer/shm.h" #include "common/counter/counter.h" #include "tracepoint-internal.h" #include "lttng-tracer.h" diff --git a/src/liblttng-ust/lttng-ust-comm.c b/src/liblttng-ust/lttng-ust-comm.c index 137107cd..a753206f 100644 --- a/src/liblttng-ust/lttng-ust-comm.c +++ b/src/liblttng-ust/lttng-ust-comm.c @@ -46,10 +46,10 @@ #include "tracepoint-internal.h" #include "lttng-tracer-core.h" #include "compat.h" -#include "../libringbuffer/rb-init.h" +#include "common/ringbuffer/rb-init.h" #include "lttng-ust-statedump.h" #include "clock.h" -#include "../libringbuffer/getcpu.h" +#include "common/ringbuffer/getcpu.h" #include "getenv.h" #include "ust-events-internal.h" #include "context-internal.h" diff --git a/src/libringbuffer/Makefile.am b/src/libringbuffer/Makefile.am deleted file mode 100644 index eb9ba512..00000000 --- a/src/libringbuffer/Makefile.am +++ /dev/null @@ -1,24 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only - -AM_CFLAGS += -fno-strict-aliasing - -noinst_LTLIBRARIES = libringbuffer.la - -libringbuffer_la_SOURCES = \ - smp.h smp.c getcpu.h \ - shm.c shm.h shm_types.h shm_internal.h \ - ring_buffer_backend.c \ - ring_buffer_frontend.c \ - api.h mmap.h \ - backend.h backend_internal.h backend_types.h \ - frontend_api.h frontend.h frontend_internal.h frontend_types.h \ - nohz.h vatomic.h rb-init.h ringbuffer-config.h - -libringbuffer_la_LIBADD = \ - -lrt - -if ENABLE_NUMA -libringbuffer_la_LIBADD += -lnuma -endif - -libringbuffer_la_CFLAGS = -DUST_COMPONENT="libringbuffer" $(AM_CFLAGS) diff --git a/src/libringbuffer/api.h b/src/libringbuffer/api.h deleted file mode 100644 index ec4f5229..00000000 --- a/src/libringbuffer/api.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * SPDX-License-Identifier: LGPL-2.1-only - * - * Copyright (C) 2010-2012 Mathieu Desnoyers - * - * Ring Buffer API. 
- */ - -#ifndef _LTTNG_RING_BUFFER_API_H -#define _LTTNG_RING_BUFFER_API_H - -#include "backend.h" -#include "frontend.h" -#include - -/* - * ring_buffer_frontend_api.h contains static inline functions that depend on - * client static inlines. Hence the inclusion of this "api" header only - * within the client. - */ -#include "frontend_api.h" - -#endif /* _LTTNG_RING_BUFFER_API_H */ diff --git a/src/libringbuffer/backend.h b/src/libringbuffer/backend.h deleted file mode 100644 index 0b482182..00000000 --- a/src/libringbuffer/backend.h +++ /dev/null @@ -1,318 +0,0 @@ -/* - * SPDX-License-Identifier: LGPL-2.1-only - * - * Copyright (C) 2011-2012 Mathieu Desnoyers - * - * Ring buffer backend (API). - * - * Credits to Steven Rostedt for proposing to use an extra-subbuffer owned by - * the reader in flight recorder mode. - */ - -#ifndef _LTTNG_RING_BUFFER_BACKEND_H -#define _LTTNG_RING_BUFFER_BACKEND_H - -#include -#include - -/* Internal helpers */ -#include "backend_internal.h" -#include "frontend_internal.h" - -/* Ring buffer backend API */ - -/* Ring buffer backend access (read/write) */ - -extern size_t lib_ring_buffer_read(struct lttng_ust_lib_ring_buffer_backend *bufb, - size_t offset, void *dest, size_t len, - struct lttng_ust_shm_handle *handle) - __attribute__((visibility("hidden"))); - -extern int lib_ring_buffer_read_cstr(struct lttng_ust_lib_ring_buffer_backend *bufb, - size_t offset, void *dest, size_t len, - struct lttng_ust_shm_handle *handle) - __attribute__((visibility("hidden"))); - -/* - * Return the address where a given offset is located. - * Should be used to get the current subbuffer header pointer. Given we know - * it's never on a page boundary, it's safe to write directly to this address, - * as long as the write is never bigger than a page size. - */ -extern void * -lib_ring_buffer_offset_address(struct lttng_ust_lib_ring_buffer_backend *bufb, - size_t offset, - struct lttng_ust_shm_handle *handle) - __attribute__((visibility("hidden"))); - -extern void * -lib_ring_buffer_read_offset_address(struct lttng_ust_lib_ring_buffer_backend *bufb, - size_t offset, - struct lttng_ust_shm_handle *handle) - __attribute__((visibility("hidden"))); - -/** - * lib_ring_buffer_write - write data to a buffer backend - * @config : ring buffer instance configuration - * @ctx: ring buffer context. (input arguments only) - * @src : source pointer to copy from - * @len : length of data to copy - * - * This function copies "len" bytes of data from a source pointer to a buffer - * backend, at the current context offset. This is more or less a buffer - * backend-specific memcpy() operation. Calls the slow path (_ring_buffer_write) - * if copy is crossing a page boundary. - */ -static inline -void lib_ring_buffer_write(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_ctx *ctx, - const void *src, size_t len) - __attribute__((always_inline)); -static inline -void lib_ring_buffer_write(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_ctx *ctx, - const void *src, size_t len) -{ - struct lttng_ust_lib_ring_buffer_ctx_private *ctx_private = ctx->priv; - struct channel_backend *chanb = &ctx_private->chan->backend; - struct lttng_ust_shm_handle *handle = ctx_private->chan->handle; - size_t offset = ctx_private->buf_offset; - struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; - void *p; - - if (caa_unlikely(!len)) - return; - /* - * Underlying layer should never ask for writes across - * subbuffers. 
- */ - CHAN_WARN_ON(chanb, (offset & (chanb->buf_size - 1)) + len > chanb->buf_size); - backend_pages = lib_ring_buffer_get_backend_pages_from_ctx(config, ctx); - if (caa_unlikely(!backend_pages)) { - if (lib_ring_buffer_backend_get_pages(config, ctx, &backend_pages)) - return; - } - p = shmp_index(handle, backend_pages->p, offset & (chanb->subbuf_size - 1)); - if (caa_unlikely(!p)) - return; - lib_ring_buffer_do_copy(config, p, src, len); - ctx_private->buf_offset += len; -} - -/* - * Copy up to @len string bytes from @src to @dest. Stop whenever a NULL - * terminating character is found in @src. Returns the number of bytes - * copied. Does *not* terminate @dest with NULL terminating character. - */ -static inline -size_t lib_ring_buffer_do_strcpy(const struct lttng_ust_lib_ring_buffer_config *config, - char *dest, const char *src, size_t len) - __attribute__((always_inline)); -static inline -size_t lib_ring_buffer_do_strcpy( - const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), - char *dest, const char *src, size_t len) -{ - size_t count; - - for (count = 0; count < len; count++) { - char c; - - /* - * Only read source character once, in case it is - * modified concurrently. - */ - c = CMM_LOAD_SHARED(src[count]); - if (!c) - break; - lib_ring_buffer_do_copy(config, &dest[count], &c, 1); - } - return count; -} - -/** - * lib_ring_buffer_strcpy - write string data to a buffer backend - * @config : ring buffer instance configuration - * @ctx: ring buffer context. (input arguments only) - * @src : source pointer to copy from - * @len : length of data to copy - * @pad : character to use for padding - * - * This function copies @len - 1 bytes of string data from a source - * pointer to a buffer backend, followed by a terminating '\0' - * character, at the current context offset. This is more or less a - * buffer backend-specific strncpy() operation. If a terminating '\0' - * character is found in @src before @len - 1 characters are copied, pad - * the buffer with @pad characters (e.g. '#'). - */ -static inline -void lib_ring_buffer_strcpy(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_ctx *ctx, - const char *src, size_t len, char pad) - __attribute__((always_inline)); -static inline -void lib_ring_buffer_strcpy(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_ctx *ctx, - const char *src, size_t len, char pad) -{ - struct lttng_ust_lib_ring_buffer_ctx_private *ctx_private = ctx->priv; - struct channel_backend *chanb = &ctx_private->chan->backend; - struct lttng_ust_shm_handle *handle = ctx_private->chan->handle; - size_t count; - size_t offset = ctx_private->buf_offset; - struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; - void *p; - - if (caa_unlikely(!len)) - return; - /* - * Underlying layer should never ask for writes across - * subbuffers. 
- */ - CHAN_WARN_ON(chanb, (offset & (chanb->buf_size - 1)) + len > chanb->buf_size); - backend_pages = lib_ring_buffer_get_backend_pages_from_ctx(config, ctx); - if (caa_unlikely(!backend_pages)) { - if (lib_ring_buffer_backend_get_pages(config, ctx, &backend_pages)) - return; - } - p = shmp_index(handle, backend_pages->p, offset & (chanb->subbuf_size - 1)); - if (caa_unlikely(!p)) - return; - - count = lib_ring_buffer_do_strcpy(config, p, src, len - 1); - offset += count; - /* Padding */ - if (caa_unlikely(count < len - 1)) { - size_t pad_len = len - 1 - count; - - p = shmp_index(handle, backend_pages->p, offset & (chanb->subbuf_size - 1)); - if (caa_unlikely(!p)) - return; - lib_ring_buffer_do_memset(p, pad, pad_len); - offset += pad_len; - } - /* Final '\0' */ - p = shmp_index(handle, backend_pages->p, offset & (chanb->subbuf_size - 1)); - if (caa_unlikely(!p)) - return; - lib_ring_buffer_do_memset(p, '\0', 1); - ctx_private->buf_offset += len; -} - -/** - * lib_ring_buffer_pstrcpy - write to a buffer backend P-string - * @config : ring buffer instance configuration - * @ctx: ring buffer context. (input arguments only) - * @src : source pointer to copy from - * @len : length of data to copy - * @pad : character to use for padding - * - * This function copies up to @len bytes of data from a source pointer - * to a Pascal String into the buffer backend. If a terminating '\0' - * character is found in @src before @len characters are copied, pad the - * buffer with @pad characters (e.g. '\0'). - * - * The length of the pascal strings in the ring buffer is explicit: it - * is either the array or sequence length. - */ -static inline -void lib_ring_buffer_pstrcpy(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_ctx *ctx, - const char *src, size_t len, char pad) - __attribute__((always_inline)); -static inline -void lib_ring_buffer_pstrcpy(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_ctx *ctx, - const char *src, size_t len, char pad) -{ - struct lttng_ust_lib_ring_buffer_ctx_private *ctx_private = ctx->priv; - struct channel_backend *chanb = &ctx_private->chan->backend; - struct lttng_ust_shm_handle *handle = ctx_private->chan->handle; - size_t count; - size_t offset = ctx_private->buf_offset; - struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; - void *p; - - if (caa_unlikely(!len)) - return; - /* - * Underlying layer should never ask for writes across - * subbuffers. - */ - CHAN_WARN_ON(chanb, (offset & (chanb->buf_size - 1)) + len > chanb->buf_size); - backend_pages = lib_ring_buffer_get_backend_pages_from_ctx(config, ctx); - if (caa_unlikely(!backend_pages)) { - if (lib_ring_buffer_backend_get_pages(config, ctx, &backend_pages)) - return; - } - p = shmp_index(handle, backend_pages->p, offset & (chanb->subbuf_size - 1)); - if (caa_unlikely(!p)) - return; - - count = lib_ring_buffer_do_strcpy(config, p, src, len); - offset += count; - /* Padding */ - if (caa_unlikely(count < len)) { - size_t pad_len = len - count; - - p = shmp_index(handle, backend_pages->p, offset & (chanb->subbuf_size - 1)); - if (caa_unlikely(!p)) - return; - lib_ring_buffer_do_memset(p, pad, pad_len); - } - ctx_private->buf_offset += len; -} - -/* - * This accessor counts the number of unread records in a buffer. - * It only provides a consistent value if no reads not writes are performed - * concurrently. 
- */ -static inline -unsigned long lib_ring_buffer_get_records_unread( - const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_shm_handle *handle) -{ - struct lttng_ust_lib_ring_buffer_backend *bufb = &buf->backend; - unsigned long records_unread = 0, sb_bindex; - unsigned int i; - struct lttng_ust_lib_ring_buffer_channel *chan; - - chan = shmp(handle, bufb->chan); - if (!chan) - return 0; - for (i = 0; i < chan->backend.num_subbuf; i++) { - struct lttng_ust_lib_ring_buffer_backend_subbuffer *wsb; - struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; - struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; - - wsb = shmp_index(handle, bufb->buf_wsb, i); - if (!wsb) - return 0; - sb_bindex = subbuffer_id_get_index(config, wsb->id); - rpages = shmp_index(handle, bufb->array, sb_bindex); - if (!rpages) - return 0; - backend_pages = shmp(handle, rpages->shmp); - if (!backend_pages) - return 0; - records_unread += v_read(config, &backend_pages->records_unread); - } - if (config->mode == RING_BUFFER_OVERWRITE) { - struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; - struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; - - sb_bindex = subbuffer_id_get_index(config, bufb->buf_rsb.id); - rpages = shmp_index(handle, bufb->array, sb_bindex); - if (!rpages) - return 0; - backend_pages = shmp(handle, rpages->shmp); - if (!backend_pages) - return 0; - records_unread += v_read(config, &backend_pages->records_unread); - } - return records_unread; -} - -#endif /* _LTTNG_RING_BUFFER_BACKEND_H */ diff --git a/src/libringbuffer/backend_internal.h b/src/libringbuffer/backend_internal.h deleted file mode 100644 index a325875c..00000000 --- a/src/libringbuffer/backend_internal.h +++ /dev/null @@ -1,696 +0,0 @@ -/* - * SPDX-License-Identifier: LGPL-2.1-only - * - * Copyright (C) 2005-2012 Mathieu Desnoyers - * - * Ring buffer backend (internal helpers). 
- */ - -#ifndef _LTTNG_RING_BUFFER_BACKEND_INTERNAL_H -#define _LTTNG_RING_BUFFER_BACKEND_INTERNAL_H - -#include -#include -#include -#include - -#include -#include "ringbuffer-config.h" -#include "backend_types.h" -#include "frontend_types.h" -#include "shm.h" - -/* Ring buffer backend API presented to the frontend */ - -/* Ring buffer and channel backend create/free */ - -int lib_ring_buffer_backend_create(struct lttng_ust_lib_ring_buffer_backend *bufb, - struct channel_backend *chan, - int cpu, - struct lttng_ust_shm_handle *handle, - struct shm_object *shmobj) - __attribute__((visibility("hidden"))); - -void channel_backend_unregister_notifiers(struct channel_backend *chanb) - __attribute__((visibility("hidden"))); - -void lib_ring_buffer_backend_free(struct lttng_ust_lib_ring_buffer_backend *bufb) - __attribute__((visibility("hidden"))); - -int channel_backend_init(struct channel_backend *chanb, - const char *name, - const struct lttng_ust_lib_ring_buffer_config *config, - size_t subbuf_size, - size_t num_subbuf, struct lttng_ust_shm_handle *handle, - const int *stream_fds) - __attribute__((visibility("hidden"))); - -void channel_backend_free(struct channel_backend *chanb, - struct lttng_ust_shm_handle *handle) - __attribute__((visibility("hidden"))); - -void lib_ring_buffer_backend_reset(struct lttng_ust_lib_ring_buffer_backend *bufb, - struct lttng_ust_shm_handle *handle) - __attribute__((visibility("hidden"))); - -void channel_backend_reset(struct channel_backend *chanb) - __attribute__((visibility("hidden"))); - -int lib_ring_buffer_backend_init(void) - __attribute__((visibility("hidden"))); - -void lib_ring_buffer_backend_exit(void) - __attribute__((visibility("hidden"))); - -extern void _lib_ring_buffer_write(struct lttng_ust_lib_ring_buffer_backend *bufb, - size_t offset, const void *src, size_t len, - ssize_t pagecpy) - __attribute__((visibility("hidden"))); - -/* - * Subbuffer ID bits for overwrite mode. Need to fit within a single word to be - * exchanged atomically. - * - * Top half word, except lowest bit, belongs to "offset", which is used to keep - * to count the produced buffers. For overwrite mode, this provides the - * consumer with the capacity to read subbuffers in order, handling the - * situation where producers would write up to 2^15 buffers (or 2^31 for 64-bit - * systems) concurrently with a single execution of get_subbuf (between offset - * sampling and subbuffer ID exchange). - */ - -#define HALF_ULONG_BITS (CAA_BITS_PER_LONG >> 1) - -#define SB_ID_OFFSET_SHIFT (HALF_ULONG_BITS + 1) -#define SB_ID_OFFSET_COUNT (1UL << SB_ID_OFFSET_SHIFT) -#define SB_ID_OFFSET_MASK (~(SB_ID_OFFSET_COUNT - 1)) -/* - * Lowest bit of top word half belongs to noref. Used only for overwrite mode. - */ -#define SB_ID_NOREF_SHIFT (SB_ID_OFFSET_SHIFT - 1) -#define SB_ID_NOREF_COUNT (1UL << SB_ID_NOREF_SHIFT) -#define SB_ID_NOREF_MASK SB_ID_NOREF_COUNT -/* - * In overwrite mode: lowest half of word is used for index. - * Limit of 2^16 subbuffers per buffer on 32-bit, 2^32 on 64-bit. - * In producer-consumer mode: whole word used for index. - */ -#define SB_ID_INDEX_SHIFT 0 -#define SB_ID_INDEX_COUNT (1UL << SB_ID_INDEX_SHIFT) -#define SB_ID_INDEX_MASK (SB_ID_NOREF_COUNT - 1) - -/* - * Construct the subbuffer id from offset, index and noref. Use only the index - * for producer-consumer mode (offset and noref are only used in overwrite - * mode). 
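The bit layout described above packs three fields into one word so the reader can exchange it atomically. A standalone sketch of that packing, mirroring the SB_ID_* constants with toy names:

#include <stdio.h>
#include <limits.h>

#define BITS_PER_LONG		(sizeof(unsigned long) * CHAR_BIT)
#define HALF_BITS		(BITS_PER_LONG >> 1)
#define ID_OFFSET_SHIFT		(HALF_BITS + 1)
#define ID_NOREF_SHIFT		HALF_BITS
#define ID_NOREF_MASK		(1UL << ID_NOREF_SHIFT)
#define ID_INDEX_MASK		(ID_NOREF_MASK - 1)

static unsigned long pack_id(unsigned long offset, unsigned long noref,
			     unsigned long index)
{
	return (offset << ID_OFFSET_SHIFT) | (noref << ID_NOREF_SHIFT) | index;
}

int main(void)
{
	unsigned long id = pack_id(42, 1, 7);

	printf("index=%lu noref=%lu offset=%lu\n",
	       id & ID_INDEX_MASK,
	       (id & ID_NOREF_MASK) >> ID_NOREF_SHIFT,
	       id >> ID_OFFSET_SHIFT);
	return 0;
}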
- */ -static inline -unsigned long subbuffer_id(const struct lttng_ust_lib_ring_buffer_config *config, - unsigned long offset, unsigned long noref, - unsigned long index) -{ - if (config->mode == RING_BUFFER_OVERWRITE) - return (offset << SB_ID_OFFSET_SHIFT) - | (noref << SB_ID_NOREF_SHIFT) - | index; - else - return index; -} - -/* - * Compare offset with the offset contained within id. Return 1 if the offset - * bits are identical, else 0. - */ -static inline -int subbuffer_id_compare_offset( - const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), - unsigned long id, unsigned long offset) -{ - return (id & SB_ID_OFFSET_MASK) == (offset << SB_ID_OFFSET_SHIFT); -} - -static inline -unsigned long subbuffer_id_get_index(const struct lttng_ust_lib_ring_buffer_config *config, - unsigned long id) -{ - if (config->mode == RING_BUFFER_OVERWRITE) - return id & SB_ID_INDEX_MASK; - else - return id; -} - -static inline -unsigned long subbuffer_id_is_noref(const struct lttng_ust_lib_ring_buffer_config *config, - unsigned long id) -{ - if (config->mode == RING_BUFFER_OVERWRITE) - return !!(id & SB_ID_NOREF_MASK); - else - return 1; -} - -/* - * Only used by reader on subbuffer ID it has exclusive access to. No volatile - * needed. - */ -static inline -void subbuffer_id_set_noref(const struct lttng_ust_lib_ring_buffer_config *config, - unsigned long *id) -{ - if (config->mode == RING_BUFFER_OVERWRITE) - *id |= SB_ID_NOREF_MASK; -} - -static inline -void subbuffer_id_set_noref_offset(const struct lttng_ust_lib_ring_buffer_config *config, - unsigned long *id, unsigned long offset) -{ - unsigned long tmp; - - if (config->mode == RING_BUFFER_OVERWRITE) { - tmp = *id; - tmp &= ~SB_ID_OFFSET_MASK; - tmp |= offset << SB_ID_OFFSET_SHIFT; - tmp |= SB_ID_NOREF_MASK; - /* Volatile store, read concurrently by readers. */ - CMM_ACCESS_ONCE(*id) = tmp; - } -} - -/* No volatile access, since already used locally */ -static inline -void subbuffer_id_clear_noref(const struct lttng_ust_lib_ring_buffer_config *config, - unsigned long *id) -{ - if (config->mode == RING_BUFFER_OVERWRITE) - *id &= ~SB_ID_NOREF_MASK; -} - -/* - * For overwrite mode, cap the number of subbuffers per buffer to: - * 2^16 on 32-bit architectures - * 2^32 on 64-bit architectures - * This is required to fit in the index part of the ID. Return 0 on success, - * -EPERM on failure. - */ -static inline -int subbuffer_id_check_index(const struct lttng_ust_lib_ring_buffer_config *config, - unsigned long num_subbuf) -{ - if (config->mode == RING_BUFFER_OVERWRITE) - return (num_subbuf > (1UL << HALF_ULONG_BITS)) ? 
-EPERM : 0; - else - return 0; -} - -static inline -int lib_ring_buffer_backend_get_pages(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_ctx *ctx, - struct lttng_ust_lib_ring_buffer_backend_pages **backend_pages) -{ - struct lttng_ust_lib_ring_buffer_ctx_private *ctx_private = ctx->priv; - struct lttng_ust_lib_ring_buffer_backend *bufb = &ctx_private->buf->backend; - struct channel_backend *chanb = &ctx_private->chan->backend; - struct lttng_ust_shm_handle *handle = ctx_private->chan->handle; - size_t sbidx; - size_t offset = ctx_private->buf_offset; - struct lttng_ust_lib_ring_buffer_backend_subbuffer *wsb; - struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; - unsigned long sb_bindex, id; - struct lttng_ust_lib_ring_buffer_backend_pages *_backend_pages; - - offset &= chanb->buf_size - 1; - sbidx = offset >> chanb->subbuf_size_order; - wsb = shmp_index(handle, bufb->buf_wsb, sbidx); - if (caa_unlikely(!wsb)) - return -1; - id = wsb->id; - sb_bindex = subbuffer_id_get_index(config, id); - rpages = shmp_index(handle, bufb->array, sb_bindex); - if (caa_unlikely(!rpages)) - return -1; - CHAN_WARN_ON(ctx_private->chan, - config->mode == RING_BUFFER_OVERWRITE - && subbuffer_id_is_noref(config, id)); - _backend_pages = shmp(handle, rpages->shmp); - if (caa_unlikely(!_backend_pages)) - return -1; - *backend_pages = _backend_pages; - return 0; -} - -/* Get backend pages from cache. */ -static inline -struct lttng_ust_lib_ring_buffer_backend_pages * - lib_ring_buffer_get_backend_pages_from_ctx( - const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), - struct lttng_ust_lib_ring_buffer_ctx *ctx) -{ - return ctx->priv->backend_pages; -} - -/* - * The ring buffer can count events recorded and overwritten per buffer, - * but it is disabled by default due to its performance overhead. - */ -#ifdef LTTNG_RING_BUFFER_COUNT_EVENTS -static inline -void subbuffer_count_record(const struct lttng_ust_lib_ring_buffer_config *config, - const struct lttng_ust_lib_ring_buffer_ctx *ctx, - struct lttng_ust_lib_ring_buffer_backend *bufb, - unsigned long idx, struct lttng_ust_shm_handle *handle) -{ - struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; - - backend_pages = lib_ring_buffer_get_backend_pages_from_ctx(config, ctx); - if (caa_unlikely(!backend_pages)) { - if (lib_ring_buffer_backend_get_pages(config, ctx, &backend_pages)) - return; - } - v_inc(config, &backend_pages->records_commit); -} -#else /* LTTNG_RING_BUFFER_COUNT_EVENTS */ -static inline -void subbuffer_count_record(const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), - const struct lttng_ust_lib_ring_buffer_ctx *ctx __attribute__((unused)), - struct lttng_ust_lib_ring_buffer_backend *bufb __attribute__((unused)), - unsigned long idx __attribute__((unused)), - struct lttng_ust_shm_handle *handle __attribute__((unused))) -{ -} -#endif /* #else LTTNG_RING_BUFFER_COUNT_EVENTS */ - -/* - * Reader has exclusive subbuffer access for record consumption. No need to - * perform the decrement atomically. 
- */ -static inline -void subbuffer_consume_record(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_backend *bufb, - struct lttng_ust_shm_handle *handle) -{ - unsigned long sb_bindex; - struct lttng_ust_lib_ring_buffer_channel *chan; - struct lttng_ust_lib_ring_buffer_backend_pages_shmp *pages_shmp; - struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; - - sb_bindex = subbuffer_id_get_index(config, bufb->buf_rsb.id); - chan = shmp(handle, bufb->chan); - if (!chan) - return; - pages_shmp = shmp_index(handle, bufb->array, sb_bindex); - if (!pages_shmp) - return; - backend_pages = shmp(handle, pages_shmp->shmp); - if (!backend_pages) - return; - CHAN_WARN_ON(chan, !v_read(config, &backend_pages->records_unread)); - /* Non-atomic decrement protected by exclusive subbuffer access */ - _v_dec(config, &backend_pages->records_unread); - v_inc(config, &bufb->records_read); -} - -static inline -unsigned long subbuffer_get_records_count( - const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_backend *bufb, - unsigned long idx, - struct lttng_ust_shm_handle *handle) -{ - unsigned long sb_bindex; - struct lttng_ust_lib_ring_buffer_backend_subbuffer *wsb; - struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; - struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; - - wsb = shmp_index(handle, bufb->buf_wsb, idx); - if (!wsb) - return 0; - sb_bindex = subbuffer_id_get_index(config, wsb->id); - rpages = shmp_index(handle, bufb->array, sb_bindex); - if (!rpages) - return 0; - backend_pages = shmp(handle, rpages->shmp); - if (!backend_pages) - return 0; - return v_read(config, &backend_pages->records_commit); -} - -/* - * Must be executed at subbuffer delivery when the writer has _exclusive_ - * subbuffer access. See lib_ring_buffer_check_deliver() for details. - * lib_ring_buffer_get_records_count() must be called to get the records - * count before this function, because it resets the records_commit - * count. 
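The ordering constraint stated above (sample the commit count before the overrun accounting recycles it) in a minimal sketch where plain integers stand in for the per-sub-buffer v_atomic counters:

#include <stdio.h>

static unsigned long records_commit = 12;	/* records written this round */
static unsigned long records_unread = 3;	/* still unread from last round */

int main(void)
{
	/* 1) Sample the commit count first (get_records_count step). */
	unsigned long count = records_commit;

	/* 2) Delivery-time overrun accounting: what was still unread is
	 *    overwritten, and the counters are recycled for the next round. */
	unsigned long overruns = records_unread;
	records_unread = records_commit;
	records_commit = 0;

	printf("delivered %lu records, %lu overrun\n", count, overruns);
	return 0;
}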
- */ -static inline -unsigned long subbuffer_count_records_overrun( - const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_backend *bufb, - unsigned long idx, - struct lttng_ust_shm_handle *handle) -{ - unsigned long overruns, sb_bindex; - struct lttng_ust_lib_ring_buffer_backend_subbuffer *wsb; - struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; - struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; - - wsb = shmp_index(handle, bufb->buf_wsb, idx); - if (!wsb) - return 0; - sb_bindex = subbuffer_id_get_index(config, wsb->id); - rpages = shmp_index(handle, bufb->array, sb_bindex); - if (!rpages) - return 0; - backend_pages = shmp(handle, rpages->shmp); - if (!backend_pages) - return 0; - overruns = v_read(config, &backend_pages->records_unread); - v_set(config, &backend_pages->records_unread, - v_read(config, &backend_pages->records_commit)); - v_set(config, &backend_pages->records_commit, 0); - - return overruns; -} - -static inline -void subbuffer_set_data_size(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_backend *bufb, - unsigned long idx, - unsigned long data_size, - struct lttng_ust_shm_handle *handle) -{ - unsigned long sb_bindex; - struct lttng_ust_lib_ring_buffer_backend_subbuffer *wsb; - struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; - struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; - - wsb = shmp_index(handle, bufb->buf_wsb, idx); - if (!wsb) - return; - sb_bindex = subbuffer_id_get_index(config, wsb->id); - rpages = shmp_index(handle, bufb->array, sb_bindex); - if (!rpages) - return; - backend_pages = shmp(handle, rpages->shmp); - if (!backend_pages) - return; - backend_pages->data_size = data_size; -} - -static inline -unsigned long subbuffer_get_read_data_size( - const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_backend *bufb, - struct lttng_ust_shm_handle *handle) -{ - unsigned long sb_bindex; - struct lttng_ust_lib_ring_buffer_backend_pages_shmp *pages_shmp; - struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; - - sb_bindex = subbuffer_id_get_index(config, bufb->buf_rsb.id); - pages_shmp = shmp_index(handle, bufb->array, sb_bindex); - if (!pages_shmp) - return 0; - backend_pages = shmp(handle, pages_shmp->shmp); - if (!backend_pages) - return 0; - return backend_pages->data_size; -} - -static inline -unsigned long subbuffer_get_data_size( - const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_backend *bufb, - unsigned long idx, - struct lttng_ust_shm_handle *handle) -{ - unsigned long sb_bindex; - struct lttng_ust_lib_ring_buffer_backend_subbuffer *wsb; - struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; - struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; - - wsb = shmp_index(handle, bufb->buf_wsb, idx); - if (!wsb) - return 0; - sb_bindex = subbuffer_id_get_index(config, wsb->id); - rpages = shmp_index(handle, bufb->array, sb_bindex); - if (!rpages) - return 0; - backend_pages = shmp(handle, rpages->shmp); - if (!backend_pages) - return 0; - return backend_pages->data_size; -} - -static inline -void subbuffer_inc_packet_count( - const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), - struct lttng_ust_lib_ring_buffer_backend *bufb, - unsigned long idx, struct lttng_ust_shm_handle *handle) -{ - struct lttng_ust_lib_ring_buffer_backend_counts *counts; - - counts = shmp_index(handle, bufb->buf_cnt, idx); - if 
(!counts) - return; - counts->seq_cnt++; -} - -/** - * lib_ring_buffer_clear_noref - Clear the noref subbuffer flag, called by - * writer. - */ -static inline -void lib_ring_buffer_clear_noref(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_backend *bufb, - unsigned long idx, - struct lttng_ust_shm_handle *handle) -{ - unsigned long id, new_id; - struct lttng_ust_lib_ring_buffer_backend_subbuffer *wsb; - - if (config->mode != RING_BUFFER_OVERWRITE) - return; - - /* - * Performing a volatile access to read the sb_pages, because we want to - * read a coherent version of the pointer and the associated noref flag. - */ - wsb = shmp_index(handle, bufb->buf_wsb, idx); - if (!wsb) - return; - id = CMM_ACCESS_ONCE(wsb->id); - for (;;) { - /* This check is called on the fast path for each record. */ - if (caa_likely(!subbuffer_id_is_noref(config, id))) { - /* - * Store after load dependency ordering the writes to - * the subbuffer after load and test of the noref flag - * matches the memory barrier implied by the cmpxchg() - * in update_read_sb_index(). - */ - return; /* Already writing to this buffer */ - } - new_id = id; - subbuffer_id_clear_noref(config, &new_id); - new_id = uatomic_cmpxchg(&wsb->id, id, new_id); - if (caa_likely(new_id == id)) - break; - id = new_id; - } -} - -/** - * lib_ring_buffer_set_noref_offset - Set the noref subbuffer flag and offset, - * called by writer. - */ -static inline -void lib_ring_buffer_set_noref_offset(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_backend *bufb, - unsigned long idx, unsigned long offset, - struct lttng_ust_shm_handle *handle) -{ - struct lttng_ust_lib_ring_buffer_backend_subbuffer *wsb; - struct lttng_ust_lib_ring_buffer_channel *chan; - - if (config->mode != RING_BUFFER_OVERWRITE) - return; - - wsb = shmp_index(handle, bufb->buf_wsb, idx); - if (!wsb) - return; - /* - * Because ring_buffer_set_noref() is only called by a single thread - * (the one which updated the cc_sb value), there are no concurrent - * updates to take care of: other writers have not updated cc_sb, so - * they cannot set the noref flag, and concurrent readers cannot modify - * the pointer because the noref flag is not set yet. - * The smp_wmb() in ring_buffer_commit() takes care of ordering writes - * to the subbuffer before this set noref operation. - * subbuffer_set_noref() uses a volatile store to deal with concurrent - * readers of the noref flag. - */ - chan = shmp(handle, bufb->chan); - if (!chan) - return; - CHAN_WARN_ON(chan, subbuffer_id_is_noref(config, wsb->id)); - /* - * Memory barrier that ensures counter stores are ordered before set - * noref and offset. - */ - cmm_smp_mb(); - subbuffer_id_set_noref_offset(config, &wsb->id, offset); -} - -/** - * update_read_sb_index - Read-side subbuffer index update. 
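Both the noref clearing above and the read-side index update below rely on the same compare-and-exchange retry idiom: re-read the published id and retry until the value the update was based on is still current. A standalone sketch of that idiom, with C11 atomics standing in for the liburcu uatomic helpers and a made-up flag bit:

#include <stdatomic.h>
#include <stdio.h>

#define NOREF_FLAG	(1UL << 31)

static _Atomic unsigned long wsb_id = NOREF_FLAG | 5;	/* flag set, index 5 */

static void toy_clear_noref(void)
{
	unsigned long old = atomic_load(&wsb_id);

	for (;;) {
		if (!(old & NOREF_FLAG))
			return;			/* already cleared */
		unsigned long new = old & ~NOREF_FLAG;
		/* On failure, "old" is refreshed with the current value. */
		if (atomic_compare_exchange_weak(&wsb_id, &old, new))
			break;
	}
}

int main(void)
{
	toy_clear_noref();
	printf("id after clear: %lu\n", (unsigned long) atomic_load(&wsb_id));
	return 0;
}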
- */ -static inline -int update_read_sb_index(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_backend *bufb, - struct channel_backend *chanb __attribute__((unused)), - unsigned long consumed_idx, - unsigned long consumed_count, - struct lttng_ust_shm_handle *handle) -{ - struct lttng_ust_lib_ring_buffer_backend_subbuffer *wsb; - unsigned long old_id, new_id; - - wsb = shmp_index(handle, bufb->buf_wsb, consumed_idx); - if (caa_unlikely(!wsb)) - return -EPERM; - - if (config->mode == RING_BUFFER_OVERWRITE) { - struct lttng_ust_lib_ring_buffer_channel *chan; - - /* - * Exchange the target writer subbuffer with our own unused - * subbuffer. No need to use CMM_ACCESS_ONCE() here to read the - * old_wpage, because the value read will be confirmed by the - * following cmpxchg(). - */ - old_id = wsb->id; - if (caa_unlikely(!subbuffer_id_is_noref(config, old_id))) - return -EAGAIN; - /* - * Make sure the offset count we are expecting matches the one - * indicated by the writer. - */ - if (caa_unlikely(!subbuffer_id_compare_offset(config, old_id, - consumed_count))) - return -EAGAIN; - chan = shmp(handle, bufb->chan); - if (caa_unlikely(!chan)) - return -EPERM; - CHAN_WARN_ON(chan, !subbuffer_id_is_noref(config, bufb->buf_rsb.id)); - subbuffer_id_set_noref_offset(config, &bufb->buf_rsb.id, - consumed_count); - new_id = uatomic_cmpxchg(&wsb->id, old_id, bufb->buf_rsb.id); - if (caa_unlikely(old_id != new_id)) - return -EAGAIN; - bufb->buf_rsb.id = new_id; - } else { - /* No page exchange, use the writer page directly */ - bufb->buf_rsb.id = wsb->id; - } - return 0; -} - -#ifndef inline_memcpy -#define inline_memcpy(dest, src, n) memcpy(dest, src, n) -#endif - -static inline -void lttng_inline_memcpy(void *dest, const void *src, - unsigned long len) - __attribute__((always_inline)); -static inline -void lttng_inline_memcpy(void *dest, const void *src, - unsigned long len) -{ - switch (len) { - case 1: - *(uint8_t *) dest = *(const uint8_t *) src; - break; - case 2: - *(uint16_t *) dest = *(const uint16_t *) src; - break; - case 4: - *(uint32_t *) dest = *(const uint32_t *) src; - break; - case 8: - *(uint64_t *) dest = *(const uint64_t *) src; - break; - default: - inline_memcpy(dest, src, len); - } -} - -/* - * Use the architecture-specific memcpy implementation for constant-sized - * inputs, but rely on an inline memcpy for length statically unknown. - * The function call to memcpy is just way too expensive for a fast path. 
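The dispatch idea behind lib_ring_buffer_do_copy() (shown next) is to let the compiler expand memcpy() inline when the length is a compile-time constant and to fall back to a small size switch otherwise. A standalone sketch, GCC/clang only because of __builtin_constant_p(); the 2/4/8-byte cases use memcpy() here to avoid the alignment assumption the real typed stores can make on aligned ring buffer records:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

static inline void small_copy(void *dest, const void *src, unsigned long len)
{
	switch (len) {
	case 1:	*(uint8_t *) dest = *(const uint8_t *) src; break;
	case 2:	memcpy(dest, src, 2); break;
	case 4:	memcpy(dest, src, 4); break;
	case 8:	memcpy(dest, src, 8); break;
	default: memcpy(dest, src, len);
	}
}

#define toy_do_copy(dest, src, len)			\
	do {						\
		unsigned long __len = (len);		\
		if (__builtin_constant_p(len))		\
			memcpy(dest, src, __len);	\
		else					\
			small_copy(dest, src, __len);	\
	} while (0)

int main(void)
{
	char a[16] = "ring buffer", b[16];
	unsigned long n = 12;		/* not a compile-time constant */

	toy_do_copy(b, a, sizeof(a));	/* constant: direct memcpy */
	toy_do_copy(b, a, n);		/* variable: size-switch path */
	printf("%s\n", b);
	return 0;
}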
- */ -#define lib_ring_buffer_do_copy(config, dest, src, len) \ -do { \ - size_t __len = (len); \ - if (__builtin_constant_p(len)) \ - memcpy(dest, src, __len); \ - else \ - lttng_inline_memcpy(dest, src, __len); \ -} while (0) - -/* - * write len bytes to dest with c - */ -static inline -void lib_ring_buffer_do_memset(char *dest, char c, unsigned long len) -{ - unsigned long i; - - for (i = 0; i < len; i++) - dest[i] = c; -} - -/* arch-agnostic implementation */ - -static inline int lttng_ust_fls(unsigned int x) -{ - int r = 32; - - if (!x) - return 0; - if (!(x & 0xFFFF0000U)) { - x <<= 16; - r -= 16; - } - if (!(x & 0xFF000000U)) { - x <<= 8; - r -= 8; - } - if (!(x & 0xF0000000U)) { - x <<= 4; - r -= 4; - } - if (!(x & 0xC0000000U)) { - x <<= 2; - r -= 2; - } - if (!(x & 0x80000000U)) { - /* No need to bit shift on last operation */ - r -= 1; - } - return r; -} - -static inline int get_count_order(unsigned int count) -{ - int order; - - order = lttng_ust_fls(count) - 1; - if (count & (count - 1)) - order++; - return order; -} - -#endif /* _LTTNG_RING_BUFFER_BACKEND_INTERNAL_H */ diff --git a/src/libringbuffer/backend_types.h b/src/libringbuffer/backend_types.h deleted file mode 100644 index 19d6513d..00000000 --- a/src/libringbuffer/backend_types.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * SPDX-License-Identifier: LGPL-2.1-only - * - * Copyright (C) 2008-2012 Mathieu Desnoyers - * - * Ring buffer backend (types). - */ - -#ifndef _LTTNG_RING_BUFFER_BACKEND_TYPES_H -#define _LTTNG_RING_BUFFER_BACKEND_TYPES_H - -#include -#include -#include "shm_internal.h" -#include "vatomic.h" - -#define RB_BACKEND_PAGES_PADDING 16 -struct lttng_ust_lib_ring_buffer_backend_pages { - unsigned long mmap_offset; /* offset of the subbuffer in mmap */ - union v_atomic records_commit; /* current records committed count */ - union v_atomic records_unread; /* records to read */ - unsigned long data_size; /* Amount of data to read from subbuf */ - DECLARE_SHMP(char, p); /* Backing memory map */ - char padding[RB_BACKEND_PAGES_PADDING]; -}; - -struct lttng_ust_lib_ring_buffer_backend_subbuffer { - /* Identifier for subbuf backend pages. Exchanged atomically. */ - unsigned long id; /* backend subbuffer identifier */ -}; - -struct lttng_ust_lib_ring_buffer_backend_counts { - /* - * Counter specific to the sub-buffer location within the ring buffer. - * The actual sequence number of the packet within the entire ring - * buffer can be derived from the formula nr_subbuffers * seq_cnt + - * subbuf_idx. - */ - uint64_t seq_cnt; /* packet sequence number */ -}; - -/* - * Forward declaration of frontend-specific channel and ring_buffer. - */ -struct lttng_ust_lib_ring_buffer_channel; -struct lttng_ust_lib_ring_buffer; - -struct lttng_ust_lib_ring_buffer_backend_pages_shmp { - DECLARE_SHMP(struct lttng_ust_lib_ring_buffer_backend_pages, shmp); -}; - -#define RB_BACKEND_RING_BUFFER_PADDING 64 -struct lttng_ust_lib_ring_buffer_backend { - /* Array of ring_buffer_backend_subbuffer for writer */ - DECLARE_SHMP(struct lttng_ust_lib_ring_buffer_backend_subbuffer, buf_wsb); - /* ring_buffer_backend_subbuffer for reader */ - struct lttng_ust_lib_ring_buffer_backend_subbuffer buf_rsb; - /* Array of lib_ring_buffer_backend_counts for the packet counter */ - DECLARE_SHMP(struct lttng_ust_lib_ring_buffer_backend_counts, buf_cnt); - /* - * Pointer array of backend pages, for whole buffer. - * Indexed by ring_buffer_backend_subbuffer identifier (id) index. 
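get_count_order() above is what turns a sub-buffer count or size into the power-of-two order stored in the channel backend, rounding up for counts that are not powers of two. A small usage sketch with a simplified fls() helper that has the same contract:

#include <stdio.h>

static int toy_fls(unsigned int x)	/* 1-based highest set bit, 0 for 0 */
{
	int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

static int toy_count_order(unsigned int count)
{
	int order = toy_fls(count) - 1;

	if (count & (count - 1))	/* not a power of two: round up */
		order++;
	return order;
}

int main(void)
{
	printf("order(4096) = %d\n", toy_count_order(4096));	/* 12 */
	printf("order(5)    = %d\n", toy_count_order(5));	/* 3: rounded up to 8 */
	return 0;
}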
- */ - DECLARE_SHMP(struct lttng_ust_lib_ring_buffer_backend_pages_shmp, array); - DECLARE_SHMP(char, memory_map); /* memory mapping */ - - DECLARE_SHMP(struct lttng_ust_lib_ring_buffer_channel, chan); /* Associated channel */ - int cpu; /* This buffer's cpu. -1 if global. */ - union v_atomic records_read; /* Number of records read */ - unsigned int allocated:1; /* is buffer allocated ? */ - char padding[RB_BACKEND_RING_BUFFER_PADDING]; -}; - -struct lttng_ust_lib_ring_buffer_shmp { - DECLARE_SHMP(struct lttng_ust_lib_ring_buffer, shmp); /* Channel per-cpu buffers */ -}; - -#define RB_BACKEND_CHANNEL_PADDING 64 -struct channel_backend { - unsigned long buf_size; /* Size of the buffer */ - unsigned long subbuf_size; /* Sub-buffer size */ - unsigned int subbuf_size_order; /* Order of sub-buffer size */ - unsigned int num_subbuf_order; /* - * Order of number of sub-buffers/buffer - * for writer. - */ - unsigned int buf_size_order; /* Order of buffer size */ - unsigned int extra_reader_sb:1; /* has extra reader subbuffer ? */ - unsigned long num_subbuf; /* Number of sub-buffers for writer */ - uint64_t start_tsc; /* Channel creation TSC value */ - DECLARE_SHMP(void *, priv_data);/* Client-specific information */ - struct lttng_ust_lib_ring_buffer_config config; /* Ring buffer configuration */ - char name[NAME_MAX]; /* Channel name */ - char padding[RB_BACKEND_CHANNEL_PADDING]; - struct lttng_ust_lib_ring_buffer_shmp buf[]; -}; - -#endif /* _LTTNG_RING_BUFFER_BACKEND_TYPES_H */ diff --git a/src/libringbuffer/frontend.h b/src/libringbuffer/frontend.h deleted file mode 100644 index 12e21005..00000000 --- a/src/libringbuffer/frontend.h +++ /dev/null @@ -1,297 +0,0 @@ -/* - * SPDX-License-Identifier: LGPL-2.1-only - * - * Copyright (C) 2005-2012 Mathieu Desnoyers - * - * Ring Buffer Library Synchronization Header (API). - * - * See ring_buffer_frontend.c for more information on wait-free algorithms. - */ - -#ifndef _LTTNG_RING_BUFFER_FRONTEND_H -#define _LTTNG_RING_BUFFER_FRONTEND_H - -#include -#include - -#include -#include - -#include "smp.h" - -/* Internal helpers */ -#include "frontend_internal.h" - -/* Buffer creation/removal and setup operations */ - -/* - * switch_timer_interval is the time interval (in us) to fill sub-buffers with - * padding to let readers get those sub-buffers. Used for live streaming. - * - * read_timer_interval is the time interval (in us) to wake up pending readers. - * - * buf_addr is a pointer the the beginning of the preallocated buffer contiguous - * address mapping. It is used only by RING_BUFFER_STATIC configuration. It can - * be set to NULL for other backends. - * - * private data is a memory area for configuration data. This memory is - * managed by lib ring buffer. priv_data_align is the alignment required - * for the private data area. - */ - -extern -struct lttng_ust_shm_handle *channel_create(const struct lttng_ust_lib_ring_buffer_config *config, - const char *name, - size_t priv_data_align, - size_t priv_data_size, - void *priv_data_init, - void *priv, - void *buf_addr, - size_t subbuf_size, size_t num_subbuf, - unsigned int switch_timer_interval, - unsigned int read_timer_interval, - const int *stream_fds, int nr_stream_fds, - int64_t blocking_timeout) - __attribute__((visibility("hidden"))); - -/* - * channel_destroy finalizes all channel's buffers, waits for readers to - * release all references, and destroys the channel. 
- */ -void channel_destroy(struct lttng_ust_lib_ring_buffer_channel *chan, - struct lttng_ust_shm_handle *handle, - int consumer) - __attribute__((visibility("hidden"))); - - -/* Buffer read operations */ - -/* - * Iteration on channel cpumask needs to issue a read barrier to match the write - * barrier in cpu hotplug. It orders the cpumask read before read of per-cpu - * buffer data. The per-cpu buffer is never removed by cpu hotplug; teardown is - * only performed at channel destruction. - */ -#define for_each_channel_cpu(cpu, chan) \ - for_each_possible_cpu(cpu) - -extern struct lttng_ust_lib_ring_buffer *channel_get_ring_buffer( - const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_channel *chan, int cpu, - struct lttng_ust_shm_handle *handle, - int *shm_fd, int *wait_fd, - int *wakeup_fd, - uint64_t *memory_map_size) - __attribute__((visibility("hidden"))); - -extern -int ring_buffer_channel_close_wait_fd(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_channel *chan, - struct lttng_ust_shm_handle *handle) - __attribute__((visibility("hidden"))); - -extern -int ring_buffer_channel_close_wakeup_fd(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_channel *chan, - struct lttng_ust_shm_handle *handle) - __attribute__((visibility("hidden"))); - -extern -int ring_buffer_stream_close_wait_fd(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_channel *chan, - struct lttng_ust_shm_handle *handle, - int cpu) - __attribute__((visibility("hidden"))); - -extern -int ring_buffer_stream_close_wakeup_fd(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_channel *chan, - struct lttng_ust_shm_handle *handle, - int cpu) - __attribute__((visibility("hidden"))); - -extern int lib_ring_buffer_open_read(struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_shm_handle *handle) - __attribute__((visibility("hidden"))); - -extern void lib_ring_buffer_release_read(struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_shm_handle *handle) - __attribute__((visibility("hidden"))); - -/* - * Initialize signals for ring buffer. Should be called early e.g. by - * main() in the program to affect all threads. - */ -void lib_ringbuffer_signal_init(void) - __attribute__((visibility("hidden"))); - -/* - * Read sequence: snapshot, many get_subbuf/put_subbuf, move_consumer. 
- */ -extern int lib_ring_buffer_snapshot(struct lttng_ust_lib_ring_buffer *buf, - unsigned long *consumed, - unsigned long *produced, - struct lttng_ust_shm_handle *handle) - __attribute__((visibility("hidden"))); - -extern int lib_ring_buffer_snapshot_sample_positions( - struct lttng_ust_lib_ring_buffer *buf, - unsigned long *consumed, - unsigned long *produced, - struct lttng_ust_shm_handle *handle) - __attribute__((visibility("hidden"))); - -extern void lib_ring_buffer_move_consumer(struct lttng_ust_lib_ring_buffer *buf, - unsigned long consumed_new, - struct lttng_ust_shm_handle *handle) - __attribute__((visibility("hidden"))); - -extern int lib_ring_buffer_get_subbuf(struct lttng_ust_lib_ring_buffer *buf, - unsigned long consumed, - struct lttng_ust_shm_handle *handle) - __attribute__((visibility("hidden"))); - -extern void lib_ring_buffer_put_subbuf(struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_shm_handle *handle) - __attribute__((visibility("hidden"))); - -/* - * lib_ring_buffer_get_next_subbuf/lib_ring_buffer_put_next_subbuf are helpers - * to read sub-buffers sequentially. - */ -static inline int lib_ring_buffer_get_next_subbuf(struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_shm_handle *handle) -{ - int ret; - - ret = lib_ring_buffer_snapshot(buf, &buf->cons_snapshot, - &buf->prod_snapshot, handle); - if (ret) - return ret; - ret = lib_ring_buffer_get_subbuf(buf, buf->cons_snapshot, handle); - return ret; -} - -static inline -void lib_ring_buffer_put_next_subbuf(struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_shm_handle *handle) -{ - struct lttng_ust_lib_ring_buffer_channel *chan; - - chan = shmp(handle, buf->backend.chan); - if (!chan) - return; - lib_ring_buffer_put_subbuf(buf, handle); - lib_ring_buffer_move_consumer(buf, subbuf_align(buf->cons_snapshot, chan), - handle); -} - -extern void channel_reset(struct lttng_ust_lib_ring_buffer_channel *chan) - __attribute__((visibility("hidden"))); - -extern void lib_ring_buffer_reset(struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_shm_handle *handle) - __attribute__((visibility("hidden"))); - -static inline -unsigned long lib_ring_buffer_get_offset(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer *buf) -{ - return v_read(config, &buf->offset); -} - -static inline -unsigned long lib_ring_buffer_get_consumed( - const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), - struct lttng_ust_lib_ring_buffer *buf) -{ - return uatomic_read(&buf->consumed); -} - -/* - * Must call lib_ring_buffer_is_finalized before reading counters (memory - * ordering enforced with respect to trace teardown). - */ -static inline -int lib_ring_buffer_is_finalized( - const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), - struct lttng_ust_lib_ring_buffer *buf) -{ - int finalized = CMM_ACCESS_ONCE(buf->finalized); - /* - * Read finalized before counters. 
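A hedged sketch of the read sequence documented above, using only functions declared in this header. It assumes buf and handle come from the consumer-side mapping of the channel (not shown) and leaves the decoding of each sub-buffer payload to the caller:

static void toy_drain(struct lttng_ust_lib_ring_buffer *buf,
		struct lttng_ust_shm_handle *handle)
{
	while (!lib_ring_buffer_get_next_subbuf(buf, handle)) {
		/*
		 * A complete sub-buffer is now owned by the reader; its
		 * payload can be copied out with lib_ring_buffer_read()
		 * or mapped with lib_ring_buffer_read_offset_address().
		 */
		lib_ring_buffer_put_next_subbuf(buf, handle);
	}
}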
- */ - cmm_smp_rmb(); - return finalized; -} - -static inline -int lib_ring_buffer_channel_is_finalized(const struct lttng_ust_lib_ring_buffer_channel *chan) -{ - return chan->finalized; -} - -static inline -int lib_ring_buffer_channel_is_disabled(const struct lttng_ust_lib_ring_buffer_channel *chan) -{ - return uatomic_read(&chan->record_disabled); -} - -static inline -unsigned long lib_ring_buffer_get_read_data_size( - const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_shm_handle *handle) -{ - return subbuffer_get_read_data_size(config, &buf->backend, handle); -} - -static inline -unsigned long lib_ring_buffer_get_records_count( - const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer *buf) -{ - return v_read(config, &buf->records_count); -} - -static inline -unsigned long lib_ring_buffer_get_records_overrun( - const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer *buf) -{ - return v_read(config, &buf->records_overrun); -} - -static inline -unsigned long lib_ring_buffer_get_records_lost_full( - const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer *buf) -{ - return v_read(config, &buf->records_lost_full); -} - -static inline -unsigned long lib_ring_buffer_get_records_lost_wrap( - const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer *buf) -{ - return v_read(config, &buf->records_lost_wrap); -} - -static inline -unsigned long lib_ring_buffer_get_records_lost_big( - const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer *buf) -{ - return v_read(config, &buf->records_lost_big); -} - -static inline -unsigned long lib_ring_buffer_get_records_read( - const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer *buf) -{ - return v_read(config, &buf->backend.records_read); -} - -#endif /* _LTTNG_RING_BUFFER_FRONTEND_H */ diff --git a/src/libringbuffer/frontend_api.h b/src/libringbuffer/frontend_api.h deleted file mode 100644 index ac5e31bf..00000000 --- a/src/libringbuffer/frontend_api.h +++ /dev/null @@ -1,370 +0,0 @@ -/* - * SPDX-License-Identifier: LGPL-2.1-only - * - * Copyright (C) 2005-2012 Mathieu Desnoyers - * - * See ring_buffer_frontend.c for more information on wait-free - * algorithms. - * See frontend.h for channel allocation and read-side API. - */ - -#ifndef _LTTNG_RING_BUFFER_FRONTEND_API_H -#define _LTTNG_RING_BUFFER_FRONTEND_API_H - -#include - -#include - -#include "frontend.h" - -/** - * lib_ring_buffer_nesting_inc - Ring buffer recursive use protection. - * - * The rint buffer buffer nesting count is a safety net to ensure tracer - * client code will never trigger an endless recursion. - * Returns a nesting level >= 0 on success, -EPERM on failure (nesting - * count too high). - * - * asm volatile and "memory" clobber prevent the compiler from moving - * instructions out of the ring buffer nesting count. This is required to ensure - * that probe side-effects which can cause recursion (e.g. unforeseen traps, - * divisions by 0, ...) are triggered within the incremented nesting count - * section. 
- */ -static inline -int lib_ring_buffer_nesting_inc( - const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused))) -{ - int nesting; - - nesting = ++URCU_TLS(lib_ring_buffer_nesting); - cmm_barrier(); - if (caa_unlikely(nesting >= LIB_RING_BUFFER_MAX_NESTING)) { - WARN_ON_ONCE(1); - URCU_TLS(lib_ring_buffer_nesting)--; - return -EPERM; - } - return nesting - 1; -} - -static inline -int lib_ring_buffer_nesting_count( - const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused))) -{ - return URCU_TLS(lib_ring_buffer_nesting); -} - -static inline -void lib_ring_buffer_nesting_dec( - const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused))) -{ - cmm_barrier(); - URCU_TLS(lib_ring_buffer_nesting)--; /* TLS */ -} - -/* - * lib_ring_buffer_try_reserve is called by lib_ring_buffer_reserve(). It is not - * part of the API per se. - * - * returns 0 if reserve ok, or 1 if the slow path must be taken. - */ -static inline -int lib_ring_buffer_try_reserve(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_ctx *ctx, - void *client_ctx, - unsigned long *o_begin, unsigned long *o_end, - unsigned long *o_old, size_t *before_hdr_pad) -{ - struct lttng_ust_lib_ring_buffer_ctx_private *ctx_private = ctx->priv; - struct lttng_ust_lib_ring_buffer_channel *chan = ctx_private->chan; - struct lttng_ust_lib_ring_buffer *buf = ctx_private->buf; - *o_begin = v_read(config, &buf->offset); - *o_old = *o_begin; - - ctx_private->tsc = lib_ring_buffer_clock_read(chan); - if ((int64_t) ctx_private->tsc == -EIO) - return 1; - - /* - * Prefetch cacheline for read because we have to read the previous - * commit counter to increment it and commit seq value to compare it to - * the commit counter. - */ - //prefetch(&buf->commit_hot[subbuf_index(*o_begin, chan)]); - - if (last_tsc_overflow(config, buf, ctx_private->tsc)) - ctx_private->rflags |= RING_BUFFER_RFLAG_FULL_TSC; - - if (caa_unlikely(subbuf_offset(*o_begin, chan) == 0)) - return 1; - - ctx_private->slot_size = record_header_size(config, chan, *o_begin, - before_hdr_pad, ctx, client_ctx); - ctx_private->slot_size += - lttng_ust_lib_ring_buffer_align(*o_begin + ctx_private->slot_size, - ctx->largest_align) + ctx->data_size; - if (caa_unlikely((subbuf_offset(*o_begin, chan) + ctx_private->slot_size) - > chan->backend.subbuf_size)) - return 1; - - /* - * Record fits in the current buffer and we are not on a switch - * boundary. It's safe to write. - */ - *o_end = *o_begin + ctx_private->slot_size; - - if (caa_unlikely((subbuf_offset(*o_end, chan)) == 0)) - /* - * The offset_end will fall at the very beginning of the next - * subbuffer. - */ - return 1; - - return 0; -} - -/** - * lib_ring_buffer_reserve - Reserve space in a ring buffer. - * @config: ring buffer instance configuration. - * @ctx: ring buffer context. (input and output) Must be already initialized. - * - * Atomic wait-free slot reservation. The reserved space starts at the context - * "pre_offset". Its length is "slot_size". The associated time-stamp is "tsc". - * - * Return : - * 0 on success. - * -EAGAIN if channel is disabled. - * -ENOSPC if event size is too large for packet. - * -ENOBUFS if there is currently not enough space in buffer for the event. - * -EIO if data cannot be written into the buffer for any other reason. 
- */ - -static inline -int lib_ring_buffer_reserve(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_ctx *ctx, - void *client_ctx) -{ - struct lttng_ust_lib_ring_buffer_ctx_private *ctx_private = ctx->priv; - struct lttng_ust_lib_ring_buffer_channel *chan = ctx_private->chan; - struct lttng_ust_shm_handle *handle = chan->handle; - struct lttng_ust_lib_ring_buffer *buf; - unsigned long o_begin, o_end, o_old; - size_t before_hdr_pad = 0; - - if (caa_unlikely(uatomic_read(&chan->record_disabled))) - return -EAGAIN; - - if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) { - ctx_private->reserve_cpu = lttng_ust_get_cpu(); - buf = shmp(handle, chan->backend.buf[ctx_private->reserve_cpu].shmp); - } else { - buf = shmp(handle, chan->backend.buf[0].shmp); - } - if (caa_unlikely(!buf)) - return -EIO; - if (caa_unlikely(uatomic_read(&buf->record_disabled))) - return -EAGAIN; - ctx_private->buf = buf; - - /* - * Perform retryable operations. - */ - if (caa_unlikely(lib_ring_buffer_try_reserve(config, ctx, client_ctx, &o_begin, - &o_end, &o_old, &before_hdr_pad))) - goto slow_path; - - if (caa_unlikely(v_cmpxchg(config, &buf->offset, o_old, o_end) - != o_old)) - goto slow_path; - - /* - * Atomically update last_tsc. This update races against concurrent - * atomic updates, but the race will always cause supplementary full TSC - * record headers, never the opposite (missing a full TSC record header - * when it would be needed). - */ - save_last_tsc(config, buf, ctx_private->tsc); - - /* - * Push the reader if necessary - */ - lib_ring_buffer_reserve_push_reader(buf, chan, o_end - 1); - - /* - * Clear noref flag for this subbuffer. - */ - lib_ring_buffer_clear_noref(config, &buf->backend, - subbuf_index(o_end - 1, chan), handle); - - ctx_private->pre_offset = o_begin; - ctx_private->buf_offset = o_begin + before_hdr_pad; - return 0; -slow_path: - return lib_ring_buffer_reserve_slow(ctx, client_ctx); -} - -/** - * lib_ring_buffer_switch - Perform a sub-buffer switch for a per-cpu buffer. - * @config: ring buffer instance configuration. - * @buf: buffer - * @mode: buffer switch mode (SWITCH_ACTIVE or SWITCH_FLUSH) - * - * This operation is completely reentrant : can be called while tracing is - * active with absolutely no lock held. - * - * Note, however, that as a v_cmpxchg is used for some atomic operations and - * requires to be executed locally for per-CPU buffers, this function must be - * called from the CPU which owns the buffer for a ACTIVE flush, with preemption - * disabled, for RING_BUFFER_SYNC_PER_CPU configuration. - */ -static inline -void lib_ring_buffer_switch( - const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), - struct lttng_ust_lib_ring_buffer *buf, enum switch_mode mode, - struct lttng_ust_shm_handle *handle) -{ - lib_ring_buffer_switch_slow(buf, mode, handle); -} - -/* See ring_buffer_frontend_api.h for lib_ring_buffer_reserve(). */ - -/** - * lib_ring_buffer_commit - Commit an record. - * @config: ring buffer instance configuration. - * @ctx: ring buffer context. (input arguments only) - * - * Atomic unordered slot commit. Increments the commit count in the - * specified sub-buffer, and delivers it if necessary. 
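Putting the pieces together, a hedged sketch of the producer fast path a client builds from these helpers. It assumes config, client_ctx and a fully initialized reservation context ctx are provided by the ring buffer client (context setup and record-header serialization are client-specific and omitted here), and that payload/len fit within one record:

static int toy_trace_record(const struct lttng_ust_lib_ring_buffer_config *config,
		struct lttng_ust_lib_ring_buffer_ctx *ctx,
		void *client_ctx, const void *payload, size_t len)
{
	int ret;

	ret = lib_ring_buffer_nesting_inc(config);
	if (ret < 0)
		return ret;		/* re-entered the tracer too deeply */
	ret = lib_ring_buffer_reserve(config, ctx, client_ctx);
	if (ret)
		goto end;		/* -EAGAIN, -ENOSPC, -ENOBUFS or -EIO */
	/* The client would serialize its record header here. */
	lib_ring_buffer_write(config, ctx, payload, len);
	lib_ring_buffer_commit(config, ctx);
end:
	lib_ring_buffer_nesting_dec(config);
	return ret;
}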
- */ -static inline -void lib_ring_buffer_commit(const struct lttng_ust_lib_ring_buffer_config *config, - const struct lttng_ust_lib_ring_buffer_ctx *ctx) -{ - struct lttng_ust_lib_ring_buffer_ctx_private *ctx_private = ctx->priv; - struct lttng_ust_lib_ring_buffer_channel *chan = ctx_private->chan; - struct lttng_ust_shm_handle *handle = chan->handle; - struct lttng_ust_lib_ring_buffer *buf = ctx_private->buf; - unsigned long offset_end = ctx_private->buf_offset; - unsigned long endidx = subbuf_index(offset_end - 1, chan); - unsigned long commit_count; - struct commit_counters_hot *cc_hot = shmp_index(handle, - buf->commit_hot, endidx); - - if (caa_unlikely(!cc_hot)) - return; - - /* - * Must count record before incrementing the commit count. - */ - subbuffer_count_record(config, ctx, &buf->backend, endidx, handle); - - /* - * Order all writes to buffer before the commit count update that will - * determine that the subbuffer is full. - */ - cmm_smp_wmb(); - - v_add(config, ctx_private->slot_size, &cc_hot->cc); - - /* - * commit count read can race with concurrent OOO commit count updates. - * This is only needed for lib_ring_buffer_check_deliver (for - * non-polling delivery only) and for - * lib_ring_buffer_write_commit_counter. The race can only cause the - * counter to be read with the same value more than once, which could - * cause : - * - Multiple delivery for the same sub-buffer (which is handled - * gracefully by the reader code) if the value is for a full - * sub-buffer. It's important that we can never miss a sub-buffer - * delivery. Re-reading the value after the v_add ensures this. - * - Reading a commit_count with a higher value that what was actually - * added to it for the lib_ring_buffer_write_commit_counter call - * (again caused by a concurrent committer). It does not matter, - * because this function is interested in the fact that the commit - * count reaches back the reserve offset for a specific sub-buffer, - * which is completely independent of the order. - */ - commit_count = v_read(config, &cc_hot->cc); - - lib_ring_buffer_check_deliver(config, buf, chan, offset_end - 1, - commit_count, endidx, handle, ctx_private->tsc); - /* - * Update used size at each commit. It's needed only for extracting - * ring_buffer buffers from vmcore, after crash. - */ - lib_ring_buffer_write_commit_counter(config, buf, chan, - offset_end, commit_count, handle, cc_hot); -} - -/** - * lib_ring_buffer_try_discard_reserve - Try discarding a record. - * @config: ring buffer instance configuration. - * @ctx: ring buffer context. (input arguments only) - * - * Only succeeds if no other record has been written after the record to - * discard. If discard fails, the record must be committed to the buffer. - * - * Returns 0 upon success, -EPERM if the record cannot be discarded. - */ -static inline -int lib_ring_buffer_try_discard_reserve(const struct lttng_ust_lib_ring_buffer_config *config, - const struct lttng_ust_lib_ring_buffer_ctx *ctx) -{ - struct lttng_ust_lib_ring_buffer_ctx_private *ctx_private = ctx->priv; - struct lttng_ust_lib_ring_buffer *buf = ctx_private->buf; - unsigned long end_offset = ctx_private->pre_offset + ctx_private->slot_size; - - /* - * We need to ensure that if the cmpxchg succeeds and discards the - * record, the next record will record a full TSC, because it cannot - * rely on the last_tsc associated with the discarded record to detect - * overflows. 
The only way to ensure this is to set the last_tsc to 0 - * (assuming no 64-bit TSC overflow), which forces to write a 64-bit - * timestamp in the next record. - * - * Note: if discard fails, we must leave the TSC in the record header. - * It is needed to keep track of TSC overflows for the following - * records. - */ - save_last_tsc(config, buf, 0ULL); - - if (caa_likely(v_cmpxchg(config, &buf->offset, end_offset, ctx_private->pre_offset) - != end_offset)) - return -EPERM; - else - return 0; -} - -static inline -void channel_record_disable( - const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), - struct lttng_ust_lib_ring_buffer_channel *chan) -{ - uatomic_inc(&chan->record_disabled); -} - -static inline -void channel_record_enable( - const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), - struct lttng_ust_lib_ring_buffer_channel *chan) -{ - uatomic_dec(&chan->record_disabled); -} - -static inline -void lib_ring_buffer_record_disable( - const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), - struct lttng_ust_lib_ring_buffer *buf) -{ - uatomic_inc(&buf->record_disabled); -} - -static inline -void lib_ring_buffer_record_enable( - const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), - struct lttng_ust_lib_ring_buffer *buf) -{ - uatomic_dec(&buf->record_disabled); -} - -#endif /* _LTTNG_RING_BUFFER_FRONTEND_API_H */ diff --git a/src/libringbuffer/frontend_internal.h b/src/libringbuffer/frontend_internal.h deleted file mode 100644 index 7d905d4b..00000000 --- a/src/libringbuffer/frontend_internal.h +++ /dev/null @@ -1,367 +0,0 @@ -/* - * SPDX-License-Identifier: (LGPL-2.1-only or GPL-2.0-only) - * - * Copyright (C) 2005-2012 Mathieu Desnoyers - * - * Ring Buffer Library Synchronization Header (internal helpers). - * - * See ring_buffer_frontend.c for more information on wait-free algorithms. - */ - -#ifndef _LTTNG_RING_BUFFER_FRONTEND_INTERNAL_H -#define _LTTNG_RING_BUFFER_FRONTEND_INTERNAL_H - -#include -#include -#include -#include -#include - -#include -#include "ringbuffer-config.h" -#include "backend_types.h" -#include "backend_internal.h" -#include "frontend_types.h" -#include "shm.h" - -/* Buffer offset macros */ - -/* buf_trunc mask selects only the buffer number. */ -static inline -unsigned long buf_trunc(unsigned long offset, - struct lttng_ust_lib_ring_buffer_channel *chan) -{ - return offset & ~(chan->backend.buf_size - 1); - -} - -/* Select the buffer number value (counter). */ -static inline -unsigned long buf_trunc_val(unsigned long offset, - struct lttng_ust_lib_ring_buffer_channel *chan) -{ - return buf_trunc(offset, chan) >> chan->backend.buf_size_order; -} - -/* buf_offset mask selects only the offset within the current buffer. */ -static inline -unsigned long buf_offset(unsigned long offset, - struct lttng_ust_lib_ring_buffer_channel *chan) -{ - return offset & (chan->backend.buf_size - 1); -} - -/* subbuf_offset mask selects the offset within the current subbuffer. */ -static inline -unsigned long subbuf_offset(unsigned long offset, - struct lttng_ust_lib_ring_buffer_channel *chan) -{ - return offset & (chan->backend.subbuf_size - 1); -} - -/* subbuf_trunc mask selects the subbuffer number. */ -static inline -unsigned long subbuf_trunc(unsigned long offset, - struct lttng_ust_lib_ring_buffer_channel *chan) -{ - return offset & ~(chan->backend.subbuf_size - 1); -} - -/* subbuf_align aligns the offset to the next subbuffer. 
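A worked example of the offset decomposition above, with toy geometry (buf_size = 4096 and subbuf_size = 1024, so orders 12 and 10) and a free-running offset of 5300:

#include <stdio.h>

#define BUF_SIZE	4096UL
#define SUBBUF_SIZE	1024UL
#define SUBBUF_ORDER	10

int main(void)
{
	unsigned long offset = 5300;

	printf("buf_trunc     = %lu\n", offset & ~(BUF_SIZE - 1));	/* 4096 */
	printf("buf_offset    = %lu\n", offset & (BUF_SIZE - 1));	/* 1204 */
	printf("subbuf_trunc  = %lu\n", offset & ~(SUBBUF_SIZE - 1));	/* 5120 */
	printf("subbuf_offset = %lu\n", offset & (SUBBUF_SIZE - 1));	/* 180 */
	printf("subbuf_index  = %lu\n",
	       (offset & (BUF_SIZE - 1)) >> SUBBUF_ORDER);		/* 1 */
	return 0;
}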
*/ -static inline -unsigned long subbuf_align(unsigned long offset, - struct lttng_ust_lib_ring_buffer_channel *chan) -{ - return (offset + chan->backend.subbuf_size) - & ~(chan->backend.subbuf_size - 1); -} - -/* subbuf_index returns the index of the current subbuffer within the buffer. */ -static inline -unsigned long subbuf_index(unsigned long offset, - struct lttng_ust_lib_ring_buffer_channel *chan) -{ - return buf_offset(offset, chan) >> chan->backend.subbuf_size_order; -} - -/* - * Last TSC comparison functions. Check if the current TSC overflows tsc_bits - * bits from the last TSC read. When overflows are detected, the full 64-bit - * timestamp counter should be written in the record header. Reads and writes - * last_tsc atomically. - */ - -#if (CAA_BITS_PER_LONG == 32) -static inline -void save_last_tsc(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer *buf, uint64_t tsc) -{ - if (config->tsc_bits == 0 || config->tsc_bits == 64) - return; - - /* - * Ensure the compiler performs this update in a single instruction. - */ - v_set(config, &buf->last_tsc, (unsigned long)(tsc >> config->tsc_bits)); -} - -static inline -int last_tsc_overflow(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer *buf, uint64_t tsc) -{ - unsigned long tsc_shifted; - - if (config->tsc_bits == 0 || config->tsc_bits == 64) - return 0; - - tsc_shifted = (unsigned long)(tsc >> config->tsc_bits); - if (caa_unlikely(tsc_shifted - - (unsigned long)v_read(config, &buf->last_tsc))) - return 1; - else - return 0; -} -#else -static inline -void save_last_tsc(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer *buf, uint64_t tsc) -{ - if (config->tsc_bits == 0 || config->tsc_bits == 64) - return; - - v_set(config, &buf->last_tsc, (unsigned long)tsc); -} - -static inline -int last_tsc_overflow(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer *buf, uint64_t tsc) -{ - if (config->tsc_bits == 0 || config->tsc_bits == 64) - return 0; - - if (caa_unlikely((tsc - v_read(config, &buf->last_tsc)) - >> config->tsc_bits)) - return 1; - else - return 0; -} -#endif - -extern -int lib_ring_buffer_reserve_slow(struct lttng_ust_lib_ring_buffer_ctx *ctx, - void *client_ctx) - __attribute__((visibility("hidden"))); - -extern -void lib_ring_buffer_switch_slow(struct lttng_ust_lib_ring_buffer *buf, - enum switch_mode mode, - struct lttng_ust_shm_handle *handle) - __attribute__((visibility("hidden"))); - -void lib_ring_buffer_check_deliver_slow(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_lib_ring_buffer_channel *chan, - unsigned long offset, - unsigned long commit_count, - unsigned long idx, - struct lttng_ust_shm_handle *handle, - uint64_t tsc) - __attribute__((visibility("hidden"))); - -/* Buffer write helpers */ - -static inline -void lib_ring_buffer_reserve_push_reader(struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_lib_ring_buffer_channel *chan, - unsigned long offset) -{ - unsigned long consumed_old, consumed_new; - - do { - consumed_old = uatomic_read(&buf->consumed); - /* - * If buffer is in overwrite mode, push the reader consumed - * count if the write position has reached it and we are not - * at the first iteration (don't push the reader farther than - * the writer). 
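The last-TSC check above exists because compact record headers carry only tsc_bits low-order timestamp bits; whenever the high-order bits move between records, a full 64-bit timestamp must be written instead. A standalone sketch of the 64-bit-long variant, folding the save step into the check for brevity (the real code keeps save_last_tsc() and last_tsc_overflow() separate):

#include <stdio.h>
#include <stdint.h>

#define TSC_BITS	27U		/* compact header carries 27 bits */

static uint64_t last_tsc;

static int needs_full_tsc(uint64_t tsc)
{
	int overflow = ((tsc - last_tsc) >> TSC_BITS) != 0;

	last_tsc = tsc;			/* save_last_tsc() equivalent */
	return overflow;
}

int main(void)
{
	uint64_t now = 1ULL << 40;	/* pretend clock value */

	printf("%d\n", needs_full_tsc(now));			/* 1: first record */
	printf("%d\n", needs_full_tsc(now + 1000));		/* 0: fits in 27 bits */
	printf("%d\n", needs_full_tsc(now + (1ULL << 28)));	/* 1: high bits moved */
	return 0;
}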
This operation can be done concurrently by many - * writers in the same buffer, the writer being at the farthest - * write position sub-buffer index in the buffer being the one - * which will win this loop. - */ - if (caa_unlikely(subbuf_trunc(offset, chan) - - subbuf_trunc(consumed_old, chan) - >= chan->backend.buf_size)) - consumed_new = subbuf_align(consumed_old, chan); - else - return; - } while (caa_unlikely(uatomic_cmpxchg(&buf->consumed, consumed_old, - consumed_new) != consumed_old)); -} - -/* - * Move consumed position to the beginning of subbuffer in which the - * write offset is. Should only be used on ring buffers that are not - * actively being written into, because clear_reader does not take into - * account the commit counters when moving the consumed position, which - * can make concurrent trace producers or consumers observe consumed - * position further than the write offset, which breaks ring buffer - * algorithm guarantees. - */ -static inline -void lib_ring_buffer_clear_reader(struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_shm_handle *handle) -{ - struct lttng_ust_lib_ring_buffer_channel *chan; - const struct lttng_ust_lib_ring_buffer_config *config; - unsigned long offset, consumed_old, consumed_new; - - chan = shmp(handle, buf->backend.chan); - if (!chan) - return; - config = &chan->backend.config; - - do { - offset = v_read(config, &buf->offset); - consumed_old = uatomic_read(&buf->consumed); - CHAN_WARN_ON(chan, (long) (subbuf_trunc(offset, chan) - - subbuf_trunc(consumed_old, chan)) - < 0); - consumed_new = subbuf_trunc(offset, chan); - } while (caa_unlikely(uatomic_cmpxchg(&buf->consumed, consumed_old, - consumed_new) != consumed_old)); -} - -static inline -int lib_ring_buffer_pending_data(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_lib_ring_buffer_channel *chan) -{ - return !!subbuf_offset(v_read(config, &buf->offset), chan); -} - -static inline -unsigned long lib_ring_buffer_get_data_size(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer *buf, - unsigned long idx, - struct lttng_ust_shm_handle *handle) -{ - return subbuffer_get_data_size(config, &buf->backend, idx, handle); -} - -/* - * Check if all space reservation in a buffer have been committed. This helps - * knowing if an execution context is nested (for per-cpu buffers only). - * This is a very specific ftrace use-case, so we keep this as "internal" API. - */ -static inline -int lib_ring_buffer_reserve_committed(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_lib_ring_buffer_channel *chan, - struct lttng_ust_shm_handle *handle) -{ - unsigned long offset, idx, commit_count; - struct commit_counters_hot *cc_hot; - - CHAN_WARN_ON(chan, config->alloc != RING_BUFFER_ALLOC_PER_CPU); - CHAN_WARN_ON(chan, config->sync != RING_BUFFER_SYNC_PER_CPU); - - /* - * Read offset and commit count in a loop so they are both read - * atomically wrt interrupts. By deal with interrupt concurrency by - * restarting both reads if the offset has been pushed. Note that given - * we only have to deal with interrupt concurrency here, an interrupt - * modifying the commit count will also modify "offset", so it is safe - * to only check for offset modifications. 
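Illustration (standalone sketch): the push-reader loop above is a plain compare-and-swap retry loop. The version below reproduces its shape with C11 atomics standing in for liburcu's uatomic_cmpxchg() and with invented geometry; the real code additionally goes through the shared-memory handle and the sub-buffer helpers.

#include <stdatomic.h>
#include <stdio.h>

#define BUF_SIZE	(16UL * 1024)	/* invented geometry */
#define SUBBUF_SIZE	(4UL * 1024)

static _Atomic unsigned long consumed;

/*
 * Push an overwrite-mode reader out of the sub-buffer the writer is about
 * to reuse.  Same retry-loop shape as above.
 */
static void push_reader(unsigned long write_offset)
{
	unsigned long consumed_old, consumed_new;

	do {
		consumed_old = atomic_load(&consumed);
		/* Writer is not a full buffer ahead: nothing to push. */
		if ((write_offset & ~(SUBBUF_SIZE - 1))
		    - (consumed_old & ~(SUBBUF_SIZE - 1)) < BUF_SIZE)
			return;
		/* Move the reader to the start of the next sub-buffer. */
		consumed_new = (consumed_old + SUBBUF_SIZE) & ~(SUBBUF_SIZE - 1);
	} while (!atomic_compare_exchange_weak(&consumed, &consumed_old,
					       consumed_new));
}

int main(void)
{
	push_reader(BUF_SIZE + 100);	/* writer has wrapped: reader moves ahead */
	printf("consumed = %lu\n", atomic_load(&consumed));
	return 0;
}

Losing the race simply means another writer already moved the reader at least as far, which is why the loop can retry without any lock.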
- */ - do { - offset = v_read(config, &buf->offset); - idx = subbuf_index(offset, chan); - cc_hot = shmp_index(handle, buf->commit_hot, idx); - if (caa_unlikely(!cc_hot)) - return 0; - commit_count = v_read(config, &cc_hot->cc); - } while (offset != v_read(config, &buf->offset)); - - return ((buf_trunc(offset, chan) >> chan->backend.num_subbuf_order) - - (commit_count & chan->commit_count_mask) == 0); -} - -/* - * Receive end of subbuffer TSC as parameter. It has been read in the - * space reservation loop of either reserve or switch, which ensures it - * progresses monotonically with event records in the buffer. Therefore, - * it ensures that the end timestamp of a subbuffer is <= begin - * timestamp of the following subbuffers. - */ -static inline -void lib_ring_buffer_check_deliver(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_lib_ring_buffer_channel *chan, - unsigned long offset, - unsigned long commit_count, - unsigned long idx, - struct lttng_ust_shm_handle *handle, - uint64_t tsc) -{ - unsigned long old_commit_count = commit_count - - chan->backend.subbuf_size; - - /* Check if all commits have been done */ - if (caa_unlikely((buf_trunc(offset, chan) >> chan->backend.num_subbuf_order) - - (old_commit_count & chan->commit_count_mask) == 0)) - lib_ring_buffer_check_deliver_slow(config, buf, chan, offset, - commit_count, idx, handle, tsc); -} - -/* - * lib_ring_buffer_write_commit_counter - * - * For flight recording. must be called after commit. - * This function increments the subbuffer's commit_seq counter each time the - * commit count reaches back the reserve offset (modulo subbuffer size). It is - * useful for crash dump. - */ -static inline -void lib_ring_buffer_write_commit_counter( - const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer *buf __attribute__((unused)), - struct lttng_ust_lib_ring_buffer_channel *chan, - unsigned long buf_offset, - unsigned long commit_count, - struct lttng_ust_shm_handle *handle __attribute__((unused)), - struct commit_counters_hot *cc_hot) -{ - unsigned long commit_seq_old; - - if (config->oops != RING_BUFFER_OOPS_CONSISTENCY) - return; - - /* - * subbuf_offset includes commit_count_mask. We can simply - * compare the offsets within the subbuffer without caring about - * buffer full/empty mismatch because offset is never zero here - * (subbuffer header and record headers have non-zero length). 
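Illustration (standalone sketch) of the delivery test in lib_ring_buffer_check_deliver() above: a sub-buffer is handed to the slow delivery path when its commit counter has accounted for one full sub-buffer of the current pass over the buffer. The sketch uses toy sizes and deliberately ignores the commit_count_mask wrap-around handling.

#include <stdio.h>

/* Toy geometry: four 4096-byte sub-buffers. */
#define SUBBUF_SIZE		4096UL
#define NUM_SUBBUF_ORDER	2	/* log2(number of sub-buffers) */
#define BUF_SIZE		(SUBBUF_SIZE << NUM_SUBBUF_ORDER)

/*
 * Deliverable when the commit counter of the sub-buffer has accounted for
 * every byte of the current pass over the buffer (mask handling omitted).
 */
static int subbuf_fully_committed(unsigned long offset, unsigned long commit_count)
{
	unsigned long old_commit_count = commit_count - SUBBUF_SIZE;

	return ((offset & ~(BUF_SIZE - 1)) >> NUM_SUBBUF_ORDER)
		- old_commit_count == 0;
}

int main(void)
{
	/*
	 * Writer is in sub-buffer 0 of its third pass, and sub-buffer 0 has
	 * just been fully committed for the third time.
	 */
	unsigned long offset = 2 * BUF_SIZE + 123;
	unsigned long commit_count = 3 * SUBBUF_SIZE;

	printf("deliverable=%d\n", subbuf_fully_committed(offset, commit_count));
	return 0;
}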
- */ - if (caa_unlikely(subbuf_offset(buf_offset - commit_count, chan))) - return; - - commit_seq_old = v_read(config, &cc_hot->seq); - if (caa_likely((long) (commit_seq_old - commit_count) < 0)) - v_set(config, &cc_hot->seq, commit_count); -} - -extern int lib_ring_buffer_create(struct lttng_ust_lib_ring_buffer *buf, - struct channel_backend *chanb, int cpu, - struct lttng_ust_shm_handle *handle, - struct shm_object *shmobj) - __attribute__((visibility("hidden"))); - -extern void lib_ring_buffer_free(struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_shm_handle *handle) - __attribute__((visibility("hidden"))); - -/* Keep track of trap nesting inside ring buffer code */ -extern DECLARE_URCU_TLS(unsigned int, lib_ring_buffer_nesting) - __attribute__((visibility("hidden"))); - -#endif /* _LTTNG_RING_BUFFER_FRONTEND_INTERNAL_H */ diff --git a/src/libringbuffer/frontend_types.h b/src/libringbuffer/frontend_types.h deleted file mode 100644 index 467ece73..00000000 --- a/src/libringbuffer/frontend_types.h +++ /dev/null @@ -1,330 +0,0 @@ -/* - * SPDX-License-Identifier: LGPL-2.1-only - * - * Copyright (C) 2005-2012 Mathieu Desnoyers - * - * Ring Buffer Library Synchronization Header (types). - * - * See ring_buffer_frontend.c for more information on wait-free algorithms. - */ - -#ifndef _LTTNG_RING_BUFFER_FRONTEND_TYPES_H -#define _LTTNG_RING_BUFFER_FRONTEND_TYPES_H - -#include -#include -#include /* for timer_t */ - -#include -#include - -#include -#include "ringbuffer-config.h" -#include "common/logging.h" -#include "backend_types.h" -#include "shm_internal.h" -#include "shm_types.h" -#include "vatomic.h" - -#define LIB_RING_BUFFER_MAX_NESTING 5 - -/* - * A switch is done during tracing or as a final flush after tracing (so it - * won't write in the new sub-buffer). - */ -enum switch_mode { SWITCH_ACTIVE, SWITCH_FLUSH }; - -/* channel: collection of per-cpu ring buffers. */ -#define RB_CHANNEL_PADDING 32 -struct lttng_ust_lib_ring_buffer_channel { - int record_disabled; - unsigned long commit_count_mask; /* - * Commit count mask, removing - * the MSBs corresponding to - * bits used to represent the - * subbuffer index. - */ - - unsigned long switch_timer_interval; /* Buffer flush (us) */ - timer_t switch_timer; - int switch_timer_enabled; - - unsigned long read_timer_interval; /* Reader wakeup (us) */ - timer_t read_timer; - int read_timer_enabled; - - int finalized; /* Has channel been finalized */ - size_t priv_data_offset; /* Offset of private data channel config */ - unsigned int nr_streams; /* Number of streams */ - struct lttng_ust_shm_handle *handle; - /* Extended options. */ - union { - struct { - int32_t blocking_timeout_ms; - void *priv; /* Private data pointer. */ - } s; - char padding[RB_CHANNEL_PADDING]; - } u; - /* - * Associated backend contains a variable-length array. Needs to - * be last member. 
- */ - struct channel_backend backend; /* Associated backend */ -} __attribute__((aligned(CAA_CACHE_LINE_SIZE))); - -/* Per-subbuffer commit counters used on the hot path */ -#define RB_COMMIT_COUNT_HOT_PADDING 16 -struct commit_counters_hot { - union v_atomic cc; /* Commit counter */ - union v_atomic seq; /* Consecutive commits */ - char padding[RB_COMMIT_COUNT_HOT_PADDING]; -} __attribute__((aligned(CAA_CACHE_LINE_SIZE))); - -/* Per-subbuffer commit counters used only on cold paths */ -#define RB_COMMIT_COUNT_COLD_PADDING 24 -struct commit_counters_cold { - union v_atomic cc_sb; /* Incremented _once_ at sb switch */ - char padding[RB_COMMIT_COUNT_COLD_PADDING]; -} __attribute__((aligned(CAA_CACHE_LINE_SIZE))); - -/* ring buffer state */ -#define RB_CRASH_DUMP_ABI_LEN 256 -#define RB_RING_BUFFER_PADDING 60 - -#define RB_CRASH_DUMP_ABI_MAGIC_LEN 16 - -/* - * The 128-bit magic number is xor'd in the process data so it does not - * cause a false positive when searching for buffers by scanning memory. - * The actual magic number is: - * 0x17, 0x7B, 0xF1, 0x77, 0xBF, 0x17, 0x7B, 0xF1, - * 0x77, 0xBF, 0x17, 0x7B, 0xF1, 0x77, 0xBF, 0x17, - */ -#define RB_CRASH_DUMP_ABI_MAGIC_XOR \ - { \ - 0x17 ^ 0xFF, 0x7B ^ 0xFF, 0xF1 ^ 0xFF, 0x77 ^ 0xFF, \ - 0xBF ^ 0xFF, 0x17 ^ 0xFF, 0x7B ^ 0xFF, 0xF1 ^ 0xFF, \ - 0x77 ^ 0xFF, 0xBF ^ 0xFF, 0x17 ^ 0xFF, 0x7B ^ 0xFF, \ - 0xF1 ^ 0xFF, 0x77 ^ 0xFF, 0xBF ^ 0xFF, 0x17 ^ 0xFF, \ - } - -#define RB_CRASH_ENDIAN 0x1234 - -#define RB_CRASH_DUMP_ABI_MAJOR 0 -#define RB_CRASH_DUMP_ABI_MINOR 0 - -enum lttng_crash_type { - LTTNG_CRASH_TYPE_UST = 0, - LTTNG_CRASH_TYPE_KERNEL = 1, -}; - -struct lttng_crash_abi { - uint8_t magic[RB_CRASH_DUMP_ABI_MAGIC_LEN]; - uint64_t mmap_length; /* Overall lenght of crash record */ - uint16_t endian; /* - * { 0x12, 0x34 }: big endian - * { 0x34, 0x12 }: little endian - */ - uint16_t major; /* Major number. */ - uint16_t minor; /* Minor number. */ - uint8_t word_size; /* Word size (bytes). 
*/ - uint8_t layout_type; /* enum lttng_crash_type */ - - struct { - uint32_t prod_offset; - uint32_t consumed_offset; - uint32_t commit_hot_array; - uint32_t commit_hot_seq; - uint32_t buf_wsb_array; - uint32_t buf_wsb_id; - uint32_t sb_array; - uint32_t sb_array_shmp_offset; - uint32_t sb_backend_p_offset; - uint32_t content_size; - uint32_t packet_size; - } __attribute__((packed)) offset; - struct { - uint8_t prod_offset; - uint8_t consumed_offset; - uint8_t commit_hot_seq; - uint8_t buf_wsb_id; - uint8_t sb_array_shmp_offset; - uint8_t sb_backend_p_offset; - uint8_t content_size; - uint8_t packet_size; - } __attribute__((packed)) length; - struct { - uint32_t commit_hot_array; - uint32_t buf_wsb_array; - uint32_t sb_array; - } __attribute__((packed)) stride; - - uint64_t buf_size; /* Size of the buffer */ - uint64_t subbuf_size; /* Sub-buffer size */ - uint64_t num_subbuf; /* Number of sub-buffers for writer */ - uint32_t mode; /* Buffer mode: 0: overwrite, 1: discard */ -} __attribute__((packed)); - -struct lttng_ust_lib_ring_buffer { - /* First 32 bytes are for the buffer crash dump ABI */ - struct lttng_crash_abi crash_abi; - - /* 32 bytes cache-hot cacheline */ - union v_atomic __attribute__((aligned(32))) offset; - /* Current offset in the buffer */ - DECLARE_SHMP(struct commit_counters_hot, commit_hot); - /* Commit count per sub-buffer */ - long consumed; /* - * Current offset in the buffer - * standard atomic access (shared) - */ - int record_disabled; - /* End of cache-hot 32 bytes cacheline */ - - union v_atomic last_tsc; /* - * Last timestamp written in the buffer. - */ - - struct lttng_ust_lib_ring_buffer_backend backend; - /* Associated backend */ - - DECLARE_SHMP(struct commit_counters_cold, commit_cold); - /* Commit count per sub-buffer */ - DECLARE_SHMP(uint64_t, ts_end); /* - * timestamp_end per sub-buffer. - * Time is sampled by the - * switch_*_end() callbacks - * which are the last space - * reservation performed in the - * sub-buffer before it can be - * fully committed and - * delivered. This time value is - * then read by the deliver - * callback, performed by the - * last commit before the buffer - * becomes readable. - */ - long active_readers; /* - * Active readers count - * standard atomic access (shared) - */ - /* Dropped records */ - union v_atomic records_lost_full; /* Buffer full */ - union v_atomic records_lost_wrap; /* Nested wrap-around */ - union v_atomic records_lost_big; /* Events too big */ - union v_atomic records_count; /* Number of records written */ - union v_atomic records_overrun; /* Number of overwritten records */ - //wait_queue_head_t read_wait; /* reader buffer-level wait queue */ - int finalized; /* buffer has been finalized */ - unsigned long get_subbuf_consumed; /* Read-side consumed */ - unsigned long prod_snapshot; /* Producer count snapshot */ - unsigned long cons_snapshot; /* Consumer count snapshot */ - unsigned int get_subbuf:1; /* Sub-buffer being held by reader */ - /* shmp pointer to self */ - DECLARE_SHMP(struct lttng_ust_lib_ring_buffer, self); - char padding[RB_RING_BUFFER_PADDING]; -} __attribute__((aligned(CAA_CACHE_LINE_SIZE))); - -/* - * ring buffer private context - * - * Private context passed to lib_ring_buffer_reserve(), lib_ring_buffer_commit(), - * lib_ring_buffer_try_discard_reserve(), lttng_ust_lib_ring_buffer_align_ctx() and - * lib_ring_buffer_write(). - * - * This context is allocated on an internal shadow-stack by a successful reserve - * operation, used by align/write, and freed by commit. 
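Illustration (standalone sketch): the crash-dump magic above is stored XOR'ed with 0xFF so that scanning a core file for the constant does not match the table itself; init_crash_abi() in ring_buffer_frontend.c, further down in this patch, XORs it back when filling the header of a live buffer.

#include <stdint.h>
#include <stdio.h>

#define RB_CRASH_DUMP_ABI_MAGIC_LEN	16

/* Magic bytes stored XOR'ed with 0xFF (same values as in the header above). */
static const uint8_t magic_xor[RB_CRASH_DUMP_ABI_MAGIC_LEN] = {
	0x17 ^ 0xFF, 0x7B ^ 0xFF, 0xF1 ^ 0xFF, 0x77 ^ 0xFF,
	0xBF ^ 0xFF, 0x17 ^ 0xFF, 0x7B ^ 0xFF, 0xF1 ^ 0xFF,
	0x77 ^ 0xFF, 0xBF ^ 0xFF, 0x17 ^ 0xFF, 0x7B ^ 0xFF,
	0xF1 ^ 0xFF, 0x77 ^ 0xFF, 0xBF ^ 0xFF, 0x17 ^ 0xFF,
};

int main(void)
{
	uint8_t magic[RB_CRASH_DUMP_ABI_MAGIC_LEN];

	/* Recover the real magic, as init_crash_abi() does for a live buffer. */
	for (int i = 0; i < RB_CRASH_DUMP_ABI_MAGIC_LEN; i++)
		magic[i] = magic_xor[i] ^ 0xFF;

	for (int i = 0; i < RB_CRASH_DUMP_ABI_MAGIC_LEN; i++)
		printf("%02x ", magic[i]);
	printf("\n");
	return 0;
}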
- */ - -struct lttng_ust_lib_ring_buffer_ctx_private { - /* input received by lib_ring_buffer_reserve(). */ - struct lttng_ust_lib_ring_buffer_ctx *pub; - struct lttng_ust_lib_ring_buffer_channel *chan; /* channel */ - - /* output from lib_ring_buffer_reserve() */ - int reserve_cpu; /* processor id updated by the reserve */ - size_t slot_size; /* size of the reserved slot */ - unsigned long buf_offset; /* offset following the record header */ - unsigned long pre_offset; /* - * Initial offset position _before_ - * the record is written. Positioned - * prior to record header alignment - * padding. - */ - uint64_t tsc; /* time-stamp counter value */ - unsigned int rflags; /* reservation flags */ - void *ip; /* caller ip address */ - - struct lttng_ust_lib_ring_buffer *buf; /* - * buffer corresponding to processor id - * for this channel - */ - struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; -}; - -static inline -void *channel_get_private_config(struct lttng_ust_lib_ring_buffer_channel *chan) -{ - return ((char *) chan) + chan->priv_data_offset; -} - -static inline -void *channel_get_private(struct lttng_ust_lib_ring_buffer_channel *chan) -{ - return chan->u.s.priv; -} - -static inline -void channel_set_private(struct lttng_ust_lib_ring_buffer_channel *chan, void *priv) -{ - chan->u.s.priv = priv; -} - -#ifndef __rb_same_type -#define __rb_same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) -#endif - -/* - * Issue warnings and disable channels upon internal error. - * Can receive struct lttng_ust_lib_ring_buffer or struct lttng_ust_lib_ring_buffer_backend - * parameters. - */ -#define CHAN_WARN_ON(c, cond) \ - ({ \ - struct lttng_ust_lib_ring_buffer_channel *__chan; \ - int _____ret = caa_unlikely(cond); \ - if (_____ret) { \ - if (__rb_same_type(*(c), struct channel_backend)) \ - __chan = caa_container_of((void *) (c), \ - struct lttng_ust_lib_ring_buffer_channel, \ - backend); \ - else if (__rb_same_type(*(c), \ - struct lttng_ust_lib_ring_buffer_channel)) \ - __chan = (void *) (c); \ - else \ - BUG_ON(1); \ - uatomic_inc(&__chan->record_disabled); \ - WARN_ON(1); \ - } \ - _____ret = _____ret; /* For clang "unused result". */ \ - }) - -/** - * lttng_ust_lib_ring_buffer_align_ctx - Align context offset on "alignment" - * @ctx: ring buffer context. - */ -static inline -void lttng_ust_lib_ring_buffer_align_ctx(struct lttng_ust_lib_ring_buffer_ctx *ctx, - size_t alignment) - lttng_ust_notrace; -static inline -void lttng_ust_lib_ring_buffer_align_ctx(struct lttng_ust_lib_ring_buffer_ctx *ctx, - size_t alignment) -{ - struct lttng_ust_lib_ring_buffer_ctx_private *ctx_private = ctx->priv; - - ctx_private->buf_offset += lttng_ust_lib_ring_buffer_align(ctx_private->buf_offset, - alignment); -} - -#endif /* _LTTNG_RING_BUFFER_FRONTEND_TYPES_H */ diff --git a/src/libringbuffer/getcpu.h b/src/libringbuffer/getcpu.h deleted file mode 100644 index 52c74413..00000000 --- a/src/libringbuffer/getcpu.h +++ /dev/null @@ -1,104 +0,0 @@ -/* - * SPDX-License-Identifier: LGPL-2.1-only - * - * Copyright (C) 2011 Mathieu Desnoyers - */ - -#ifndef _LTTNG_GETCPU_H -#define _LTTNG_GETCPU_H - -#include -#include -#include - -void lttng_ust_getcpu_init(void) - __attribute__((visibility("hidden"))); - -extern int (*lttng_get_cpu)(void) - __attribute__((visibility("hidden"))); - -#ifdef LTTNG_UST_DEBUG_VALGRIND - -/* - * Fallback on cpu 0 if liblttng-ust is build with Valgrind support. - * get_cpu() returns the current CPU number. 
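Illustration (standalone sketch): lttng_ust_lib_ring_buffer_align_ctx() above advances buf_offset by the padding returned by lttng_ust_lib_ring_buffer_align(), which is defined in the public headers rather than in this file. Assuming that helper returns the number of padding bytes needed to reach a power-of-two alignment, the arithmetic looks like this:

#include <stdio.h>
#include <stddef.h>

/*
 * Padding needed to bring "offset" up to "alignment" (a power of two).
 * Sketch of the expected behaviour, not the real helper.
 */
static size_t align_padding(size_t offset, size_t alignment)
{
	return (-offset) & (alignment - 1);
}

int main(void)
{
	size_t buf_offset = 13;

	/* Align the next record field on 8 bytes, as align_ctx() would. */
	buf_offset += align_padding(buf_offset, 8);
	printf("aligned offset: %zu\n", buf_offset);	/* 16 */
	return 0;
}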
It may change due to - * migration, so it is only statistically accurate. - */ -static inline -int lttng_ust_get_cpu_internal(void) -{ - return 0; -} - -#else - -/* - * sched_getcpu. - */ -#ifdef __linux__ - -#if !HAVE_SCHED_GETCPU -#include -#define __getcpu(cpu, node, cache) syscall(__NR_getcpu, cpu, node, cache) -/* - * If getcpu is not implemented in the kernel, use cpu 0 as fallback. - */ -static inline -int lttng_ust_get_cpu_internal(void) -{ - int cpu, ret; - - ret = __getcpu(&cpu, NULL, NULL); - if (caa_unlikely(ret < 0)) - return 0; - return cpu; -} -#else /* HAVE_SCHED_GETCPU */ -#include - -/* - * If getcpu is not implemented in the kernel, use cpu 0 as fallback. - */ -static inline -int lttng_ust_get_cpu_internal(void) -{ - int cpu; - - cpu = sched_getcpu(); - if (caa_unlikely(cpu < 0)) - return 0; - return cpu; -} -#endif /* HAVE_SCHED_GETCPU */ - -#elif (defined(__FreeBSD__) || defined(__CYGWIN__)) - -/* - * FreeBSD and Cygwin do not allow query of CPU ID. Always use CPU - * number 0, with the assocated performance degradation on SMP. - */ -static inline -int lttng_ust_get_cpu_internal(void) -{ - return 0; -} - -#else -#error "Please add support for your OS into liblttng-ust/compat.h." -#endif - -#endif - -static inline -int lttng_ust_get_cpu(void) -{ - int (*getcpu)(void) = CMM_LOAD_SHARED(lttng_get_cpu); - - if (caa_likely(!getcpu)) { - return lttng_ust_get_cpu_internal(); - } else { - return getcpu(); - } -} - -#endif /* _LTTNG_GETCPU_H */ diff --git a/src/libringbuffer/mmap.h b/src/libringbuffer/mmap.h deleted file mode 100644 index 39c06246..00000000 --- a/src/libringbuffer/mmap.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * SPDX-License-Identifier: LGPL-2.1-only - * - * Copyright (C) 2019 Jonathan Rajotte - */ - -#ifndef _LTTNG_MMAP_H -#define _LTTNG_MMAP_H - -#include - -#if defined(__linux__) && defined(MAP_POPULATE) -# define LTTNG_MAP_POPULATE MAP_POPULATE -#else -# define LTTNG_MAP_POPULATE 0 -#endif /* __linux__ && MAP_POPULATE */ - -#endif /* _LTTNG_MMAP_H */ diff --git a/src/libringbuffer/nohz.h b/src/libringbuffer/nohz.h deleted file mode 100644 index 5d416780..00000000 --- a/src/libringbuffer/nohz.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * SPDX-License-Identifier: LGPL-2.1-only - * - * Copyright (C) 2011-2012 Mathieu Desnoyers - */ - -#ifndef _LTTNG_RING_BUFFER_NOHZ_H -#define _LTTNG_RING_BUFFER_NOHZ_H - -#ifdef CONFIG_LIB_RING_BUFFER -void lib_ring_buffer_tick_nohz_flush(void) - __attribute__((visibility("hidden"))); - -void lib_ring_buffer_tick_nohz_stop(void) - __attribute__((visibility("hidden"))); - -void lib_ring_buffer_tick_nohz_restart(void) - __attribute__((visibility("hidden"))); - -#else - -static inline void lib_ring_buffer_tick_nohz_flush(void) -{ -} - -static inline void lib_ring_buffer_tick_nohz_stop(void) -{ -} - -static inline void lib_ring_buffer_tick_nohz_restart(void) -{ -} -#endif - -#endif /* _LTTNG_RING_BUFFER_NOHZ_H */ diff --git a/src/libringbuffer/rb-init.h b/src/libringbuffer/rb-init.h deleted file mode 100644 index b3eb7568..00000000 --- a/src/libringbuffer/rb-init.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * SPDX-License-Identifier: LGPL-2.1-only - * - * Copyright (C) 2012-2016 Mathieu Desnoyers - */ - -#ifndef _LTTNG_UST_LIB_RINGBUFFER_RB_INIT_H -#define _LTTNG_UST_LIB_RINGBUFFER_RB_INIT_H - -void lttng_fixup_ringbuffer_tls(void) - __attribute__((visibility("hidden"))); - -void lttng_ust_ringbuffer_set_allow_blocking(void) - __attribute__((visibility("hidden"))); - -#endif /* _LTTNG_UST_LIB_RINGBUFFER_RB_INIT_H */ diff --git 
a/src/libringbuffer/ring_buffer_backend.c b/src/libringbuffer/ring_buffer_backend.c deleted file mode 100644 index b0a7c513..00000000 --- a/src/libringbuffer/ring_buffer_backend.c +++ /dev/null @@ -1,593 +0,0 @@ -/* - * SPDX-License-Identifier: LGPL-2.1-only - * - * Copyright (C) 2005-2012 Mathieu Desnoyers - */ - -#define _LGPL_SOURCE -#include -#include -#include -#include -#include - -#include -#include - -#include "ringbuffer-config.h" -#include "vatomic.h" -#include "backend.h" -#include "frontend.h" -#include "smp.h" -#include "shm.h" -#include "common/align.h" - -/** - * lib_ring_buffer_backend_allocate - allocate a channel buffer - * @config: ring buffer instance configuration - * @buf: the buffer struct - * @size: total size of the buffer - * @num_subbuf: number of subbuffers - * @extra_reader_sb: need extra subbuffer for reader - */ -static -int lib_ring_buffer_backend_allocate(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_backend *bufb, - size_t size __attribute__((unused)), size_t num_subbuf, - int extra_reader_sb, - struct lttng_ust_shm_handle *handle, - struct shm_object *shmobj) -{ - struct channel_backend *chanb; - unsigned long subbuf_size, mmap_offset = 0; - unsigned long num_subbuf_alloc; - unsigned long i; - long page_size; - - chanb = &shmp(handle, bufb->chan)->backend; - if (!chanb) - return -EINVAL; - - subbuf_size = chanb->subbuf_size; - num_subbuf_alloc = num_subbuf; - - if (extra_reader_sb) - num_subbuf_alloc++; - - page_size = LTTNG_UST_PAGE_SIZE; - if (page_size <= 0) { - goto page_size_error; - } - - align_shm(shmobj, __alignof__(struct lttng_ust_lib_ring_buffer_backend_pages_shmp)); - set_shmp(bufb->array, zalloc_shm(shmobj, - sizeof(struct lttng_ust_lib_ring_buffer_backend_pages_shmp) * num_subbuf_alloc)); - if (caa_unlikely(!shmp(handle, bufb->array))) - goto array_error; - - /* - * This is the largest element (the buffer pages) which needs to - * be aligned on page size. 
- */ - align_shm(shmobj, page_size); - set_shmp(bufb->memory_map, zalloc_shm(shmobj, - subbuf_size * num_subbuf_alloc)); - if (caa_unlikely(!shmp(handle, bufb->memory_map))) - goto memory_map_error; - - /* Allocate backend pages array elements */ - for (i = 0; i < num_subbuf_alloc; i++) { - align_shm(shmobj, __alignof__(struct lttng_ust_lib_ring_buffer_backend_pages)); - set_shmp(shmp_index(handle, bufb->array, i)->shmp, - zalloc_shm(shmobj, - sizeof(struct lttng_ust_lib_ring_buffer_backend_pages))); - if (!shmp(handle, shmp_index(handle, bufb->array, i)->shmp)) - goto free_array; - } - - /* Allocate write-side subbuffer table */ - align_shm(shmobj, __alignof__(struct lttng_ust_lib_ring_buffer_backend_subbuffer)); - set_shmp(bufb->buf_wsb, zalloc_shm(shmobj, - sizeof(struct lttng_ust_lib_ring_buffer_backend_subbuffer) - * num_subbuf)); - if (caa_unlikely(!shmp(handle, bufb->buf_wsb))) - goto free_array; - - for (i = 0; i < num_subbuf; i++) { - struct lttng_ust_lib_ring_buffer_backend_subbuffer *sb; - - sb = shmp_index(handle, bufb->buf_wsb, i); - if (!sb) - goto free_array; - sb->id = subbuffer_id(config, 0, 1, i); - } - - /* Assign read-side subbuffer table */ - if (extra_reader_sb) - bufb->buf_rsb.id = subbuffer_id(config, 0, 1, - num_subbuf_alloc - 1); - else - bufb->buf_rsb.id = subbuffer_id(config, 0, 1, 0); - - /* Allocate subbuffer packet counter table */ - align_shm(shmobj, __alignof__(struct lttng_ust_lib_ring_buffer_backend_counts)); - set_shmp(bufb->buf_cnt, zalloc_shm(shmobj, - sizeof(struct lttng_ust_lib_ring_buffer_backend_counts) - * num_subbuf)); - if (caa_unlikely(!shmp(handle, bufb->buf_cnt))) - goto free_wsb; - - /* Assign pages to page index */ - for (i = 0; i < num_subbuf_alloc; i++) { - struct lttng_ust_lib_ring_buffer_backend_pages_shmp *sbp; - struct lttng_ust_lib_ring_buffer_backend_pages *pages; - struct shm_ref ref; - - ref.index = bufb->memory_map._ref.index; - ref.offset = bufb->memory_map._ref.offset; - ref.offset += i * subbuf_size; - - sbp = shmp_index(handle, bufb->array, i); - if (!sbp) - goto free_array; - pages = shmp(handle, sbp->shmp); - if (!pages) - goto free_array; - set_shmp(pages->p, ref); - if (config->output == RING_BUFFER_MMAP) { - pages->mmap_offset = mmap_offset; - mmap_offset += subbuf_size; - } - } - return 0; - -free_wsb: - /* bufb->buf_wsb will be freed by shm teardown */ -free_array: - /* bufb->array[i] will be freed by shm teardown */ -memory_map_error: - /* bufb->array will be freed by shm teardown */ -array_error: -page_size_error: - return -ENOMEM; -} - -int lib_ring_buffer_backend_create(struct lttng_ust_lib_ring_buffer_backend *bufb, - struct channel_backend *chanb, int cpu, - struct lttng_ust_shm_handle *handle, - struct shm_object *shmobj) -{ - const struct lttng_ust_lib_ring_buffer_config *config = &chanb->config; - - set_shmp(bufb->chan, handle->chan._ref); - bufb->cpu = cpu; - - return lib_ring_buffer_backend_allocate(config, bufb, chanb->buf_size, - chanb->num_subbuf, - chanb->extra_reader_sb, - handle, shmobj); -} - -void lib_ring_buffer_backend_reset(struct lttng_ust_lib_ring_buffer_backend *bufb, - struct lttng_ust_shm_handle *handle) -{ - struct channel_backend *chanb; - const struct lttng_ust_lib_ring_buffer_config *config; - unsigned long num_subbuf_alloc; - unsigned int i; - - chanb = &shmp(handle, bufb->chan)->backend; - if (!chanb) - return; - config = &chanb->config; - - num_subbuf_alloc = chanb->num_subbuf; - if (chanb->extra_reader_sb) - num_subbuf_alloc++; - - for (i = 0; i < chanb->num_subbuf; i++) { - struct 
lttng_ust_lib_ring_buffer_backend_subbuffer *sb; - - sb = shmp_index(handle, bufb->buf_wsb, i); - if (!sb) - return; - sb->id = subbuffer_id(config, 0, 1, i); - } - if (chanb->extra_reader_sb) - bufb->buf_rsb.id = subbuffer_id(config, 0, 1, - num_subbuf_alloc - 1); - else - bufb->buf_rsb.id = subbuffer_id(config, 0, 1, 0); - - for (i = 0; i < num_subbuf_alloc; i++) { - struct lttng_ust_lib_ring_buffer_backend_pages_shmp *sbp; - struct lttng_ust_lib_ring_buffer_backend_pages *pages; - - sbp = shmp_index(handle, bufb->array, i); - if (!sbp) - return; - pages = shmp(handle, sbp->shmp); - if (!pages) - return; - /* Don't reset mmap_offset */ - v_set(config, &pages->records_commit, 0); - v_set(config, &pages->records_unread, 0); - pages->data_size = 0; - /* Don't reset backend page and virt addresses */ - } - /* Don't reset num_pages_per_subbuf, cpu, allocated */ - v_set(config, &bufb->records_read, 0); -} - -/* - * The frontend is responsible for also calling ring_buffer_backend_reset for - * each buffer when calling channel_backend_reset. - */ -void channel_backend_reset(struct channel_backend *chanb) -{ - struct lttng_ust_lib_ring_buffer_channel *chan = caa_container_of(chanb, - struct lttng_ust_lib_ring_buffer_channel, backend); - const struct lttng_ust_lib_ring_buffer_config *config = &chanb->config; - - /* - * Don't reset buf_size, subbuf_size, subbuf_size_order, - * num_subbuf_order, buf_size_order, extra_reader_sb, num_subbuf, - * priv, notifiers, config, cpumask and name. - */ - chanb->start_tsc = config->cb.ring_buffer_clock_read(chan); -} - -/** - * channel_backend_init - initialize a channel backend - * @chanb: channel backend - * @name: channel name - * @config: client ring buffer configuration - * @parent: dentry of parent directory, %NULL for root directory - * @subbuf_size: size of sub-buffers (> page size, power of 2) - * @num_subbuf: number of sub-buffers (power of 2) - * @lttng_ust_shm_handle: shared memory handle - * @stream_fds: stream file descriptors. - * - * Returns channel pointer if successful, %NULL otherwise. - * - * Creates per-cpu channel buffers using the sizes and attributes - * specified. The created channel buffer files will be named - * name_0...name_N-1. File permissions will be %S_IRUSR. - * - * Called with CPU hotplug disabled. - */ -int channel_backend_init(struct channel_backend *chanb, - const char *name, - const struct lttng_ust_lib_ring_buffer_config *config, - size_t subbuf_size, size_t num_subbuf, - struct lttng_ust_shm_handle *handle, - const int *stream_fds) -{ - struct lttng_ust_lib_ring_buffer_channel *chan = caa_container_of(chanb, - struct lttng_ust_lib_ring_buffer_channel, backend); - unsigned int i; - int ret; - size_t shmsize = 0, num_subbuf_alloc; - long page_size; - - if (!name) - return -EPERM; - - page_size = LTTNG_UST_PAGE_SIZE; - if (page_size <= 0) { - return -ENOMEM; - } - /* Check that the subbuffer size is larger than a page. */ - if (subbuf_size < page_size) - return -EINVAL; - - /* - * Make sure the number of subbuffers and subbuffer size are - * power of 2, and nonzero. - */ - if (!subbuf_size || (subbuf_size & (subbuf_size - 1))) - return -EINVAL; - if (!num_subbuf || (num_subbuf & (num_subbuf - 1))) - return -EINVAL; - /* - * Overwrite mode buffers require at least 2 subbuffers per - * buffer. 
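Illustration (standalone sketch): the geometry checks at the top of channel_backend_init() above boil down to a handful of arithmetic tests. The sketch applies the same rules, with the page size taken from sysconf() instead of LTTNG_UST_PAGE_SIZE.

#include <stdio.h>
#include <stddef.h>
#include <unistd.h>

/*
 * Same sanity rules as channel_backend_init(): sizes must be non-zero
 * powers of two, a sub-buffer must be at least one page, and overwrite
 * mode needs at least two sub-buffers.
 */
static int check_geometry(size_t subbuf_size, size_t num_subbuf, int overwrite)
{
	long page_size = sysconf(_SC_PAGE_SIZE);

	if (page_size <= 0)
		return -1;
	if (!subbuf_size || (subbuf_size & (subbuf_size - 1)))
		return -1;
	if (!num_subbuf || (num_subbuf & (num_subbuf - 1)))
		return -1;
	if (subbuf_size < (size_t) page_size)
		return -1;
	if (overwrite && num_subbuf < 2)
		return -1;
	return 0;
}

int main(void)
{
	printf("%d\n", check_geometry(4096, 4, 1));	/* 0: valid */
	printf("%d\n", check_geometry(4096, 1, 1));	/* -1: overwrite needs 2 */
	printf("%d\n", check_geometry(3000, 4, 0));	/* -1: not a power of two */
	return 0;
}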
- */ - if (config->mode == RING_BUFFER_OVERWRITE && num_subbuf < 2) - return -EINVAL; - - ret = subbuffer_id_check_index(config, num_subbuf); - if (ret) - return ret; - - chanb->buf_size = num_subbuf * subbuf_size; - chanb->subbuf_size = subbuf_size; - chanb->buf_size_order = get_count_order(chanb->buf_size); - chanb->subbuf_size_order = get_count_order(subbuf_size); - chanb->num_subbuf_order = get_count_order(num_subbuf); - chanb->extra_reader_sb = - (config->mode == RING_BUFFER_OVERWRITE) ? 1 : 0; - chanb->num_subbuf = num_subbuf; - strncpy(chanb->name, name, NAME_MAX); - chanb->name[NAME_MAX - 1] = '\0'; - memcpy(&chanb->config, config, sizeof(*config)); - - /* Per-cpu buffer size: control (prior to backend) */ - shmsize = lttng_ust_offset_align(shmsize, __alignof__(struct lttng_ust_lib_ring_buffer)); - shmsize += sizeof(struct lttng_ust_lib_ring_buffer); - shmsize += lttng_ust_offset_align(shmsize, __alignof__(struct commit_counters_hot)); - shmsize += sizeof(struct commit_counters_hot) * num_subbuf; - shmsize += lttng_ust_offset_align(shmsize, __alignof__(struct commit_counters_cold)); - shmsize += sizeof(struct commit_counters_cold) * num_subbuf; - /* Sampled timestamp end */ - shmsize += lttng_ust_offset_align(shmsize, __alignof__(uint64_t)); - shmsize += sizeof(uint64_t) * num_subbuf; - - /* Per-cpu buffer size: backend */ - /* num_subbuf + 1 is the worse case */ - num_subbuf_alloc = num_subbuf + 1; - shmsize += lttng_ust_offset_align(shmsize, __alignof__(struct lttng_ust_lib_ring_buffer_backend_pages_shmp)); - shmsize += sizeof(struct lttng_ust_lib_ring_buffer_backend_pages_shmp) * num_subbuf_alloc; - shmsize += lttng_ust_offset_align(shmsize, page_size); - shmsize += subbuf_size * num_subbuf_alloc; - shmsize += lttng_ust_offset_align(shmsize, __alignof__(struct lttng_ust_lib_ring_buffer_backend_pages)); - shmsize += sizeof(struct lttng_ust_lib_ring_buffer_backend_pages) * num_subbuf_alloc; - shmsize += lttng_ust_offset_align(shmsize, __alignof__(struct lttng_ust_lib_ring_buffer_backend_subbuffer)); - shmsize += sizeof(struct lttng_ust_lib_ring_buffer_backend_subbuffer) * num_subbuf; - shmsize += lttng_ust_offset_align(shmsize, __alignof__(struct lttng_ust_lib_ring_buffer_backend_counts)); - shmsize += sizeof(struct lttng_ust_lib_ring_buffer_backend_counts) * num_subbuf; - - if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) { - struct lttng_ust_lib_ring_buffer *buf; - /* - * We need to allocate for all possible cpus. 
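Illustration (standalone sketch) of the shmsize accumulation style above: align the running size for the next element type, then add that element's size. The sketch keeps only two of the elements and uses invented counts; the real computation covers the commit counters, timestamps, page descriptors and sub-buffer tables listed above.

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* Padding needed to align a running offset on a power-of-two boundary. */
static size_t offset_align(size_t offset, size_t alignment)
{
	return (-offset) & (alignment - 1);
}

int main(void)
{
	size_t shmsize = 0;
	size_t num_subbuf = 4, subbuf_size = 4096, page_size = 4096;

	/* Control data: one sampled end timestamp per sub-buffer. */
	shmsize += offset_align(shmsize, __alignof__(uint64_t));
	shmsize += sizeof(uint64_t) * num_subbuf;
	/* Backend pages, aligned on the page size; + 1 extra reader sub-buffer. */
	shmsize += offset_align(shmsize, page_size);
	shmsize += subbuf_size * (num_subbuf + 1);

	printf("shared memory object size: %zu bytes\n", shmsize);
	return 0;
}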
- */ - for_each_possible_cpu(i) { - struct shm_object *shmobj; - - shmobj = shm_object_table_alloc(handle->table, shmsize, - SHM_OBJECT_SHM, stream_fds[i], i); - if (!shmobj) - goto end; - align_shm(shmobj, __alignof__(struct lttng_ust_lib_ring_buffer)); - set_shmp(chanb->buf[i].shmp, zalloc_shm(shmobj, sizeof(struct lttng_ust_lib_ring_buffer))); - buf = shmp(handle, chanb->buf[i].shmp); - if (!buf) - goto end; - set_shmp(buf->self, chanb->buf[i].shmp._ref); - ret = lib_ring_buffer_create(buf, chanb, i, - handle, shmobj); - if (ret) - goto free_bufs; /* cpu hotplug locked */ - } - } else { - struct shm_object *shmobj; - struct lttng_ust_lib_ring_buffer *buf; - - shmobj = shm_object_table_alloc(handle->table, shmsize, - SHM_OBJECT_SHM, stream_fds[0], -1); - if (!shmobj) - goto end; - align_shm(shmobj, __alignof__(struct lttng_ust_lib_ring_buffer)); - set_shmp(chanb->buf[0].shmp, zalloc_shm(shmobj, sizeof(struct lttng_ust_lib_ring_buffer))); - buf = shmp(handle, chanb->buf[0].shmp); - if (!buf) - goto end; - set_shmp(buf->self, chanb->buf[0].shmp._ref); - ret = lib_ring_buffer_create(buf, chanb, -1, - handle, shmobj); - if (ret) - goto free_bufs; - } - chanb->start_tsc = config->cb.ring_buffer_clock_read(chan); - - return 0; - -free_bufs: - /* We only free the buffer data upon shm teardown */ -end: - return -ENOMEM; -} - -/** - * channel_backend_free - destroy the channel - * @chan: the channel - * - * Destroy all channel buffers and frees the channel. - */ -void channel_backend_free(struct channel_backend *chanb __attribute__((unused)), - struct lttng_ust_shm_handle *handle __attribute__((unused))) -{ - /* SHM teardown takes care of everything */ -} - -/** - * lib_ring_buffer_read - read data from ring_buffer_buffer. - * @bufb : buffer backend - * @offset : offset within the buffer - * @dest : destination address - * @len : length to copy to destination - * - * Should be protected by get_subbuf/put_subbuf. - * Returns the length copied. - */ -size_t lib_ring_buffer_read(struct lttng_ust_lib_ring_buffer_backend *bufb, size_t offset, - void *dest, size_t len, struct lttng_ust_shm_handle *handle) -{ - struct channel_backend *chanb; - const struct lttng_ust_lib_ring_buffer_config *config; - ssize_t orig_len; - struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; - struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; - unsigned long sb_bindex, id; - void *src; - - chanb = &shmp(handle, bufb->chan)->backend; - if (!chanb) - return 0; - config = &chanb->config; - orig_len = len; - offset &= chanb->buf_size - 1; - - if (caa_unlikely(!len)) - return 0; - id = bufb->buf_rsb.id; - sb_bindex = subbuffer_id_get_index(config, id); - rpages = shmp_index(handle, bufb->array, sb_bindex); - if (!rpages) - return 0; - /* - * Underlying layer should never ask for reads across - * subbuffers. - */ - CHAN_WARN_ON(chanb, offset >= chanb->buf_size); - CHAN_WARN_ON(chanb, config->mode == RING_BUFFER_OVERWRITE - && subbuffer_id_is_noref(config, id)); - backend_pages = shmp(handle, rpages->shmp); - if (!backend_pages) - return 0; - src = shmp_index(handle, backend_pages->p, offset & (chanb->subbuf_size - 1)); - if (caa_unlikely(!src)) - return 0; - memcpy(dest, src, len); - return orig_len; -} - -/** - * lib_ring_buffer_read_cstr - read a C-style string from ring_buffer. - * @bufb : buffer backend - * @offset : offset within the buffer - * @dest : destination address - * @len : destination's length - * - * Return string's length, or -EINVAL on error. - * Should be protected by get_subbuf/put_subbuf. 
- * Destination length should be at least 1 to hold '\0'. - */ -int lib_ring_buffer_read_cstr(struct lttng_ust_lib_ring_buffer_backend *bufb, size_t offset, - void *dest, size_t len, struct lttng_ust_shm_handle *handle) -{ - struct channel_backend *chanb; - const struct lttng_ust_lib_ring_buffer_config *config; - ssize_t string_len, orig_offset; - char *str; - struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; - struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; - unsigned long sb_bindex, id; - - chanb = &shmp(handle, bufb->chan)->backend; - if (!chanb) - return -EINVAL; - config = &chanb->config; - if (caa_unlikely(!len)) - return -EINVAL; - offset &= chanb->buf_size - 1; - orig_offset = offset; - id = bufb->buf_rsb.id; - sb_bindex = subbuffer_id_get_index(config, id); - rpages = shmp_index(handle, bufb->array, sb_bindex); - if (!rpages) - return -EINVAL; - /* - * Underlying layer should never ask for reads across - * subbuffers. - */ - CHAN_WARN_ON(chanb, offset >= chanb->buf_size); - CHAN_WARN_ON(chanb, config->mode == RING_BUFFER_OVERWRITE - && subbuffer_id_is_noref(config, id)); - backend_pages = shmp(handle, rpages->shmp); - if (!backend_pages) - return -EINVAL; - str = shmp_index(handle, backend_pages->p, offset & (chanb->subbuf_size - 1)); - if (caa_unlikely(!str)) - return -EINVAL; - string_len = strnlen(str, len); - if (dest && len) { - memcpy(dest, str, string_len); - ((char *)dest)[0] = 0; - } - return offset - orig_offset; -} - -/** - * lib_ring_buffer_read_offset_address - get address of a buffer location - * @bufb : buffer backend - * @offset : offset within the buffer. - * - * Return the address where a given offset is located (for read). - * Should be used to get the current subbuffer header pointer. Given we know - * it's never on a page boundary, it's safe to read/write directly - * from/to this address, as long as the read/write is never bigger than - * a page size. - */ -void *lib_ring_buffer_read_offset_address(struct lttng_ust_lib_ring_buffer_backend *bufb, - size_t offset, - struct lttng_ust_shm_handle *handle) -{ - struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; - struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; - struct channel_backend *chanb; - const struct lttng_ust_lib_ring_buffer_config *config; - unsigned long sb_bindex, id; - - chanb = &shmp(handle, bufb->chan)->backend; - if (!chanb) - return NULL; - config = &chanb->config; - offset &= chanb->buf_size - 1; - id = bufb->buf_rsb.id; - sb_bindex = subbuffer_id_get_index(config, id); - rpages = shmp_index(handle, bufb->array, sb_bindex); - if (!rpages) - return NULL; - CHAN_WARN_ON(chanb, config->mode == RING_BUFFER_OVERWRITE - && subbuffer_id_is_noref(config, id)); - backend_pages = shmp(handle, rpages->shmp); - if (!backend_pages) - return NULL; - return shmp_index(handle, backend_pages->p, offset & (chanb->subbuf_size - 1)); -} - -/** - * lib_ring_buffer_offset_address - get address of a location within the buffer - * @bufb : buffer backend - * @offset : offset within the buffer. - * - * Return the address where a given offset is located. - * Should be used to get the current subbuffer header pointer. Given we know - * it's always at the beginning of a page, it's safe to write directly to this - * address, as long as the write is never bigger than a page size. 
- */ -void *lib_ring_buffer_offset_address(struct lttng_ust_lib_ring_buffer_backend *bufb, - size_t offset, - struct lttng_ust_shm_handle *handle) -{ - size_t sbidx; - struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; - struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; - struct channel_backend *chanb; - const struct lttng_ust_lib_ring_buffer_config *config; - unsigned long sb_bindex, id; - struct lttng_ust_lib_ring_buffer_backend_subbuffer *sb; - - chanb = &shmp(handle, bufb->chan)->backend; - if (!chanb) - return NULL; - config = &chanb->config; - offset &= chanb->buf_size - 1; - sbidx = offset >> chanb->subbuf_size_order; - sb = shmp_index(handle, bufb->buf_wsb, sbidx); - if (!sb) - return NULL; - id = sb->id; - sb_bindex = subbuffer_id_get_index(config, id); - rpages = shmp_index(handle, bufb->array, sb_bindex); - if (!rpages) - return NULL; - CHAN_WARN_ON(chanb, config->mode == RING_BUFFER_OVERWRITE - && subbuffer_id_is_noref(config, id)); - backend_pages = shmp(handle, rpages->shmp); - if (!backend_pages) - return NULL; - return shmp_index(handle, backend_pages->p, offset & (chanb->subbuf_size - 1)); -} diff --git a/src/libringbuffer/ring_buffer_frontend.c b/src/libringbuffer/ring_buffer_frontend.c deleted file mode 100644 index 296368f3..00000000 --- a/src/libringbuffer/ring_buffer_frontend.c +++ /dev/null @@ -1,2581 +0,0 @@ -/* - * SPDX-License-Identifier: LGPL-2.1-only - * - * Copyright (C) 2005-2012 Mathieu Desnoyers - * - * Ring buffer wait-free buffer synchronization. Producer-consumer and flight - * recorder (overwrite) modes. See thesis: - * - * Desnoyers, Mathieu (2009), "Low-Impact Operating System Tracing", Ph.D. - * dissertation, Ecole Polytechnique de Montreal. - * http://www.lttng.org/pub/thesis/desnoyers-dissertation-2009-12.pdf - * - * - Algorithm presentation in Chapter 5: - * "Lockless Multi-Core High-Throughput Buffering". - * - Algorithm formal verification in Section 8.6: - * "Formal verification of LTTng" - * - * Author: - * Mathieu Desnoyers - * - * Inspired from LTT and RelayFS: - * Karim Yaghmour - * Tom Zanussi - * Bob Wisniewski - * And from K42 : - * Bob Wisniewski - * - * Buffer reader semantic : - * - * - get_subbuf_size - * while buffer is not finalized and empty - * - get_subbuf - * - if return value != 0, continue - * - splice one subbuffer worth of data to a pipe - * - splice the data from pipe to disk/network - * - put_subbuf - */ - -#define _LGPL_SOURCE -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "common/macros.h" - -#include -#include - -#include "smp.h" -#include "ringbuffer-config.h" -#include "vatomic.h" -#include "backend.h" -#include "frontend.h" -#include "shm.h" -#include "rb-init.h" -#include "../liblttng-ust/compat.h" /* For ENODATA */ - -/* Print DBG() messages about events lost only every 1048576 hits */ -#define DBG_PRINT_NR_LOST (1UL << 20) - -#define LTTNG_UST_RB_SIG_FLUSH SIGRTMIN -#define LTTNG_UST_RB_SIG_READ SIGRTMIN + 1 -#define LTTNG_UST_RB_SIG_TEARDOWN SIGRTMIN + 2 -#define CLOCKID CLOCK_MONOTONIC -#define LTTNG_UST_RING_BUFFER_GET_RETRY 10 -#define LTTNG_UST_RING_BUFFER_RETRY_DELAY_MS 10 -#define RETRY_DELAY_MS 100 /* 100 ms. */ - -/* - * Non-static to ensure the compiler does not optimize away the xor. - */ -uint8_t lttng_crash_magic_xor[] = RB_CRASH_DUMP_ABI_MAGIC_XOR; - -/* - * Use POSIX SHM: shm_open(3) and shm_unlink(3). - * close(2) to close the fd returned by shm_open. 
- * shm_unlink releases the shared memory object name. - * ftruncate(2) sets the size of the memory object. - * mmap/munmap maps the shared memory obj to a virtual address in the - * calling proceess (should be done both in libust and consumer). - * See shm_overview(7) for details. - * Pass file descriptor returned by shm_open(3) to ltt-sessiond through - * a UNIX socket. - * - * Since we don't need to access the object using its name, we can - * immediately shm_unlink(3) it, and only keep the handle with its file - * descriptor. - */ - -/* - * Internal structure representing offsets to use at a sub-buffer switch. - */ -struct switch_offsets { - unsigned long begin, end, old; - size_t pre_header_padding, size; - unsigned int switch_new_start:1, switch_new_end:1, switch_old_start:1, - switch_old_end:1; -}; - -DEFINE_URCU_TLS(unsigned int, lib_ring_buffer_nesting); - -/* - * wakeup_fd_mutex protects wakeup fd use by timer from concurrent - * close. - */ -static pthread_mutex_t wakeup_fd_mutex = PTHREAD_MUTEX_INITIALIZER; - -static -void lib_ring_buffer_print_errors(struct lttng_ust_lib_ring_buffer_channel *chan, - struct lttng_ust_lib_ring_buffer *buf, int cpu, - struct lttng_ust_shm_handle *handle); - -/* - * Handle timer teardown race wrt memory free of private data by - * ring buffer signals are handled by a single thread, which permits - * a synchronization point between handling of each signal. - * Protected by the lock within the structure. - */ -struct timer_signal_data { - pthread_t tid; /* thread id managing signals */ - int setup_done; - int qs_done; - pthread_mutex_t lock; -}; - -static struct timer_signal_data timer_signal = { - .tid = 0, - .setup_done = 0, - .qs_done = 0, - .lock = PTHREAD_MUTEX_INITIALIZER, -}; - -static bool lttng_ust_allow_blocking; - -void lttng_ust_ringbuffer_set_allow_blocking(void) -{ - lttng_ust_allow_blocking = true; -} - -/* Get blocking timeout, in ms */ -static int lttng_ust_ringbuffer_get_timeout(struct lttng_ust_lib_ring_buffer_channel *chan) -{ - if (!lttng_ust_allow_blocking) - return 0; - return chan->u.s.blocking_timeout_ms; -} - -/** - * lib_ring_buffer_reset - Reset ring buffer to initial values. - * @buf: Ring buffer. - * - * Effectively empty the ring buffer. Should be called when the buffer is not - * used for writing. The ring buffer can be opened for reading, but the reader - * should not be using the iterator concurrently with reset. The previous - * current iterator record is reset. - */ -void lib_ring_buffer_reset(struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_shm_handle *handle) -{ - struct lttng_ust_lib_ring_buffer_channel *chan; - const struct lttng_ust_lib_ring_buffer_config *config; - unsigned int i; - - chan = shmp(handle, buf->backend.chan); - if (!chan) - return; - config = &chan->backend.config; - /* - * Reset iterator first. It will put the subbuffer if it currently holds - * it. 
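Illustration (standalone sketch) of the POSIX SHM pattern described in the comment above: create the object, size it with ftruncate(), map it, then shm_unlink() the name immediately so only the file descriptor remains; that descriptor is what would be handed to the consumer over a UNIX socket. The name and size below are arbitrary and error handling is minimal.

#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>

int main(void)
{
	const char *name = "/lttng-ust-example-shm";	/* arbitrary */
	size_t len = 1 << 20;
	int fd;
	void *p;

	fd = shm_open(name, O_CREAT | O_EXCL | O_RDWR, 0600);
	if (fd < 0) {
		perror("shm_open");
		return 1;
	}
	/* The name is only needed to obtain the fd. */
	shm_unlink(name);
	if (ftruncate(fd, len) < 0) {
		perror("ftruncate");
		return 1;
	}
	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	printf("mapped %zu bytes at %p, fd %d\n", len, p, fd);
	munmap(p, len);
	close(fd);
	return 0;
}

Linking may require -lrt on older glibc, which is why the ring buffer library links against librt.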
- */ - v_set(config, &buf->offset, 0); - for (i = 0; i < chan->backend.num_subbuf; i++) { - struct commit_counters_hot *cc_hot; - struct commit_counters_cold *cc_cold; - uint64_t *ts_end; - - cc_hot = shmp_index(handle, buf->commit_hot, i); - if (!cc_hot) - return; - cc_cold = shmp_index(handle, buf->commit_cold, i); - if (!cc_cold) - return; - ts_end = shmp_index(handle, buf->ts_end, i); - if (!ts_end) - return; - v_set(config, &cc_hot->cc, 0); - v_set(config, &cc_hot->seq, 0); - v_set(config, &cc_cold->cc_sb, 0); - *ts_end = 0; - } - uatomic_set(&buf->consumed, 0); - uatomic_set(&buf->record_disabled, 0); - v_set(config, &buf->last_tsc, 0); - lib_ring_buffer_backend_reset(&buf->backend, handle); - /* Don't reset number of active readers */ - v_set(config, &buf->records_lost_full, 0); - v_set(config, &buf->records_lost_wrap, 0); - v_set(config, &buf->records_lost_big, 0); - v_set(config, &buf->records_count, 0); - v_set(config, &buf->records_overrun, 0); - buf->finalized = 0; -} - -/** - * channel_reset - Reset channel to initial values. - * @chan: Channel. - * - * Effectively empty the channel. Should be called when the channel is not used - * for writing. The channel can be opened for reading, but the reader should not - * be using the iterator concurrently with reset. The previous current iterator - * record is reset. - */ -void channel_reset(struct lttng_ust_lib_ring_buffer_channel *chan) -{ - /* - * Reset iterators first. Will put the subbuffer if held for reading. - */ - uatomic_set(&chan->record_disabled, 0); - /* Don't reset commit_count_mask, still valid */ - channel_backend_reset(&chan->backend); - /* Don't reset switch/read timer interval */ - /* Don't reset notifiers and notifier enable bits */ - /* Don't reset reader reference count */ -} - -static -void init_crash_abi(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_crash_abi *crash_abi, - struct lttng_ust_lib_ring_buffer *buf, - struct channel_backend *chanb, - struct shm_object *shmobj, - struct lttng_ust_shm_handle *handle) -{ - int i; - - for (i = 0; i < RB_CRASH_DUMP_ABI_MAGIC_LEN; i++) - crash_abi->magic[i] = lttng_crash_magic_xor[i] ^ 0xFF; - crash_abi->mmap_length = shmobj->memory_map_size; - crash_abi->endian = RB_CRASH_ENDIAN; - crash_abi->major = RB_CRASH_DUMP_ABI_MAJOR; - crash_abi->minor = RB_CRASH_DUMP_ABI_MINOR; - crash_abi->word_size = sizeof(unsigned long); - crash_abi->layout_type = LTTNG_CRASH_TYPE_UST; - - /* Offset of fields */ - crash_abi->offset.prod_offset = - (uint32_t) ((char *) &buf->offset - (char *) buf); - crash_abi->offset.consumed_offset = - (uint32_t) ((char *) &buf->consumed - (char *) buf); - crash_abi->offset.commit_hot_array = - (uint32_t) ((char *) shmp(handle, buf->commit_hot) - (char *) buf); - crash_abi->offset.commit_hot_seq = - offsetof(struct commit_counters_hot, seq); - crash_abi->offset.buf_wsb_array = - (uint32_t) ((char *) shmp(handle, buf->backend.buf_wsb) - (char *) buf); - crash_abi->offset.buf_wsb_id = - offsetof(struct lttng_ust_lib_ring_buffer_backend_subbuffer, id); - crash_abi->offset.sb_array = - (uint32_t) ((char *) shmp(handle, buf->backend.array) - (char *) buf); - crash_abi->offset.sb_array_shmp_offset = - offsetof(struct lttng_ust_lib_ring_buffer_backend_pages_shmp, - shmp._ref.offset); - crash_abi->offset.sb_backend_p_offset = - offsetof(struct lttng_ust_lib_ring_buffer_backend_pages, - p._ref.offset); - - /* Field length */ - crash_abi->length.prod_offset = sizeof(buf->offset); - crash_abi->length.consumed_offset = sizeof(buf->consumed); - 
crash_abi->length.commit_hot_seq = - sizeof(((struct commit_counters_hot *) NULL)->seq); - crash_abi->length.buf_wsb_id = - sizeof(((struct lttng_ust_lib_ring_buffer_backend_subbuffer *) NULL)->id); - crash_abi->length.sb_array_shmp_offset = - sizeof(((struct lttng_ust_lib_ring_buffer_backend_pages_shmp *) NULL)->shmp._ref.offset); - crash_abi->length.sb_backend_p_offset = - sizeof(((struct lttng_ust_lib_ring_buffer_backend_pages *) NULL)->p._ref.offset); - - /* Array stride */ - crash_abi->stride.commit_hot_array = - sizeof(struct commit_counters_hot); - crash_abi->stride.buf_wsb_array = - sizeof(struct lttng_ust_lib_ring_buffer_backend_subbuffer); - crash_abi->stride.sb_array = - sizeof(struct lttng_ust_lib_ring_buffer_backend_pages_shmp); - - /* Buffer constants */ - crash_abi->buf_size = chanb->buf_size; - crash_abi->subbuf_size = chanb->subbuf_size; - crash_abi->num_subbuf = chanb->num_subbuf; - crash_abi->mode = (uint32_t) chanb->config.mode; - - if (config->cb.content_size_field) { - size_t offset, length; - - config->cb.content_size_field(config, &offset, &length); - crash_abi->offset.content_size = offset; - crash_abi->length.content_size = length; - } else { - crash_abi->offset.content_size = 0; - crash_abi->length.content_size = 0; - } - if (config->cb.packet_size_field) { - size_t offset, length; - - config->cb.packet_size_field(config, &offset, &length); - crash_abi->offset.packet_size = offset; - crash_abi->length.packet_size = length; - } else { - crash_abi->offset.packet_size = 0; - crash_abi->length.packet_size = 0; - } -} - -/* - * Must be called under cpu hotplug protection. - */ -int lib_ring_buffer_create(struct lttng_ust_lib_ring_buffer *buf, - struct channel_backend *chanb, int cpu, - struct lttng_ust_shm_handle *handle, - struct shm_object *shmobj) -{ - const struct lttng_ust_lib_ring_buffer_config *config = &chanb->config; - struct lttng_ust_lib_ring_buffer_channel *chan = caa_container_of(chanb, - struct lttng_ust_lib_ring_buffer_channel, backend); - struct lttng_ust_lib_ring_buffer_backend_subbuffer *wsb; - struct lttng_ust_lib_ring_buffer_channel *shmp_chan; - struct commit_counters_hot *cc_hot; - void *priv = channel_get_private_config(chan); - size_t subbuf_header_size; - uint64_t tsc; - int ret; - - /* Test for cpu hotplug */ - if (buf->backend.allocated) - return 0; - - align_shm(shmobj, __alignof__(struct commit_counters_hot)); - set_shmp(buf->commit_hot, - zalloc_shm(shmobj, - sizeof(struct commit_counters_hot) * chan->backend.num_subbuf)); - if (!shmp(handle, buf->commit_hot)) { - return -ENOMEM; - } - - align_shm(shmobj, __alignof__(struct commit_counters_cold)); - set_shmp(buf->commit_cold, - zalloc_shm(shmobj, - sizeof(struct commit_counters_cold) * chan->backend.num_subbuf)); - if (!shmp(handle, buf->commit_cold)) { - ret = -ENOMEM; - goto free_commit; - } - - align_shm(shmobj, __alignof__(uint64_t)); - set_shmp(buf->ts_end, - zalloc_shm(shmobj, - sizeof(uint64_t) * chan->backend.num_subbuf)); - if (!shmp(handle, buf->ts_end)) { - ret = -ENOMEM; - goto free_commit_cold; - } - - - ret = lib_ring_buffer_backend_create(&buf->backend, &chan->backend, - cpu, handle, shmobj); - if (ret) { - goto free_init; - } - - /* - * Write the subbuffer header for first subbuffer so we know the total - * duration of data gathering. 
- */ - subbuf_header_size = config->cb.subbuffer_header_size(); - v_set(config, &buf->offset, subbuf_header_size); - wsb = shmp_index(handle, buf->backend.buf_wsb, 0); - if (!wsb) { - ret = -EPERM; - goto free_chanbuf; - } - subbuffer_id_clear_noref(config, &wsb->id); - shmp_chan = shmp(handle, buf->backend.chan); - if (!shmp_chan) { - ret = -EPERM; - goto free_chanbuf; - } - tsc = config->cb.ring_buffer_clock_read(shmp_chan); - config->cb.buffer_begin(buf, tsc, 0, handle); - cc_hot = shmp_index(handle, buf->commit_hot, 0); - if (!cc_hot) { - ret = -EPERM; - goto free_chanbuf; - } - v_add(config, subbuf_header_size, &cc_hot->cc); - v_add(config, subbuf_header_size, &cc_hot->seq); - - if (config->cb.buffer_create) { - ret = config->cb.buffer_create(buf, priv, cpu, chanb->name, handle); - if (ret) - goto free_chanbuf; - } - - init_crash_abi(config, &buf->crash_abi, buf, chanb, shmobj, handle); - - buf->backend.allocated = 1; - return 0; - - /* Error handling */ -free_init: - /* ts_end will be freed by shm teardown */ -free_commit_cold: - /* commit_cold will be freed by shm teardown */ -free_commit: - /* commit_hot will be freed by shm teardown */ -free_chanbuf: - return ret; -} - -static -void lib_ring_buffer_channel_switch_timer(int sig __attribute__((unused)), - siginfo_t *si, void *uc __attribute__((unused))) -{ - const struct lttng_ust_lib_ring_buffer_config *config; - struct lttng_ust_shm_handle *handle; - struct lttng_ust_lib_ring_buffer_channel *chan; - int cpu; - - assert(CMM_LOAD_SHARED(timer_signal.tid) == pthread_self()); - - chan = si->si_value.sival_ptr; - handle = chan->handle; - config = &chan->backend.config; - - DBG("Switch timer for channel %p\n", chan); - - /* - * Only flush buffers periodically if readers are active. - */ - pthread_mutex_lock(&wakeup_fd_mutex); - if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) { - for_each_possible_cpu(cpu) { - struct lttng_ust_lib_ring_buffer *buf = - shmp(handle, chan->backend.buf[cpu].shmp); - - if (!buf) - goto end; - if (uatomic_read(&buf->active_readers)) - lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE, - chan->handle); - } - } else { - struct lttng_ust_lib_ring_buffer *buf = - shmp(handle, chan->backend.buf[0].shmp); - - if (!buf) - goto end; - if (uatomic_read(&buf->active_readers)) - lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE, - chan->handle); - } -end: - pthread_mutex_unlock(&wakeup_fd_mutex); - return; -} - -static -int lib_ring_buffer_poll_deliver(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_lib_ring_buffer_channel *chan, - struct lttng_ust_shm_handle *handle) -{ - unsigned long consumed_old, consumed_idx, commit_count, write_offset; - struct commit_counters_cold *cc_cold; - - consumed_old = uatomic_read(&buf->consumed); - consumed_idx = subbuf_index(consumed_old, chan); - cc_cold = shmp_index(handle, buf->commit_cold, consumed_idx); - if (!cc_cold) - return 0; - commit_count = v_read(config, &cc_cold->cc_sb); - /* - * No memory barrier here, since we are only interested - * in a statistically correct polling result. The next poll will - * get the data is we are racing. The mb() that ensures correct - * memory order is in get_subbuf. - */ - write_offset = v_read(config, &buf->offset); - - /* - * Check that the subbuffer we are trying to consume has been - * already fully committed. 
- */ - - if (((commit_count - chan->backend.subbuf_size) - & chan->commit_count_mask) - - (buf_trunc(consumed_old, chan) - >> chan->backend.num_subbuf_order) - != 0) - return 0; - - /* - * Check that we are not about to read the same subbuffer in - * which the writer head is. - */ - if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed_old, chan) - == 0) - return 0; - - return 1; -} - -static -void lib_ring_buffer_wakeup(struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_shm_handle *handle) -{ - int wakeup_fd = shm_get_wakeup_fd(handle, &buf->self._ref); - sigset_t sigpipe_set, pending_set, old_set; - int ret, sigpipe_was_pending = 0; - - if (wakeup_fd < 0) - return; - - /* - * Wake-up the other end by writing a null byte in the pipe - * (non-blocking). Important note: Because writing into the - * pipe is non-blocking (and therefore we allow dropping wakeup - * data, as long as there is wakeup data present in the pipe - * buffer to wake up the consumer), the consumer should perform - * the following sequence for waiting: - * 1) empty the pipe (reads). - * 2) check if there is data in the buffer. - * 3) wait on the pipe (poll). - * - * Discard the SIGPIPE from write(), not disturbing any SIGPIPE - * that might be already pending. If a bogus SIGPIPE is sent to - * the entire process concurrently by a malicious user, it may - * be simply discarded. - */ - ret = sigemptyset(&pending_set); - assert(!ret); - /* - * sigpending returns the mask of signals that are _both_ - * blocked for the thread _and_ pending for either the thread or - * the entire process. - */ - ret = sigpending(&pending_set); - assert(!ret); - sigpipe_was_pending = sigismember(&pending_set, SIGPIPE); - /* - * If sigpipe was pending, it means it was already blocked, so - * no need to block it. - */ - if (!sigpipe_was_pending) { - ret = sigemptyset(&sigpipe_set); - assert(!ret); - ret = sigaddset(&sigpipe_set, SIGPIPE); - assert(!ret); - ret = pthread_sigmask(SIG_BLOCK, &sigpipe_set, &old_set); - assert(!ret); - } - do { - ret = write(wakeup_fd, "", 1); - } while (ret == -1L && errno == EINTR); - if (ret == -1L && errno == EPIPE && !sigpipe_was_pending) { - struct timespec timeout = { 0, 0 }; - do { - ret = sigtimedwait(&sigpipe_set, NULL, - &timeout); - } while (ret == -1L && errno == EINTR); - } - if (!sigpipe_was_pending) { - ret = pthread_sigmask(SIG_SETMASK, &old_set, NULL); - assert(!ret); - } -} - -static -void lib_ring_buffer_channel_do_read(struct lttng_ust_lib_ring_buffer_channel *chan) -{ - const struct lttng_ust_lib_ring_buffer_config *config; - struct lttng_ust_shm_handle *handle; - int cpu; - - handle = chan->handle; - config = &chan->backend.config; - - /* - * Only flush buffers periodically if readers are active. 
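Illustration (standalone sketch) of the consumer-side waiting sequence spelled out in the wake-up comment above: drain the pipe, check for data, then poll. The data_pending() stub stands in for the real readiness test (lib_ring_buffer_poll_deliver()); the real consumer lives outside this tree.

#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

/* Stub standing in for the real "is a sub-buffer ready?" check. */
static bool data_pending(void)
{
	return true;	/* pretend a sub-buffer is always ready */
}

/*
 * Consumer waiting sequence from the wake-up comment above:
 * 1) empty the pipe, 2) check the buffer, 3) wait on the pipe.
 * The wakeup fd must be non-blocking for step 1.
 */
static void wait_for_data(int wakeup_fd)
{
	char drain[64];
	struct pollfd pfd = { .fd = wakeup_fd, .events = POLLIN };

	for (;;) {
		while (read(wakeup_fd, drain, sizeof(drain)) > 0)
			;				/* 1) drain */
		if (data_pending())
			return;				/* 2) data ready */
		if (poll(&pfd, 1, -1) < 0 && errno != EINTR)
			return;				/* 3) sleep */
	}
}

int main(void)
{
	int fds[2];

	if (pipe(fds))
		return 1;
	fcntl(fds[0], F_SETFL, O_NONBLOCK);
	if (write(fds[1], "", 1) < 0)	/* producer-side wake-up byte */
		return 1;
	wait_for_data(fds[0]);
	printf("woken up, data pending\n");
	return 0;
}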
- */ - pthread_mutex_lock(&wakeup_fd_mutex); - if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) { - for_each_possible_cpu(cpu) { - struct lttng_ust_lib_ring_buffer *buf = - shmp(handle, chan->backend.buf[cpu].shmp); - - if (!buf) - goto end; - if (uatomic_read(&buf->active_readers) - && lib_ring_buffer_poll_deliver(config, buf, - chan, handle)) { - lib_ring_buffer_wakeup(buf, handle); - } - } - } else { - struct lttng_ust_lib_ring_buffer *buf = - shmp(handle, chan->backend.buf[0].shmp); - - if (!buf) - goto end; - if (uatomic_read(&buf->active_readers) - && lib_ring_buffer_poll_deliver(config, buf, - chan, handle)) { - lib_ring_buffer_wakeup(buf, handle); - } - } -end: - pthread_mutex_unlock(&wakeup_fd_mutex); -} - -static -void lib_ring_buffer_channel_read_timer(int sig __attribute__((unused)), - siginfo_t *si, void *uc __attribute__((unused))) -{ - struct lttng_ust_lib_ring_buffer_channel *chan; - - assert(CMM_LOAD_SHARED(timer_signal.tid) == pthread_self()); - chan = si->si_value.sival_ptr; - DBG("Read timer for channel %p\n", chan); - lib_ring_buffer_channel_do_read(chan); - return; -} - -static -void rb_setmask(sigset_t *mask) -{ - int ret; - - ret = sigemptyset(mask); - if (ret) { - PERROR("sigemptyset"); - } - ret = sigaddset(mask, LTTNG_UST_RB_SIG_FLUSH); - if (ret) { - PERROR("sigaddset"); - } - ret = sigaddset(mask, LTTNG_UST_RB_SIG_READ); - if (ret) { - PERROR("sigaddset"); - } - ret = sigaddset(mask, LTTNG_UST_RB_SIG_TEARDOWN); - if (ret) { - PERROR("sigaddset"); - } -} - -static -void *sig_thread(void *arg __attribute__((unused))) -{ - sigset_t mask; - siginfo_t info; - int signr; - - /* Only self thread will receive signal mask. */ - rb_setmask(&mask); - CMM_STORE_SHARED(timer_signal.tid, pthread_self()); - - for (;;) { - signr = sigwaitinfo(&mask, &info); - if (signr == -1) { - if (errno != EINTR) - PERROR("sigwaitinfo"); - continue; - } - if (signr == LTTNG_UST_RB_SIG_FLUSH) { - lib_ring_buffer_channel_switch_timer(info.si_signo, - &info, NULL); - } else if (signr == LTTNG_UST_RB_SIG_READ) { - lib_ring_buffer_channel_read_timer(info.si_signo, - &info, NULL); - } else if (signr == LTTNG_UST_RB_SIG_TEARDOWN) { - cmm_smp_mb(); - CMM_STORE_SHARED(timer_signal.qs_done, 1); - cmm_smp_mb(); - } else { - ERR("Unexptected signal %d\n", info.si_signo); - } - } - return NULL; -} - -/* - * Ensure only a single thread listens on the timer signal. - */ -static -void lib_ring_buffer_setup_timer_thread(void) -{ - pthread_t thread; - int ret; - - pthread_mutex_lock(&timer_signal.lock); - if (timer_signal.setup_done) - goto end; - - ret = pthread_create(&thread, NULL, &sig_thread, NULL); - if (ret) { - errno = ret; - PERROR("pthread_create"); - } - ret = pthread_detach(thread); - if (ret) { - errno = ret; - PERROR("pthread_detach"); - } - timer_signal.setup_done = 1; -end: - pthread_mutex_unlock(&timer_signal.lock); -} - -/* - * Wait for signal-handling thread quiescent state. - */ -static -void lib_ring_buffer_wait_signal_thread_qs(unsigned int signr) -{ - sigset_t pending_set; - int ret; - - /* - * We need to be the only thread interacting with the thread - * that manages signals for teardown synchronization. - */ - pthread_mutex_lock(&timer_signal.lock); - - /* - * Ensure we don't have any signal queued for this channel. 
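sig_thread() above is the usual "block the signal for the whole process, consume it synchronously in one dedicated thread" arrangement; because the timer callbacks then run in normal thread context rather than in an async signal handler, they can safely take wakeup_fd_mutex. A self-contained sketch of that arrangement using a hypothetical SIGRTMIN payload rather than the LTTNG_UST_RB_SIG_* signals:

#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void *signal_thread(void *arg)
{
        sigset_t *mask = arg;
        siginfo_t info;
        int signr;

        for (;;) {
                signr = sigwaitinfo(mask, &info);
                if (signr == -1)
                        continue;       /* EINTR: retry */
                printf("got signal %d with value %p\n",
                        signr, info.si_value.sival_ptr);
        }
        return NULL;
}

int main(void)
{
        static sigset_t mask;
        pthread_t tid;
        union sigval val;

        sigemptyset(&mask);
        sigaddset(&mask, SIGRTMIN);
        /* Block process-wide before creating the handling thread. */
        pthread_sigmask(SIG_BLOCK, &mask, NULL);
        pthread_create(&tid, NULL, signal_thread, &mask);

        /* Queue a signal carrying a pointer payload, as timer_create() would. */
        val.sival_ptr = &mask;
        sigqueue(getpid(), SIGRTMIN, val);
        sleep(1);
        return 0;
}

Because the mask is blocked before pthread_create(), the new thread inherits it and sigwaitinfo() becomes the only consumer of the queued signal.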
- */ - for (;;) { - ret = sigemptyset(&pending_set); - if (ret == -1) { - PERROR("sigemptyset"); - } - ret = sigpending(&pending_set); - if (ret == -1) { - PERROR("sigpending"); - } - if (!sigismember(&pending_set, signr)) - break; - caa_cpu_relax(); - } - - /* - * From this point, no new signal handler will be fired that - * would try to access "chan". However, we still need to wait - * for any currently executing handler to complete. - */ - cmm_smp_mb(); - CMM_STORE_SHARED(timer_signal.qs_done, 0); - cmm_smp_mb(); - - /* - * Kill with LTTNG_UST_RB_SIG_TEARDOWN, so signal management - * thread wakes up. - */ - kill(getpid(), LTTNG_UST_RB_SIG_TEARDOWN); - - while (!CMM_LOAD_SHARED(timer_signal.qs_done)) - caa_cpu_relax(); - cmm_smp_mb(); - - pthread_mutex_unlock(&timer_signal.lock); -} - -static -void lib_ring_buffer_channel_switch_timer_start(struct lttng_ust_lib_ring_buffer_channel *chan) -{ - struct sigevent sev; - struct itimerspec its; - int ret; - - if (!chan->switch_timer_interval || chan->switch_timer_enabled) - return; - - chan->switch_timer_enabled = 1; - - lib_ring_buffer_setup_timer_thread(); - - memset(&sev, 0, sizeof(sev)); - sev.sigev_notify = SIGEV_SIGNAL; - sev.sigev_signo = LTTNG_UST_RB_SIG_FLUSH; - sev.sigev_value.sival_ptr = chan; - ret = timer_create(CLOCKID, &sev, &chan->switch_timer); - if (ret == -1) { - PERROR("timer_create"); - } - - its.it_value.tv_sec = chan->switch_timer_interval / 1000000; - its.it_value.tv_nsec = (chan->switch_timer_interval % 1000000) * 1000; - its.it_interval.tv_sec = its.it_value.tv_sec; - its.it_interval.tv_nsec = its.it_value.tv_nsec; - - ret = timer_settime(chan->switch_timer, 0, &its, NULL); - if (ret == -1) { - PERROR("timer_settime"); - } -} - -static -void lib_ring_buffer_channel_switch_timer_stop(struct lttng_ust_lib_ring_buffer_channel *chan) -{ - int ret; - - if (!chan->switch_timer_interval || !chan->switch_timer_enabled) - return; - - ret = timer_delete(chan->switch_timer); - if (ret == -1) { - PERROR("timer_delete"); - } - - lib_ring_buffer_wait_signal_thread_qs(LTTNG_UST_RB_SIG_FLUSH); - - chan->switch_timer = 0; - chan->switch_timer_enabled = 0; -} - -static -void lib_ring_buffer_channel_read_timer_start(struct lttng_ust_lib_ring_buffer_channel *chan) -{ - const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; - struct sigevent sev; - struct itimerspec its; - int ret; - - if (config->wakeup != RING_BUFFER_WAKEUP_BY_TIMER - || !chan->read_timer_interval || chan->read_timer_enabled) - return; - - chan->read_timer_enabled = 1; - - lib_ring_buffer_setup_timer_thread(); - - sev.sigev_notify = SIGEV_SIGNAL; - sev.sigev_signo = LTTNG_UST_RB_SIG_READ; - sev.sigev_value.sival_ptr = chan; - ret = timer_create(CLOCKID, &sev, &chan->read_timer); - if (ret == -1) { - PERROR("timer_create"); - } - - its.it_value.tv_sec = chan->read_timer_interval / 1000000; - its.it_value.tv_nsec = (chan->read_timer_interval % 1000000) * 1000; - its.it_interval.tv_sec = its.it_value.tv_sec; - its.it_interval.tv_nsec = its.it_value.tv_nsec; - - ret = timer_settime(chan->read_timer, 0, &its, NULL); - if (ret == -1) { - PERROR("timer_settime"); - } -} - -static -void lib_ring_buffer_channel_read_timer_stop(struct lttng_ust_lib_ring_buffer_channel *chan) -{ - const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; - int ret; - - if (config->wakeup != RING_BUFFER_WAKEUP_BY_TIMER - || !chan->read_timer_interval || !chan->read_timer_enabled) - return; - - ret = timer_delete(chan->read_timer); - if (ret == -1) { - 
PERROR("timer_delete"); - } - - /* - * do one more check to catch data that has been written in the last - * timer period. - */ - lib_ring_buffer_channel_do_read(chan); - - lib_ring_buffer_wait_signal_thread_qs(LTTNG_UST_RB_SIG_READ); - - chan->read_timer = 0; - chan->read_timer_enabled = 0; -} - -static void channel_unregister_notifiers(struct lttng_ust_lib_ring_buffer_channel *chan, - struct lttng_ust_shm_handle *handle __attribute__((unused))) -{ - lib_ring_buffer_channel_switch_timer_stop(chan); - lib_ring_buffer_channel_read_timer_stop(chan); -} - -static void channel_print_errors(struct lttng_ust_lib_ring_buffer_channel *chan, - struct lttng_ust_shm_handle *handle) -{ - const struct lttng_ust_lib_ring_buffer_config *config = - &chan->backend.config; - int cpu; - - if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) { - for_each_possible_cpu(cpu) { - struct lttng_ust_lib_ring_buffer *buf = - shmp(handle, chan->backend.buf[cpu].shmp); - if (buf) - lib_ring_buffer_print_errors(chan, buf, cpu, handle); - } - } else { - struct lttng_ust_lib_ring_buffer *buf = - shmp(handle, chan->backend.buf[0].shmp); - - if (buf) - lib_ring_buffer_print_errors(chan, buf, -1, handle); - } -} - -static void channel_free(struct lttng_ust_lib_ring_buffer_channel *chan, - struct lttng_ust_shm_handle *handle, - int consumer) -{ - channel_backend_free(&chan->backend, handle); - /* chan is freed by shm teardown */ - shm_object_table_destroy(handle->table, consumer); - free(handle); -} - -/** - * channel_create - Create channel. - * @config: ring buffer instance configuration - * @name: name of the channel - * @priv_data_align: alignment, in bytes, of the private data area. (config) - * @priv_data_size: length, in bytes, of the private data area. (config) - * @priv_data_init: initialization data for private data. (config) - * @priv: local private data (memory owner by caller) - * @buf_addr: pointer the the beginning of the preallocated buffer contiguous - * address mapping. It is used only by RING_BUFFER_STATIC - * configuration. It can be set to NULL for other backends. - * @subbuf_size: subbuffer size - * @num_subbuf: number of subbuffers - * @switch_timer_interval: Time interval (in us) to fill sub-buffers with - * padding to let readers get those sub-buffers. - * Used for live streaming. - * @read_timer_interval: Time interval (in us) to wake up pending readers. - * @stream_fds: array of stream file descriptors. - * @nr_stream_fds: number of file descriptors in array. - * - * Holds cpu hotplug. - * Returns NULL on failure. 
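The switch/read timer helpers above arm per-process POSIX timers from switch_timer_interval and read_timer_interval, which channel_create() documents in microseconds. The microsecond-to-itimerspec conversion they perform, shown in isolation with a hypothetical helper name:

#include <stdint.h>
#include <time.h>

/* Convert a microsecond period into a periodic itimerspec. */
struct itimerspec usec_to_itimerspec(uint64_t usec)
{
        struct itimerspec its;

        its.it_value.tv_sec = usec / 1000000;
        its.it_value.tv_nsec = (usec % 1000000) * 1000;
        its.it_interval = its.it_value; /* re-fire at the same period */
        return its;
}

timer_settime() is then called with this value so the timer fires periodically at the requested interval.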
- */ -struct lttng_ust_shm_handle *channel_create(const struct lttng_ust_lib_ring_buffer_config *config, - const char *name, - size_t priv_data_align, - size_t priv_data_size, - void *priv_data_init, - void *priv, - void *buf_addr __attribute__((unused)), size_t subbuf_size, - size_t num_subbuf, unsigned int switch_timer_interval, - unsigned int read_timer_interval, - const int *stream_fds, int nr_stream_fds, - int64_t blocking_timeout) -{ - int ret; - size_t shmsize, chansize; - struct lttng_ust_lib_ring_buffer_channel *chan; - struct lttng_ust_shm_handle *handle; - struct shm_object *shmobj; - unsigned int nr_streams; - int64_t blocking_timeout_ms; - - if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) - nr_streams = num_possible_cpus(); - else - nr_streams = 1; - - if (nr_stream_fds != nr_streams) - return NULL; - - if (blocking_timeout < -1) { - return NULL; - } - /* usec to msec */ - if (blocking_timeout == -1) { - blocking_timeout_ms = -1; - } else { - blocking_timeout_ms = blocking_timeout / 1000; - if (blocking_timeout_ms != (int32_t) blocking_timeout_ms) { - return NULL; - } - } - - if (lib_ring_buffer_check_config(config, switch_timer_interval, - read_timer_interval)) - return NULL; - - handle = zmalloc(sizeof(struct lttng_ust_shm_handle)); - if (!handle) - return NULL; - - /* Allocate table for channel + per-cpu buffers */ - handle->table = shm_object_table_create(1 + num_possible_cpus()); - if (!handle->table) - goto error_table_alloc; - - /* Calculate the shm allocation layout */ - shmsize = sizeof(struct lttng_ust_lib_ring_buffer_channel); - shmsize += lttng_ust_offset_align(shmsize, __alignof__(struct lttng_ust_lib_ring_buffer_shmp)); - shmsize += sizeof(struct lttng_ust_lib_ring_buffer_shmp) * nr_streams; - chansize = shmsize; - if (priv_data_align) - shmsize += lttng_ust_offset_align(shmsize, priv_data_align); - shmsize += priv_data_size; - - /* Allocate normal memory for channel (not shared) */ - shmobj = shm_object_table_alloc(handle->table, shmsize, SHM_OBJECT_MEM, - -1, -1); - if (!shmobj) - goto error_append; - /* struct lttng_ust_lib_ring_buffer_channel is at object 0, offset 0 (hardcoded) */ - set_shmp(handle->chan, zalloc_shm(shmobj, chansize)); - assert(handle->chan._ref.index == 0); - assert(handle->chan._ref.offset == 0); - chan = shmp(handle, handle->chan); - if (!chan) - goto error_append; - chan->nr_streams = nr_streams; - - /* space for private data */ - if (priv_data_size) { - void *priv_config; - - DECLARE_SHMP(void, priv_data_alloc); - - align_shm(shmobj, priv_data_align); - chan->priv_data_offset = shmobj->allocated_len; - set_shmp(priv_data_alloc, zalloc_shm(shmobj, priv_data_size)); - if (!shmp(handle, priv_data_alloc)) - goto error_append; - priv_config = channel_get_private_config(chan); - memcpy(priv_config, priv_data_init, priv_data_size); - } else { - chan->priv_data_offset = -1; - } - - chan->u.s.blocking_timeout_ms = (int32_t) blocking_timeout_ms; - - channel_set_private(chan, priv); - - ret = channel_backend_init(&chan->backend, name, config, - subbuf_size, num_subbuf, handle, - stream_fds); - if (ret) - goto error_backend_init; - - chan->handle = handle; - chan->commit_count_mask = (~0UL >> chan->backend.num_subbuf_order); - - chan->switch_timer_interval = switch_timer_interval; - chan->read_timer_interval = read_timer_interval; - lib_ring_buffer_channel_switch_timer_start(chan); - lib_ring_buffer_channel_read_timer_start(chan); - - return handle; - -error_backend_init: -error_append: - shm_object_table_destroy(handle->table, 1); 
-error_table_alloc: - free(handle); - return NULL; -} - -struct lttng_ust_shm_handle *channel_handle_create(void *data, - uint64_t memory_map_size, - int wakeup_fd) -{ - struct lttng_ust_shm_handle *handle; - struct shm_object *object; - - handle = zmalloc(sizeof(struct lttng_ust_shm_handle)); - if (!handle) - return NULL; - - /* Allocate table for channel + per-cpu buffers */ - handle->table = shm_object_table_create(1 + num_possible_cpus()); - if (!handle->table) - goto error_table_alloc; - /* Add channel object */ - object = shm_object_table_append_mem(handle->table, data, - memory_map_size, wakeup_fd); - if (!object) - goto error_table_object; - /* struct lttng_ust_lib_ring_buffer_channel is at object 0, offset 0 (hardcoded) */ - handle->chan._ref.index = 0; - handle->chan._ref.offset = 0; - return handle; - -error_table_object: - shm_object_table_destroy(handle->table, 0); -error_table_alloc: - free(handle); - return NULL; -} - -int channel_handle_add_stream(struct lttng_ust_shm_handle *handle, - int shm_fd, int wakeup_fd, uint32_t stream_nr, - uint64_t memory_map_size) -{ - struct shm_object *object; - - /* Add stream object */ - object = shm_object_table_append_shm(handle->table, - shm_fd, wakeup_fd, stream_nr, - memory_map_size); - if (!object) - return -EINVAL; - return 0; -} - -unsigned int channel_handle_get_nr_streams(struct lttng_ust_shm_handle *handle) -{ - assert(handle->table); - return handle->table->allocated_len - 1; -} - -static -void channel_release(struct lttng_ust_lib_ring_buffer_channel *chan, struct lttng_ust_shm_handle *handle, - int consumer) -{ - channel_free(chan, handle, consumer); -} - -/** - * channel_destroy - Finalize, wait for q.s. and destroy channel. - * @chan: channel to destroy - * - * Holds cpu hotplug. - * Call "destroy" callback, finalize channels, decrement the channel - * reference count. Note that when readers have completed data - * consumption of finalized channels, get_subbuf() will return -ENODATA. - * They should release their handle at that point. - */ -void channel_destroy(struct lttng_ust_lib_ring_buffer_channel *chan, struct lttng_ust_shm_handle *handle, - int consumer) -{ - if (consumer) { - /* - * Note: the consumer takes care of finalizing and - * switching the buffers. - */ - channel_unregister_notifiers(chan, handle); - /* - * The consumer prints errors. - */ - channel_print_errors(chan, handle); - } - - /* - * sessiond/consumer are keeping a reference on the shm file - * descriptor directly. No need to refcount. 
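channel_create() above validates its blocking_timeout argument by converting microseconds to milliseconds and rejecting values that do not round-trip through the int32_t it stores in the channel. That narrowing check in isolation, as a hypothetical helper using the same arithmetic:

#include <stdbool.h>
#include <stdint.h>

/* Convert a µs timeout (-1 = wait forever) to ms, rejecting overflow. */
bool timeout_us_to_ms(int64_t timeout_us, int32_t *timeout_ms)
{
        int64_t ms;

        if (timeout_us == -1) {
                *timeout_ms = -1;
                return true;
        }
        if (timeout_us < 0)
                return false;
        ms = timeout_us / 1000;
        if (ms != (int32_t) ms)
                return false;   /* would not fit in the stored int32_t */
        *timeout_ms = (int32_t) ms;
        return true;
}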
- */ - channel_release(chan, handle, consumer); - return; -} - -struct lttng_ust_lib_ring_buffer *channel_get_ring_buffer( - const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_channel *chan, int cpu, - struct lttng_ust_shm_handle *handle, - int *shm_fd, int *wait_fd, - int *wakeup_fd, - uint64_t *memory_map_size) -{ - struct shm_ref *ref; - - if (config->alloc == RING_BUFFER_ALLOC_GLOBAL) { - cpu = 0; - } else { - if (cpu >= num_possible_cpus()) - return NULL; - } - ref = &chan->backend.buf[cpu].shmp._ref; - *shm_fd = shm_get_shm_fd(handle, ref); - *wait_fd = shm_get_wait_fd(handle, ref); - *wakeup_fd = shm_get_wakeup_fd(handle, ref); - if (shm_get_shm_size(handle, ref, memory_map_size)) - return NULL; - return shmp(handle, chan->backend.buf[cpu].shmp); -} - -int ring_buffer_channel_close_wait_fd( - const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), - struct lttng_ust_lib_ring_buffer_channel *chan __attribute__((unused)), - struct lttng_ust_shm_handle *handle) -{ - struct shm_ref *ref; - - ref = &handle->chan._ref; - return shm_close_wait_fd(handle, ref); -} - -int ring_buffer_channel_close_wakeup_fd( - const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), - struct lttng_ust_lib_ring_buffer_channel *chan __attribute__((unused)), - struct lttng_ust_shm_handle *handle) -{ - struct shm_ref *ref; - - ref = &handle->chan._ref; - return shm_close_wakeup_fd(handle, ref); -} - -int ring_buffer_stream_close_wait_fd(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_channel *chan, - struct lttng_ust_shm_handle *handle, - int cpu) -{ - struct shm_ref *ref; - - if (config->alloc == RING_BUFFER_ALLOC_GLOBAL) { - cpu = 0; - } else { - if (cpu >= num_possible_cpus()) - return -EINVAL; - } - ref = &chan->backend.buf[cpu].shmp._ref; - return shm_close_wait_fd(handle, ref); -} - -int ring_buffer_stream_close_wakeup_fd(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_channel *chan, - struct lttng_ust_shm_handle *handle, - int cpu) -{ - struct shm_ref *ref; - int ret; - - if (config->alloc == RING_BUFFER_ALLOC_GLOBAL) { - cpu = 0; - } else { - if (cpu >= num_possible_cpus()) - return -EINVAL; - } - ref = &chan->backend.buf[cpu].shmp._ref; - pthread_mutex_lock(&wakeup_fd_mutex); - ret = shm_close_wakeup_fd(handle, ref); - pthread_mutex_unlock(&wakeup_fd_mutex); - return ret; -} - -int lib_ring_buffer_open_read(struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_shm_handle *handle __attribute__((unused))) -{ - if (uatomic_cmpxchg(&buf->active_readers, 0, 1) != 0) - return -EBUSY; - cmm_smp_mb(); - return 0; -} - -void lib_ring_buffer_release_read(struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_shm_handle *handle) -{ - struct lttng_ust_lib_ring_buffer_channel *chan = shmp(handle, buf->backend.chan); - - if (!chan) - return; - CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1); - cmm_smp_mb(); - uatomic_dec(&buf->active_readers); -} - -/** - * lib_ring_buffer_snapshot - save subbuffer position snapshot (for read) - * @buf: ring buffer - * @consumed: consumed count indicating the position where to read - * @produced: produced count, indicates position when to stop reading - * - * Returns -ENODATA if buffer is finalized, -EAGAIN if there is currently no - * data to read at consumed position, or 0 if the get operation succeeds. 
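lib_ring_buffer_open_read() and lib_ring_buffer_release_read() above enforce a single active reader per buffer with a compare-and-swap on active_readers. An equivalent stand-alone sketch using C11 atomics instead of liburcu's uatomic layer, with hypothetical names:

#include <errno.h>
#include <stdatomic.h>

static atomic_int active_readers;       /* 0 = free, 1 = a reader holds it */

int reader_acquire(void)
{
        int expected = 0;

        if (!atomic_compare_exchange_strong(&active_readers, &expected, 1))
                return -EBUSY;  /* another reader already owns the buffer */
        return 0;
}

void reader_release(void)
{
        atomic_store(&active_readers, 0);
}

The cmm_smp_mb() calls in the real functions order the acquisition/release against the reader's buffer accesses; the sequentially consistent atomics above give the sketch the same property.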
- */ - -int lib_ring_buffer_snapshot(struct lttng_ust_lib_ring_buffer *buf, - unsigned long *consumed, unsigned long *produced, - struct lttng_ust_shm_handle *handle) -{ - struct lttng_ust_lib_ring_buffer_channel *chan; - const struct lttng_ust_lib_ring_buffer_config *config; - unsigned long consumed_cur, write_offset; - int finalized; - - chan = shmp(handle, buf->backend.chan); - if (!chan) - return -EPERM; - config = &chan->backend.config; - finalized = CMM_ACCESS_ONCE(buf->finalized); - /* - * Read finalized before counters. - */ - cmm_smp_rmb(); - consumed_cur = uatomic_read(&buf->consumed); - /* - * No need to issue a memory barrier between consumed count read and - * write offset read, because consumed count can only change - * concurrently in overwrite mode, and we keep a sequence counter - * identifier derived from the write offset to check we are getting - * the same sub-buffer we are expecting (the sub-buffers are atomically - * "tagged" upon writes, tags are checked upon read). - */ - write_offset = v_read(config, &buf->offset); - - /* - * Check that we are not about to read the same subbuffer in - * which the writer head is. - */ - if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed_cur, chan) - == 0) - goto nodata; - - *consumed = consumed_cur; - *produced = subbuf_trunc(write_offset, chan); - - return 0; - -nodata: - /* - * The memory barriers __wait_event()/wake_up_interruptible() take care - * of "raw_spin_is_locked" memory ordering. - */ - if (finalized) - return -ENODATA; - else - return -EAGAIN; -} - -/** - * Performs the same function as lib_ring_buffer_snapshot(), but the positions - * are saved regardless of whether the consumed and produced positions are - * in the same subbuffer. - * @buf: ring buffer - * @consumed: consumed byte count indicating the last position read - * @produced: produced byte count indicating the last position written - * - * This function is meant to provide information on the exact producer and - * consumer positions without regard for the "snapshot" feature. - */ -int lib_ring_buffer_snapshot_sample_positions( - struct lttng_ust_lib_ring_buffer *buf, - unsigned long *consumed, unsigned long *produced, - struct lttng_ust_shm_handle *handle) -{ - struct lttng_ust_lib_ring_buffer_channel *chan; - const struct lttng_ust_lib_ring_buffer_config *config; - - chan = shmp(handle, buf->backend.chan); - if (!chan) - return -EPERM; - config = &chan->backend.config; - cmm_smp_rmb(); - *consumed = uatomic_read(&buf->consumed); - /* - * No need to issue a memory barrier between consumed count read and - * write offset read, because consumed count can only change - * concurrently in overwrite mode, and we keep a sequence counter - * identifier derived from the write offset to check we are getting - * the same sub-buffer we are expecting (the sub-buffers are atomically - * "tagged" upon writes, tags are checked upon read). 
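The snapshot code above relies on subbuf_trunc(), subbuf_index() and subbuf_offset(), which reduce to mask-and-shift arithmetic on a free-running byte offset when the sub-buffer size and count are powers of two. A hedged sketch of that arithmetic with made-up sizes, not the actual helpers from the frontend headers:

#include <stddef.h>

#define SUBBUF_SIZE     4096UL  /* power of two (hypothetical) */
#define NUM_SUBBUF      8UL     /* power of two (hypothetical) */

/* Offset within the current sub-buffer. */
size_t subbuf_offset_of(size_t offset)
{
        return offset & (SUBBUF_SIZE - 1);
}

/* Offset truncated to the start of its sub-buffer. */
size_t subbuf_trunc_of(size_t offset)
{
        return offset & ~(SUBBUF_SIZE - 1);
}

/* Index of the sub-buffer containing this offset. */
size_t subbuf_index_of(size_t offset)
{
        return (offset / SUBBUF_SIZE) & (NUM_SUBBUF - 1);
}

With these, the "writer is still in this sub-buffer" test above is simply subbuf_trunc_of(write_offset) == subbuf_trunc_of(consumed).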
- */ - *produced = v_read(config, &buf->offset); - return 0; -} - -/** - * lib_ring_buffer_move_consumer - move consumed counter forward - * @buf: ring buffer - * @consumed_new: new consumed count value - */ -void lib_ring_buffer_move_consumer(struct lttng_ust_lib_ring_buffer *buf, - unsigned long consumed_new, - struct lttng_ust_shm_handle *handle) -{ - struct lttng_ust_lib_ring_buffer_backend *bufb = &buf->backend; - struct lttng_ust_lib_ring_buffer_channel *chan; - unsigned long consumed; - - chan = shmp(handle, bufb->chan); - if (!chan) - return; - CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1); - - /* - * Only push the consumed value forward. - * If the consumed cmpxchg fails, this is because we have been pushed by - * the writer in flight recorder mode. - */ - consumed = uatomic_read(&buf->consumed); - while ((long) consumed - (long) consumed_new < 0) - consumed = uatomic_cmpxchg(&buf->consumed, consumed, - consumed_new); -} - -/** - * lib_ring_buffer_get_subbuf - get exclusive access to subbuffer for reading - * @buf: ring buffer - * @consumed: consumed count indicating the position where to read - * - * Returns -ENODATA if buffer is finalized, -EAGAIN if there is currently no - * data to read at consumed position, or 0 if the get operation succeeds. - */ -int lib_ring_buffer_get_subbuf(struct lttng_ust_lib_ring_buffer *buf, - unsigned long consumed, - struct lttng_ust_shm_handle *handle) -{ - struct lttng_ust_lib_ring_buffer_channel *chan; - const struct lttng_ust_lib_ring_buffer_config *config; - unsigned long consumed_cur, consumed_idx, commit_count, write_offset; - int ret, finalized, nr_retry = LTTNG_UST_RING_BUFFER_GET_RETRY; - struct commit_counters_cold *cc_cold; - - chan = shmp(handle, buf->backend.chan); - if (!chan) - return -EPERM; - config = &chan->backend.config; -retry: - finalized = CMM_ACCESS_ONCE(buf->finalized); - /* - * Read finalized before counters. - */ - cmm_smp_rmb(); - consumed_cur = uatomic_read(&buf->consumed); - consumed_idx = subbuf_index(consumed, chan); - cc_cold = shmp_index(handle, buf->commit_cold, consumed_idx); - if (!cc_cold) - return -EPERM; - commit_count = v_read(config, &cc_cold->cc_sb); - /* - * Make sure we read the commit count before reading the buffer - * data and the write offset. Correct consumed offset ordering - * wrt commit count is insured by the use of cmpxchg to update - * the consumed offset. - */ - /* - * Local rmb to match the remote wmb to read the commit count - * before the buffer data and the write offset. - */ - cmm_smp_rmb(); - - write_offset = v_read(config, &buf->offset); - - /* - * Check that the buffer we are getting is after or at consumed_cur - * position. - */ - if ((long) subbuf_trunc(consumed, chan) - - (long) subbuf_trunc(consumed_cur, chan) < 0) - goto nodata; - - /* - * Check that the subbuffer we are trying to consume has been - * already fully committed. There are a few causes that can make - * this unavailability situation occur: - * - * Temporary (short-term) situation: - * - Application is running on a different CPU, between reserve - * and commit ring buffer operations, - * - Application is preempted between reserve and commit ring - * buffer operations, - * - * Long-term situation: - * - Application is stopped (SIGSTOP) between reserve and commit - * ring buffer operations. Could eventually be resumed by - * SIGCONT. - * - Application is killed (SIGTERM, SIGINT, SIGKILL) between - * reserve and commit ring buffer operation. 
- * - * From a consumer perspective, handling short-term - * unavailability situations is performed by retrying a few - * times after a delay. Handling long-term unavailability - * situations is handled by failing to get the sub-buffer. - * - * In all of those situations, if the application is taking a - * long time to perform its commit after ring buffer space - * reservation, we can end up in a situation where the producer - * will fill the ring buffer and try to write into the same - * sub-buffer again (which has a missing commit). This is - * handled by the producer in the sub-buffer switch handling - * code of the reserve routine by detecting unbalanced - * reserve/commit counters and discarding all further events - * until the situation is resolved in those situations. Two - * scenarios can occur: - * - * 1) The application causing the reserve/commit counters to be - * unbalanced has been terminated. In this situation, all - * further events will be discarded in the buffers, and no - * further buffer data will be readable by the consumer - * daemon. Tearing down the UST tracing session and starting - * anew is a work-around for those situations. Note that this - * only affects per-UID tracing. In per-PID tracing, the - * application vanishes with the termination, and therefore - * no more data needs to be written to the buffers. - * 2) The application causing the unbalance has been delayed for - * a long time, but will eventually try to increment the - * commit counter after eventually writing to the sub-buffer. - * This situation can cause events to be discarded until the - * application resumes its operations. - */ - if (((commit_count - chan->backend.subbuf_size) - & chan->commit_count_mask) - - (buf_trunc(consumed, chan) - >> chan->backend.num_subbuf_order) - != 0) { - if (nr_retry-- > 0) { - if (nr_retry <= (LTTNG_UST_RING_BUFFER_GET_RETRY >> 1)) - (void) poll(NULL, 0, LTTNG_UST_RING_BUFFER_RETRY_DELAY_MS); - goto retry; - } else { - goto nodata; - } - } - - /* - * Check that we are not about to read the same subbuffer in - * which the writer head is. - */ - if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed, chan) - == 0) - goto nodata; - - /* - * Failure to get the subbuffer causes a busy-loop retry without going - * to a wait queue. These are caused by short-lived race windows where - * the writer is getting access to a subbuffer we were trying to get - * access to. Also checks that the "consumed" buffer count we are - * looking for matches the one contained in the subbuffer id. - * - * The short-lived race window described here can be affected by - * application signals and preemption, thus requiring to bound - * the loop to a maximum number of retry. - */ - ret = update_read_sb_index(config, &buf->backend, &chan->backend, - consumed_idx, buf_trunc_val(consumed, chan), - handle); - if (ret) { - if (nr_retry-- > 0) { - if (nr_retry <= (LTTNG_UST_RING_BUFFER_GET_RETRY >> 1)) - (void) poll(NULL, 0, LTTNG_UST_RING_BUFFER_RETRY_DELAY_MS); - goto retry; - } else { - goto nodata; - } - } - subbuffer_id_clear_noref(config, &buf->backend.buf_rsb.id); - - buf->get_subbuf_consumed = consumed; - buf->get_subbuf = 1; - - return 0; - -nodata: - /* - * The memory barriers __wait_event()/wake_up_interruptible() take care - * of "raw_spin_is_locked" memory ordering. 
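The bounded retry above absorbs the short-lived races described in this comment: the first half of the retry budget retries immediately, the second half sleeps via poll(NULL, 0, delay) before retrying, and exhausting the budget is treated as long-term unavailability. The same policy as a stand-alone helper with hypothetical constants:

#include <poll.h>
#include <stdbool.h>

#define GET_RETRY       100     /* hypothetical retry budget */
#define RETRY_DELAY_MS  10      /* hypothetical delay once we start sleeping */

/* Retry op() a bounded number of times; sleep only for the later attempts. */
bool retry_bounded(bool (*op)(void *), void *arg)
{
        int nr_retry = GET_RETRY;

        for (;;) {
                if (op(arg))
                        return true;
                if (nr_retry-- <= 0)
                        return false;   /* long-term unavailability: give up */
                if (nr_retry <= (GET_RETRY >> 1))
                        (void) poll(NULL, 0, RETRY_DELAY_MS);
        }
}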
- */ - if (finalized) - return -ENODATA; - else - return -EAGAIN; -} - -/** - * lib_ring_buffer_put_subbuf - release exclusive subbuffer access - * @buf: ring buffer - */ -void lib_ring_buffer_put_subbuf(struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_shm_handle *handle) -{ - struct lttng_ust_lib_ring_buffer_backend *bufb = &buf->backend; - struct lttng_ust_lib_ring_buffer_channel *chan; - const struct lttng_ust_lib_ring_buffer_config *config; - unsigned long sb_bindex, consumed_idx, consumed; - struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; - struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; - - chan = shmp(handle, bufb->chan); - if (!chan) - return; - config = &chan->backend.config; - CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1); - - if (!buf->get_subbuf) { - /* - * Reader puts a subbuffer it did not get. - */ - CHAN_WARN_ON(chan, 1); - return; - } - consumed = buf->get_subbuf_consumed; - buf->get_subbuf = 0; - - /* - * Clear the records_unread counter. (overruns counter) - * Can still be non-zero if a file reader simply grabbed the data - * without using iterators. - * Can be below zero if an iterator is used on a snapshot more than - * once. - */ - sb_bindex = subbuffer_id_get_index(config, bufb->buf_rsb.id); - rpages = shmp_index(handle, bufb->array, sb_bindex); - if (!rpages) - return; - backend_pages = shmp(handle, rpages->shmp); - if (!backend_pages) - return; - v_add(config, v_read(config, &backend_pages->records_unread), - &bufb->records_read); - v_set(config, &backend_pages->records_unread, 0); - CHAN_WARN_ON(chan, config->mode == RING_BUFFER_OVERWRITE - && subbuffer_id_is_noref(config, bufb->buf_rsb.id)); - subbuffer_id_set_noref(config, &bufb->buf_rsb.id); - - /* - * Exchange the reader subbuffer with the one we put in its place in the - * writer subbuffer table. Expect the original consumed count. If - * update_read_sb_index fails, this is because the writer updated the - * subbuffer concurrently. We should therefore keep the subbuffer we - * currently have: it has become invalid to try reading this sub-buffer - * consumed count value anyway. - */ - consumed_idx = subbuf_index(consumed, chan); - update_read_sb_index(config, &buf->backend, &chan->backend, - consumed_idx, buf_trunc_val(consumed, chan), - handle); - /* - * update_read_sb_index return value ignored. Don't exchange sub-buffer - * if the writer concurrently updated it. - */ -} - -/* - * cons_offset is an iterator on all subbuffer offsets between the reader - * position and the writer position. 
(inclusive) - */ -static -void lib_ring_buffer_print_subbuffer_errors(struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_lib_ring_buffer_channel *chan, - unsigned long cons_offset, - int cpu, - struct lttng_ust_shm_handle *handle) -{ - const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; - unsigned long cons_idx, commit_count, commit_count_sb; - struct commit_counters_hot *cc_hot; - struct commit_counters_cold *cc_cold; - - cons_idx = subbuf_index(cons_offset, chan); - cc_hot = shmp_index(handle, buf->commit_hot, cons_idx); - if (!cc_hot) - return; - cc_cold = shmp_index(handle, buf->commit_cold, cons_idx); - if (!cc_cold) - return; - commit_count = v_read(config, &cc_hot->cc); - commit_count_sb = v_read(config, &cc_cold->cc_sb); - - if (subbuf_offset(commit_count, chan) != 0) - DBG("ring buffer %s, cpu %d: " - "commit count in subbuffer %lu,\n" - "expecting multiples of %lu bytes\n" - " [ %lu bytes committed, %lu bytes reader-visible ]\n", - chan->backend.name, cpu, cons_idx, - chan->backend.subbuf_size, - commit_count, commit_count_sb); - - DBG("ring buffer: %s, cpu %d: %lu bytes committed\n", - chan->backend.name, cpu, commit_count); -} - -static -void lib_ring_buffer_print_buffer_errors(struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_lib_ring_buffer_channel *chan, - int cpu, struct lttng_ust_shm_handle *handle) -{ - const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; - unsigned long write_offset, cons_offset; - - /* - * No need to order commit_count, write_offset and cons_offset reads - * because we execute at teardown when no more writer nor reader - * references are left. - */ - write_offset = v_read(config, &buf->offset); - cons_offset = uatomic_read(&buf->consumed); - if (write_offset != cons_offset) - DBG("ring buffer %s, cpu %d: " - "non-consumed data\n" - " [ %lu bytes written, %lu bytes read ]\n", - chan->backend.name, cpu, write_offset, cons_offset); - - for (cons_offset = uatomic_read(&buf->consumed); - (long) (subbuf_trunc((unsigned long) v_read(config, &buf->offset), - chan) - - cons_offset) > 0; - cons_offset = subbuf_align(cons_offset, chan)) - lib_ring_buffer_print_subbuffer_errors(buf, chan, cons_offset, - cpu, handle); -} - -static -void lib_ring_buffer_print_errors(struct lttng_ust_lib_ring_buffer_channel *chan, - struct lttng_ust_lib_ring_buffer *buf, int cpu, - struct lttng_ust_shm_handle *handle) -{ - const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; - - if (!strcmp(chan->backend.name, "relay-metadata-mmap")) { - DBG("ring buffer %s: %lu records written, " - "%lu records overrun\n", - chan->backend.name, - v_read(config, &buf->records_count), - v_read(config, &buf->records_overrun)); - } else { - DBG("ring buffer %s, cpu %d: %lu records written, " - "%lu records overrun\n", - chan->backend.name, cpu, - v_read(config, &buf->records_count), - v_read(config, &buf->records_overrun)); - - if (v_read(config, &buf->records_lost_full) - || v_read(config, &buf->records_lost_wrap) - || v_read(config, &buf->records_lost_big)) - DBG("ring buffer %s, cpu %d: records were lost. Caused by:\n" - " [ %lu buffer full, %lu nest buffer wrap-around, " - "%lu event too big ]\n", - chan->backend.name, cpu, - v_read(config, &buf->records_lost_full), - v_read(config, &buf->records_lost_wrap), - v_read(config, &buf->records_lost_big)); - } - lib_ring_buffer_print_buffer_errors(buf, chan, cpu, handle); -} - -/* - * lib_ring_buffer_switch_old_start: Populate old subbuffer header. 
- * - * Only executed by SWITCH_FLUSH, which can be issued while tracing is - * active or at buffer finalization (destroy). - */ -static -void lib_ring_buffer_switch_old_start(struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_lib_ring_buffer_channel *chan, - struct switch_offsets *offsets, - uint64_t tsc, - struct lttng_ust_shm_handle *handle) -{ - const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; - unsigned long oldidx = subbuf_index(offsets->old, chan); - unsigned long commit_count; - struct commit_counters_hot *cc_hot; - - config->cb.buffer_begin(buf, tsc, oldidx, handle); - - /* - * Order all writes to buffer before the commit count update that will - * determine that the subbuffer is full. - */ - cmm_smp_wmb(); - cc_hot = shmp_index(handle, buf->commit_hot, oldidx); - if (!cc_hot) - return; - v_add(config, config->cb.subbuffer_header_size(), - &cc_hot->cc); - commit_count = v_read(config, &cc_hot->cc); - /* Check if the written buffer has to be delivered */ - lib_ring_buffer_check_deliver(config, buf, chan, offsets->old, - commit_count, oldidx, handle, tsc); - lib_ring_buffer_write_commit_counter(config, buf, chan, - offsets->old + config->cb.subbuffer_header_size(), - commit_count, handle, cc_hot); -} - -/* - * lib_ring_buffer_switch_old_end: switch old subbuffer - * - * Note : offset_old should never be 0 here. It is ok, because we never perform - * buffer switch on an empty subbuffer in SWITCH_ACTIVE mode. The caller - * increments the offset_old value when doing a SWITCH_FLUSH on an empty - * subbuffer. - */ -static -void lib_ring_buffer_switch_old_end(struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_lib_ring_buffer_channel *chan, - struct switch_offsets *offsets, - uint64_t tsc, - struct lttng_ust_shm_handle *handle) -{ - const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; - unsigned long oldidx = subbuf_index(offsets->old - 1, chan); - unsigned long commit_count, padding_size, data_size; - struct commit_counters_hot *cc_hot; - uint64_t *ts_end; - - data_size = subbuf_offset(offsets->old - 1, chan) + 1; - padding_size = chan->backend.subbuf_size - data_size; - subbuffer_set_data_size(config, &buf->backend, oldidx, data_size, - handle); - - ts_end = shmp_index(handle, buf->ts_end, oldidx); - if (!ts_end) - return; - /* - * This is the last space reservation in that sub-buffer before - * it gets delivered. This provides exclusive access to write to - * this sub-buffer's ts_end. There are also no concurrent - * readers of that ts_end because delivery of that sub-buffer is - * postponed until the commit counter is incremented for the - * current space reservation. - */ - *ts_end = tsc; - - /* - * Order all writes to buffer and store to ts_end before the commit - * count update that will determine that the subbuffer is full. - */ - cmm_smp_wmb(); - cc_hot = shmp_index(handle, buf->commit_hot, oldidx); - if (!cc_hot) - return; - v_add(config, padding_size, &cc_hot->cc); - commit_count = v_read(config, &cc_hot->cc); - lib_ring_buffer_check_deliver(config, buf, chan, offsets->old - 1, - commit_count, oldidx, handle, tsc); - lib_ring_buffer_write_commit_counter(config, buf, chan, - offsets->old + padding_size, commit_count, handle, - cc_hot); -} - -/* - * lib_ring_buffer_switch_new_start: Populate new subbuffer. - * - * This code can be executed unordered : writers may already have written to the - * sub-buffer before this code gets executed, caution. 
The commit makes sure - * that this code is executed before the deliver of this sub-buffer. - */ -static -void lib_ring_buffer_switch_new_start(struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_lib_ring_buffer_channel *chan, - struct switch_offsets *offsets, - uint64_t tsc, - struct lttng_ust_shm_handle *handle) -{ - const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; - unsigned long beginidx = subbuf_index(offsets->begin, chan); - unsigned long commit_count; - struct commit_counters_hot *cc_hot; - - config->cb.buffer_begin(buf, tsc, beginidx, handle); - - /* - * Order all writes to buffer before the commit count update that will - * determine that the subbuffer is full. - */ - cmm_smp_wmb(); - cc_hot = shmp_index(handle, buf->commit_hot, beginidx); - if (!cc_hot) - return; - v_add(config, config->cb.subbuffer_header_size(), &cc_hot->cc); - commit_count = v_read(config, &cc_hot->cc); - /* Check if the written buffer has to be delivered */ - lib_ring_buffer_check_deliver(config, buf, chan, offsets->begin, - commit_count, beginidx, handle, tsc); - lib_ring_buffer_write_commit_counter(config, buf, chan, - offsets->begin + config->cb.subbuffer_header_size(), - commit_count, handle, cc_hot); -} - -/* - * lib_ring_buffer_switch_new_end: finish switching current subbuffer - * - * Calls subbuffer_set_data_size() to set the data size of the current - * sub-buffer. We do not need to perform check_deliver nor commit here, - * since this task will be done by the "commit" of the event for which - * we are currently doing the space reservation. - */ -static -void lib_ring_buffer_switch_new_end(struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_lib_ring_buffer_channel *chan, - struct switch_offsets *offsets, - uint64_t tsc, - struct lttng_ust_shm_handle *handle) -{ - const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; - unsigned long endidx, data_size; - uint64_t *ts_end; - - endidx = subbuf_index(offsets->end - 1, chan); - data_size = subbuf_offset(offsets->end - 1, chan) + 1; - subbuffer_set_data_size(config, &buf->backend, endidx, data_size, - handle); - ts_end = shmp_index(handle, buf->ts_end, endidx); - if (!ts_end) - return; - /* - * This is the last space reservation in that sub-buffer before - * it gets delivered. This provides exclusive access to write to - * this sub-buffer's ts_end. There are also no concurrent - * readers of that ts_end because delivery of that sub-buffer is - * postponed until the commit counter is incremented for the - * current space reservation. - */ - *ts_end = tsc; -} - -/* - * Returns : - * 0 if ok - * !0 if execution must be aborted. - */ -static -int lib_ring_buffer_try_switch_slow(enum switch_mode mode, - struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_lib_ring_buffer_channel *chan, - struct switch_offsets *offsets, - uint64_t *tsc, - struct lttng_ust_shm_handle *handle) -{ - const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; - unsigned long off, reserve_commit_diff; - - offsets->begin = v_read(config, &buf->offset); - offsets->old = offsets->begin; - offsets->switch_old_start = 0; - off = subbuf_offset(offsets->begin, chan); - - *tsc = config->cb.ring_buffer_clock_read(chan); - - /* - * Ensure we flush the header of an empty subbuffer when doing the - * finalize (SWITCH_FLUSH). This ensures that we end up knowing the - * total data gathering duration even if there were no records saved - * after the last buffer switch. 
- * In SWITCH_ACTIVE mode, switch the buffer when it contains events. - * SWITCH_ACTIVE only flushes the current subbuffer, dealing with end of - * subbuffer header as appropriate. - * The next record that reserves space will be responsible for - * populating the following subbuffer header. We choose not to populate - * the next subbuffer header here because we want to be able to use - * SWITCH_ACTIVE for periodical buffer flush, which must - * guarantee that all the buffer content (records and header - * timestamps) are visible to the reader. This is required for - * quiescence guarantees for the fusion merge. - */ - if (mode != SWITCH_FLUSH && !off) - return -1; /* we do not have to switch : buffer is empty */ - - if (caa_unlikely(off == 0)) { - unsigned long sb_index, commit_count; - struct commit_counters_cold *cc_cold; - - /* - * We are performing a SWITCH_FLUSH. There may be concurrent - * writes into the buffer if e.g. invoked while performing a - * snapshot on an active trace. - * - * If the client does not save any header information - * (sub-buffer header size == 0), don't switch empty subbuffer - * on finalize, because it is invalid to deliver a completely - * empty subbuffer. - */ - if (!config->cb.subbuffer_header_size()) - return -1; - - /* Test new buffer integrity */ - sb_index = subbuf_index(offsets->begin, chan); - cc_cold = shmp_index(handle, buf->commit_cold, sb_index); - if (!cc_cold) - return -1; - commit_count = v_read(config, &cc_cold->cc_sb); - reserve_commit_diff = - (buf_trunc(offsets->begin, chan) - >> chan->backend.num_subbuf_order) - - (commit_count & chan->commit_count_mask); - if (caa_likely(reserve_commit_diff == 0)) { - /* Next subbuffer not being written to. */ - if (caa_unlikely(config->mode != RING_BUFFER_OVERWRITE && - subbuf_trunc(offsets->begin, chan) - - subbuf_trunc((unsigned long) - uatomic_read(&buf->consumed), chan) - >= chan->backend.buf_size)) { - /* - * We do not overwrite non consumed buffers - * and we are full : don't switch. - */ - return -1; - } else { - /* - * Next subbuffer not being written to, and we - * are either in overwrite mode or the buffer is - * not full. It's safe to write in this new - * subbuffer. - */ - } - } else { - /* - * Next subbuffer reserve offset does not match the - * commit offset. Don't perform switch in - * producer-consumer and overwrite mode. Caused by - * either a writer OOPS or too many nested writes over a - * reserve/commit pair. - */ - return -1; - } - - /* - * Need to write the subbuffer start header on finalize. - */ - offsets->switch_old_start = 1; - } - offsets->begin = subbuf_align(offsets->begin, chan); - /* Note: old points to the next subbuf at offset 0 */ - offsets->end = offsets->begin; - return 0; -} - -/* - * Force a sub-buffer switch. This operation is completely reentrant : can be - * called while tracing is active with absolutely no lock held. - * - * For RING_BUFFER_SYNC_PER_CPU ring buffers, as a v_cmpxchg is used for - * some atomic operations, this function must be called from the CPU - * which owns the buffer for a ACTIVE flush. However, for - * RING_BUFFER_SYNC_GLOBAL ring buffers, this function can be called - * from any CPU. 
- */ -void lib_ring_buffer_switch_slow(struct lttng_ust_lib_ring_buffer *buf, enum switch_mode mode, - struct lttng_ust_shm_handle *handle) -{ - struct lttng_ust_lib_ring_buffer_channel *chan; - const struct lttng_ust_lib_ring_buffer_config *config; - struct switch_offsets offsets; - unsigned long oldidx; - uint64_t tsc; - - chan = shmp(handle, buf->backend.chan); - if (!chan) - return; - config = &chan->backend.config; - - offsets.size = 0; - - /* - * Perform retryable operations. - */ - do { - if (lib_ring_buffer_try_switch_slow(mode, buf, chan, &offsets, - &tsc, handle)) - return; /* Switch not needed */ - } while (v_cmpxchg(config, &buf->offset, offsets.old, offsets.end) - != offsets.old); - - /* - * Atomically update last_tsc. This update races against concurrent - * atomic updates, but the race will always cause supplementary full TSC - * records, never the opposite (missing a full TSC record when it would - * be needed). - */ - save_last_tsc(config, buf, tsc); - - /* - * Push the reader if necessary - */ - lib_ring_buffer_reserve_push_reader(buf, chan, offsets.old); - - oldidx = subbuf_index(offsets.old, chan); - lib_ring_buffer_clear_noref(config, &buf->backend, oldidx, handle); - - /* - * May need to populate header start on SWITCH_FLUSH. - */ - if (offsets.switch_old_start) { - lib_ring_buffer_switch_old_start(buf, chan, &offsets, tsc, handle); - offsets.old += config->cb.subbuffer_header_size(); - } - - /* - * Switch old subbuffer. - */ - lib_ring_buffer_switch_old_end(buf, chan, &offsets, tsc, handle); -} - -static -bool handle_blocking_retry(int *timeout_left_ms) -{ - int timeout = *timeout_left_ms, delay; - - if (caa_likely(!timeout)) - return false; /* Do not retry, discard event. */ - if (timeout < 0) /* Wait forever. */ - delay = RETRY_DELAY_MS; - else - delay = min_t(int, timeout, RETRY_DELAY_MS); - (void) poll(NULL, 0, delay); - if (timeout > 0) - *timeout_left_ms -= delay; - return true; /* Retry. */ -} - -/* - * Returns : - * 0 if ok - * -ENOSPC if event size is too large for packet. - * -ENOBUFS if there is currently not enough space in buffer for the event. - * -EIO if data cannot be written into the buffer for any other reason. 
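Both slow paths (the switch above and the reserve below) share one lock-free shape: compute the new begin/end offsets from a snapshot of buf->offset, publish them with a compare-and-swap, and redo everything if another writer raced in. A stripped-down sketch of that reservation loop with C11 atomics; hypothetical types, no sub-buffer switching, and not the lttng-ust API:

#include <stdatomic.h>
#include <stddef.h>

static _Atomic size_t write_offset;     /* free-running reserve counter */

/* Reserve 'len' bytes; returns the start offset of the reserved slot. */
size_t reserve_slot(size_t len)
{
        size_t old, end;

        do {
                old = atomic_load(&write_offset);
                end = old + len;        /* the real code also handles switches/padding */
        } while (!atomic_compare_exchange_weak(&write_offset, &old, end));
        return old;
}

The commit side later adds the slot size to the sub-buffer's commit counter; delivery happens once the reserve and commit counts for a sub-buffer line up, as checked by lib_ring_buffer_check_deliver().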
- */ -static -int lib_ring_buffer_try_reserve_slow(struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_lib_ring_buffer_channel *chan, - struct switch_offsets *offsets, - struct lttng_ust_lib_ring_buffer_ctx *ctx, - void *client_ctx) -{ - struct lttng_ust_lib_ring_buffer_ctx_private *ctx_private = ctx->priv; - const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; - struct lttng_ust_shm_handle *handle = chan->handle; - unsigned long reserve_commit_diff, offset_cmp; - int timeout_left_ms = lttng_ust_ringbuffer_get_timeout(chan); - -retry: - offsets->begin = offset_cmp = v_read(config, &buf->offset); - offsets->old = offsets->begin; - offsets->switch_new_start = 0; - offsets->switch_new_end = 0; - offsets->switch_old_end = 0; - offsets->pre_header_padding = 0; - - ctx_private->tsc = config->cb.ring_buffer_clock_read(chan); - if ((int64_t) ctx_private->tsc == -EIO) - return -EIO; - - if (last_tsc_overflow(config, buf, ctx_private->tsc)) - ctx_private->rflags |= RING_BUFFER_RFLAG_FULL_TSC; - - if (caa_unlikely(subbuf_offset(offsets->begin, chan) == 0)) { - offsets->switch_new_start = 1; /* For offsets->begin */ - } else { - offsets->size = config->cb.record_header_size(config, chan, - offsets->begin, - &offsets->pre_header_padding, - ctx, client_ctx); - offsets->size += - lttng_ust_lib_ring_buffer_align(offsets->begin + offsets->size, - ctx->largest_align) - + ctx->data_size; - if (caa_unlikely(subbuf_offset(offsets->begin, chan) + - offsets->size > chan->backend.subbuf_size)) { - offsets->switch_old_end = 1; /* For offsets->old */ - offsets->switch_new_start = 1; /* For offsets->begin */ - } - } - if (caa_unlikely(offsets->switch_new_start)) { - unsigned long sb_index, commit_count; - struct commit_counters_cold *cc_cold; - - /* - * We are typically not filling the previous buffer completely. - */ - if (caa_likely(offsets->switch_old_end)) - offsets->begin = subbuf_align(offsets->begin, chan); - offsets->begin = offsets->begin - + config->cb.subbuffer_header_size(); - /* Test new buffer integrity */ - sb_index = subbuf_index(offsets->begin, chan); - /* - * Read buf->offset before buf->commit_cold[sb_index].cc_sb. - * lib_ring_buffer_check_deliver() has the matching - * memory barriers required around commit_cold cc_sb - * updates to ensure reserve and commit counter updates - * are not seen reordered when updated by another CPU. - */ - cmm_smp_rmb(); - cc_cold = shmp_index(handle, buf->commit_cold, sb_index); - if (!cc_cold) - return -1; - commit_count = v_read(config, &cc_cold->cc_sb); - /* Read buf->commit_cold[sb_index].cc_sb before buf->offset. */ - cmm_smp_rmb(); - if (caa_unlikely(offset_cmp != v_read(config, &buf->offset))) { - /* - * The reserve counter have been concurrently updated - * while we read the commit counter. This means the - * commit counter we read might not match buf->offset - * due to concurrent update. We therefore need to retry. - */ - goto retry; - } - reserve_commit_diff = - (buf_trunc(offsets->begin, chan) - >> chan->backend.num_subbuf_order) - - (commit_count & chan->commit_count_mask); - if (caa_likely(reserve_commit_diff == 0)) { - /* Next subbuffer not being written to. 
*/ - if (caa_unlikely(config->mode != RING_BUFFER_OVERWRITE && - subbuf_trunc(offsets->begin, chan) - - subbuf_trunc((unsigned long) - uatomic_read(&buf->consumed), chan) - >= chan->backend.buf_size)) { - unsigned long nr_lost; - - if (handle_blocking_retry(&timeout_left_ms)) - goto retry; - - /* - * We do not overwrite non consumed buffers - * and we are full : record is lost. - */ - nr_lost = v_read(config, &buf->records_lost_full); - v_inc(config, &buf->records_lost_full); - if ((nr_lost & (DBG_PRINT_NR_LOST - 1)) == 0) { - DBG("%lu or more records lost in (%s:%d) (buffer full)\n", - nr_lost + 1, chan->backend.name, - buf->backend.cpu); - } - return -ENOBUFS; - } else { - /* - * Next subbuffer not being written to, and we - * are either in overwrite mode or the buffer is - * not full. It's safe to write in this new - * subbuffer. - */ - } - } else { - unsigned long nr_lost; - - /* - * Next subbuffer reserve offset does not match the - * commit offset, and this did not involve update to the - * reserve counter. Drop record in producer-consumer and - * overwrite mode. Caused by either a writer OOPS or too - * many nested writes over a reserve/commit pair. - */ - nr_lost = v_read(config, &buf->records_lost_wrap); - v_inc(config, &buf->records_lost_wrap); - if ((nr_lost & (DBG_PRINT_NR_LOST - 1)) == 0) { - DBG("%lu or more records lost in (%s:%d) (wrap-around)\n", - nr_lost + 1, chan->backend.name, - buf->backend.cpu); - } - return -EIO; - } - offsets->size = - config->cb.record_header_size(config, chan, - offsets->begin, - &offsets->pre_header_padding, - ctx, client_ctx); - offsets->size += - lttng_ust_lib_ring_buffer_align(offsets->begin + offsets->size, - ctx->largest_align) - + ctx->data_size; - if (caa_unlikely(subbuf_offset(offsets->begin, chan) - + offsets->size > chan->backend.subbuf_size)) { - unsigned long nr_lost; - - /* - * Record too big for subbuffers, report error, don't - * complete the sub-buffer switch. - */ - nr_lost = v_read(config, &buf->records_lost_big); - v_inc(config, &buf->records_lost_big); - if ((nr_lost & (DBG_PRINT_NR_LOST - 1)) == 0) { - DBG("%lu or more records lost in (%s:%d) record size " - " of %zu bytes is too large for buffer\n", - nr_lost + 1, chan->backend.name, - buf->backend.cpu, offsets->size); - } - return -ENOSPC; - } else { - /* - * We just made a successful buffer switch and the - * record fits in the new subbuffer. Let's write. - */ - } - } else { - /* - * Record fits in the current buffer and we are not on a switch - * boundary. It's safe to write. - */ - } - offsets->end = offsets->begin + offsets->size; - - if (caa_unlikely(subbuf_offset(offsets->end, chan) == 0)) { - /* - * The offset_end will fall at the very beginning of the next - * subbuffer. - */ - offsets->switch_new_end = 1; /* For offsets->begin */ - } - return 0; -} - -/** - * lib_ring_buffer_reserve_slow - Atomic slot reservation in a buffer. - * @ctx: ring buffer context. - * - * Return : -NOBUFS if not enough space, -ENOSPC if event size too large, - * -EIO for other errors, else returns 0. - * It will take care of sub-buffer switching. 
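The records_lost_* accounting in lib_ring_buffer_try_reserve_slow() above rate-limits its DBG output with a power-of-two mask, so only every DBG_PRINT_NR_LOST-th loss is reported. The same trick in isolation, with a hypothetical constant and no concurrency handling:

#include <stdio.h>

#define PRINT_EVERY     256UL   /* must be a power of two (hypothetical) */

static unsigned long nr_lost;

/* Count a lost record, reporting only once every PRINT_EVERY losses. */
void record_lost(const char *reason)
{
        if ((nr_lost++ & (PRINT_EVERY - 1)) == 0)
                fprintf(stderr, "%lu or more records lost (%s)\n",
                        nr_lost, reason);
}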
- */ -int lib_ring_buffer_reserve_slow(struct lttng_ust_lib_ring_buffer_ctx *ctx, - void *client_ctx) -{ - struct lttng_ust_lib_ring_buffer_ctx_private *ctx_private = ctx->priv; - struct lttng_ust_lib_ring_buffer_channel *chan = ctx_private->chan; - struct lttng_ust_shm_handle *handle = chan->handle; - const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; - struct lttng_ust_lib_ring_buffer *buf; - struct switch_offsets offsets; - int ret; - - if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) - buf = shmp(handle, chan->backend.buf[ctx_private->reserve_cpu].shmp); - else - buf = shmp(handle, chan->backend.buf[0].shmp); - if (!buf) - return -EIO; - ctx_private->buf = buf; - - offsets.size = 0; - - do { - ret = lib_ring_buffer_try_reserve_slow(buf, chan, &offsets, - ctx, client_ctx); - if (caa_unlikely(ret)) - return ret; - } while (caa_unlikely(v_cmpxchg(config, &buf->offset, offsets.old, - offsets.end) - != offsets.old)); - - /* - * Atomically update last_tsc. This update races against concurrent - * atomic updates, but the race will always cause supplementary full TSC - * records, never the opposite (missing a full TSC record when it would - * be needed). - */ - save_last_tsc(config, buf, ctx_private->tsc); - - /* - * Push the reader if necessary - */ - lib_ring_buffer_reserve_push_reader(buf, chan, offsets.end - 1); - - /* - * Clear noref flag for this subbuffer. - */ - lib_ring_buffer_clear_noref(config, &buf->backend, - subbuf_index(offsets.end - 1, chan), - handle); - - /* - * Switch old subbuffer if needed. - */ - if (caa_unlikely(offsets.switch_old_end)) { - lib_ring_buffer_clear_noref(config, &buf->backend, - subbuf_index(offsets.old - 1, chan), - handle); - lib_ring_buffer_switch_old_end(buf, chan, &offsets, ctx_private->tsc, handle); - } - - /* - * Populate new subbuffer. - */ - if (caa_unlikely(offsets.switch_new_start)) - lib_ring_buffer_switch_new_start(buf, chan, &offsets, ctx_private->tsc, handle); - - if (caa_unlikely(offsets.switch_new_end)) - lib_ring_buffer_switch_new_end(buf, chan, &offsets, ctx_private->tsc, handle); - - ctx_private->slot_size = offsets.size; - ctx_private->pre_offset = offsets.begin; - ctx_private->buf_offset = offsets.begin + offsets.pre_header_padding; - return 0; -} - -static -void lib_ring_buffer_vmcore_check_deliver(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer *buf, - unsigned long commit_count, - unsigned long idx, - struct lttng_ust_shm_handle *handle) -{ - struct commit_counters_hot *cc_hot; - - if (config->oops != RING_BUFFER_OOPS_CONSISTENCY) - return; - cc_hot = shmp_index(handle, buf->commit_hot, idx); - if (!cc_hot) - return; - v_set(config, &cc_hot->seq, commit_count); -} - -/* - * The ring buffer can count events recorded and overwritten per buffer, - * but it is disabled by default due to its performance overhead. 
- */ -#ifdef LTTNG_RING_BUFFER_COUNT_EVENTS -static -void deliver_count_events(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer *buf, - unsigned long idx, - struct lttng_ust_shm_handle *handle) -{ - v_add(config, subbuffer_get_records_count(config, - &buf->backend, idx, handle), - &buf->records_count); - v_add(config, subbuffer_count_records_overrun(config, - &buf->backend, idx, handle), - &buf->records_overrun); -} -#else /* LTTNG_RING_BUFFER_COUNT_EVENTS */ -static -void deliver_count_events( - const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), - struct lttng_ust_lib_ring_buffer *buf __attribute__((unused)), - unsigned long idx __attribute__((unused)), - struct lttng_ust_shm_handle *handle __attribute__((unused))) -{ -} -#endif /* #else LTTNG_RING_BUFFER_COUNT_EVENTS */ - -void lib_ring_buffer_check_deliver_slow(const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer *buf, - struct lttng_ust_lib_ring_buffer_channel *chan, - unsigned long offset, - unsigned long commit_count, - unsigned long idx, - struct lttng_ust_shm_handle *handle, - uint64_t tsc __attribute__((unused))) -{ - unsigned long old_commit_count = commit_count - - chan->backend.subbuf_size; - struct commit_counters_cold *cc_cold; - - /* - * If we succeeded at updating cc_sb below, we are the subbuffer - * writer delivering the subbuffer. Deals with concurrent - * updates of the "cc" value without adding a add_return atomic - * operation to the fast path. - * - * We are doing the delivery in two steps: - * - First, we cmpxchg() cc_sb to the new value - * old_commit_count + 1. This ensures that we are the only - * subbuffer user successfully filling the subbuffer, but we - * do _not_ set the cc_sb value to "commit_count" yet. - * Therefore, other writers that would wrap around the ring - * buffer and try to start writing to our subbuffer would - * have to drop records, because it would appear as - * non-filled. - * We therefore have exclusive access to the subbuffer control - * structures. This mutual exclusion with other writers is - * crucially important to perform record overruns count in - * flight recorder mode locklessly. - * - When we are ready to release the subbuffer (either for - * reading or for overrun by other writers), we simply set the - * cc_sb value to "commit_count" and perform delivery. - * - * The subbuffer size is least 2 bytes (minimum size: 1 page). - * This guarantees that old_commit_count + 1 != commit_count. - */ - - /* - * Order prior updates to reserve count prior to the - * commit_cold cc_sb update. - */ - cmm_smp_wmb(); - cc_cold = shmp_index(handle, buf->commit_cold, idx); - if (!cc_cold) - return; - if (caa_likely(v_cmpxchg(config, &cc_cold->cc_sb, - old_commit_count, old_commit_count + 1) - == old_commit_count)) { - uint64_t *ts_end; - - /* - * Start of exclusive subbuffer access. We are - * guaranteed to be the last writer in this subbuffer - * and any other writer trying to access this subbuffer - * in this state is required to drop records. - * - * We can read the ts_end for the current sub-buffer - * which has been saved by the very last space - * reservation for the current sub-buffer. - * - * Order increment of commit counter before reading ts_end. 
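lib_ring_buffer_check_deliver_slow() above is a two-step claim/publish protocol: the cmpxchg to old_commit_count + 1 grants exclusive access to the sub-buffer, and storing commit_count afterwards releases it to readers and to writers wrapping around. A generic sketch of that shape with C11 atomics and hypothetical names, leaving out the ring-buffer specifics:

#include <stdatomic.h>
#include <stdbool.h>

static _Atomic unsigned long state;     /* stands in for commit_cold.cc_sb */

/*
 * Only the caller whose CAS from 'expected' to 'expected + 1' succeeds gets
 * exclusive access; it then publishes 'final' to release the sub-buffer.
 */
bool try_deliver(unsigned long expected, unsigned long final,
                 void (*finalize)(void))
{
        unsigned long old = expected;

        if (!atomic_compare_exchange_strong(&state, &old, expected + 1))
                return false;   /* someone else is (or was) delivering */
        finalize();             /* exclusive: end header, packet counters... */
        atomic_store_explicit(&state, final, memory_order_release);
        return true;
}

While the intermediate value is visible, concurrent writers wrapping around still see the sub-buffer as not fully committed and drop their records, which is exactly what the comment above relies on.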
- */ - cmm_smp_mb(); - ts_end = shmp_index(handle, buf->ts_end, idx); - if (!ts_end) - return; - deliver_count_events(config, buf, idx, handle); - config->cb.buffer_end(buf, *ts_end, idx, - lib_ring_buffer_get_data_size(config, - buf, - idx, - handle), - handle); - - /* - * Increment the packet counter while we have exclusive - * access. - */ - subbuffer_inc_packet_count(config, &buf->backend, idx, handle); - - /* - * Set noref flag and offset for this subbuffer id. - * Contains a memory barrier that ensures counter stores - * are ordered before set noref and offset. - */ - lib_ring_buffer_set_noref_offset(config, &buf->backend, idx, - buf_trunc_val(offset, chan), handle); - - /* - * Order set_noref and record counter updates before the - * end of subbuffer exclusive access. Orders with - * respect to writers coming into the subbuffer after - * wrap around, and also order wrt concurrent readers. - */ - cmm_smp_mb(); - /* End of exclusive subbuffer access */ - v_set(config, &cc_cold->cc_sb, commit_count); - /* - * Order later updates to reserve count after - * the commit cold cc_sb update. - */ - cmm_smp_wmb(); - lib_ring_buffer_vmcore_check_deliver(config, buf, - commit_count, idx, handle); - - /* - * RING_BUFFER_WAKEUP_BY_WRITER wakeup is not lock-free. - */ - if (config->wakeup == RING_BUFFER_WAKEUP_BY_WRITER - && uatomic_read(&buf->active_readers) - && lib_ring_buffer_poll_deliver(config, buf, chan, handle)) { - lib_ring_buffer_wakeup(buf, handle); - } - } -} - -/* - * Force a read (imply TLS fixup for dlopen) of TLS variables. - */ -void lttng_fixup_ringbuffer_tls(void) -{ - asm volatile ("" : : "m" (URCU_TLS(lib_ring_buffer_nesting))); -} - -void lib_ringbuffer_signal_init(void) -{ - sigset_t mask; - int ret; - - /* - * Block signal for entire process, so only our thread processes - * it. - */ - rb_setmask(&mask); - ret = pthread_sigmask(SIG_BLOCK, &mask, NULL); - if (ret) { - errno = ret; - PERROR("pthread_sigmask"); - } -} diff --git a/src/libringbuffer/ringbuffer-config.h b/src/libringbuffer/ringbuffer-config.h deleted file mode 100644 index 2e10de03..00000000 --- a/src/libringbuffer/ringbuffer-config.h +++ /dev/null @@ -1,242 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright (C) 2010-2021 Mathieu Desnoyers - * - * Ring buffer configuration header. Note: after declaring the standard inline - * functions, clients should also include linux/ringbuffer/api.h. - */ - -#ifndef _LTTNG_RING_BUFFER_CONFIG_H -#define _LTTNG_RING_BUFFER_CONFIG_H - -#include -#include -#include -#include -#include - -#include -#include -#include - -struct lttng_ust_lib_ring_buffer; -struct lttng_ust_lib_ring_buffer_channel; -struct lttng_ust_lib_ring_buffer_config; -struct lttng_ust_lib_ring_buffer_ctx_private; -struct lttng_ust_shm_handle; - -/* - * Ring buffer client callbacks. Only used by slow path, never on fast path. - * For the fast path, record_header_size(), ring_buffer_clock_read() should be - * provided as inline functions too. These may simply return 0 if not used by - * the client. 
- */ -struct lttng_ust_lib_ring_buffer_client_cb { - /* Mandatory callbacks */ - - /* A static inline version is also required for fast path */ - uint64_t (*ring_buffer_clock_read) (struct lttng_ust_lib_ring_buffer_channel *chan); - size_t (*record_header_size) (const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_channel *chan, - size_t offset, - size_t *pre_header_padding, - struct lttng_ust_lib_ring_buffer_ctx *ctx, - void *client_ctx); - - /* Slow path only, at subbuffer switch */ - size_t (*subbuffer_header_size) (void); - void (*buffer_begin) (struct lttng_ust_lib_ring_buffer *buf, uint64_t tsc, - unsigned int subbuf_idx, - struct lttng_ust_shm_handle *handle); - void (*buffer_end) (struct lttng_ust_lib_ring_buffer *buf, uint64_t tsc, - unsigned int subbuf_idx, unsigned long data_size, - struct lttng_ust_shm_handle *handle); - - /* Optional callbacks (can be set to NULL) */ - - /* Called at buffer creation/finalize */ - int (*buffer_create) (struct lttng_ust_lib_ring_buffer *buf, void *priv, - int cpu, const char *name, - struct lttng_ust_shm_handle *handle); - /* - * Clients should guarantee that no new reader handle can be opened - * after finalize. - */ - void (*buffer_finalize) (struct lttng_ust_lib_ring_buffer *buf, - void *priv, int cpu, - struct lttng_ust_shm_handle *handle); - - /* - * Extract header length, payload length and timestamp from event - * record. Used by buffer iterators. Timestamp is only used by channel - * iterator. - */ - void (*record_get) (const struct lttng_ust_lib_ring_buffer_config *config, - struct lttng_ust_lib_ring_buffer_channel *chan, - struct lttng_ust_lib_ring_buffer *buf, - size_t offset, size_t *header_len, - size_t *payload_len, uint64_t *timestamp, - struct lttng_ust_shm_handle *handle); - /* - * Offset and size of content size field in client. - */ - void (*content_size_field) (const struct lttng_ust_lib_ring_buffer_config *config, - size_t *offset, size_t *length); - void (*packet_size_field) (const struct lttng_ust_lib_ring_buffer_config *config, - size_t *offset, size_t *length); -}; - -/* - * Ring buffer instance configuration. - * - * Declare as "static const" within the client object to ensure the inline fast - * paths can be optimized. - * - * alloc/sync pairs: - * - * RING_BUFFER_ALLOC_PER_CPU and RING_BUFFER_SYNC_PER_CPU : - * Per-cpu buffers with per-cpu synchronization. - * - * RING_BUFFER_ALLOC_PER_CPU and RING_BUFFER_SYNC_GLOBAL : - * Per-cpu buffer with global synchronization. Tracing can be performed with - * preemption enabled, statistically stays on the local buffers. - * - * RING_BUFFER_ALLOC_GLOBAL and RING_BUFFER_SYNC_PER_CPU : - * Should only be used for buffers belonging to a single thread or protected - * by mutual exclusion by the client. Note that periodical sub-buffer switch - * should be disabled in this kind of configuration. - * - * RING_BUFFER_ALLOC_GLOBAL and RING_BUFFER_SYNC_GLOBAL : - * Global shared buffer with global synchronization. - * - * wakeup: - * - * RING_BUFFER_WAKEUP_BY_TIMER uses per-cpu deferrable timers to poll the - * buffers and wake up readers if data is ready. Mainly useful for tracers which - * don't want to call into the wakeup code on the tracing path. Use in - * combination with "read_timer_interval" channel_create() argument. - * - * RING_BUFFER_WAKEUP_BY_WRITER directly wakes up readers when a subbuffer is - * ready to read. Lower latencies before the reader is woken up. Mainly suitable - * for drivers. 
- * - * RING_BUFFER_WAKEUP_NONE does not perform any wakeup whatsoever. The client - * has the responsibility to perform wakeups. - */ -#define LTTNG_UST_RING_BUFFER_CONFIG_PADDING 20 - -enum lttng_ust_lib_ring_buffer_alloc_types { - RING_BUFFER_ALLOC_PER_CPU, - RING_BUFFER_ALLOC_GLOBAL, -}; - -enum lttng_ust_lib_ring_buffer_sync_types { - RING_BUFFER_SYNC_PER_CPU, /* Wait-free */ - RING_BUFFER_SYNC_GLOBAL, /* Lock-free */ -}; - -enum lttng_ust_lib_ring_buffer_mode_types { - RING_BUFFER_OVERWRITE = 0, /* Overwrite when buffer full */ - RING_BUFFER_DISCARD = 1, /* Discard when buffer full */ -}; - -enum lttng_ust_lib_ring_buffer_output_types { - RING_BUFFER_SPLICE, - RING_BUFFER_MMAP, - RING_BUFFER_READ, /* TODO */ - RING_BUFFER_ITERATOR, - RING_BUFFER_NONE, -}; - -enum lttng_ust_lib_ring_buffer_backend_types { - RING_BUFFER_PAGE, - RING_BUFFER_VMAP, /* TODO */ - RING_BUFFER_STATIC, /* TODO */ -}; - -enum lttng_ust_lib_ring_buffer_oops_types { - RING_BUFFER_NO_OOPS_CONSISTENCY, - RING_BUFFER_OOPS_CONSISTENCY, -}; - -enum lttng_ust_lib_ring_buffer_ipi_types { - RING_BUFFER_IPI_BARRIER, - RING_BUFFER_NO_IPI_BARRIER, -}; - -enum lttng_ust_lib_ring_buffer_wakeup_types { - RING_BUFFER_WAKEUP_BY_TIMER, /* wake up performed by timer */ - RING_BUFFER_WAKEUP_BY_WRITER, /* - * writer wakes up reader, - * not lock-free - * (takes spinlock). - */ -}; - -struct lttng_ust_lib_ring_buffer_config { - enum lttng_ust_lib_ring_buffer_alloc_types alloc; - enum lttng_ust_lib_ring_buffer_sync_types sync; - enum lttng_ust_lib_ring_buffer_mode_types mode; - enum lttng_ust_lib_ring_buffer_output_types output; - enum lttng_ust_lib_ring_buffer_backend_types backend; - enum lttng_ust_lib_ring_buffer_oops_types oops; - enum lttng_ust_lib_ring_buffer_ipi_types ipi; - enum lttng_ust_lib_ring_buffer_wakeup_types wakeup; - /* - * tsc_bits: timestamp bits saved at each record. - * 0 and 64 disable the timestamp compression scheme. - */ - unsigned int tsc_bits; - struct lttng_ust_lib_ring_buffer_client_cb cb; - /* - * client_type is used by the consumer process (which is in a - * different address space) to lookup the appropriate client - * callbacks and update the cb pointers. - */ - int client_type; - int _unused1; - const struct lttng_ust_lib_ring_buffer_client_cb *cb_ptr; - char padding[LTTNG_UST_RING_BUFFER_CONFIG_PADDING]; -}; - -/* - * Reservation flags. - * - * RING_BUFFER_RFLAG_FULL_TSC - * - * This flag is passed to record_header_size() and to the primitive used to - * write the record header. It indicates that the full 64-bit time value is - * needed in the record header. If this flag is not set, the record header needs - * only to contain "tsc_bits" bit of time value. - * - * Reservation flags can be added by the client, starting from - * "(RING_BUFFER_FLAGS_END << 0)". It can be used to pass information from - * record_header_size() to lib_ring_buffer_write_record_header(). - */ -#define RING_BUFFER_RFLAG_FULL_TSC (1U << 0) -#define RING_BUFFER_RFLAG_END (1U << 1) - -/* - * lib_ring_buffer_check_config() returns 0 on success. - * Used internally to check for valid configurations at channel creation. 
- */ -static inline -int lib_ring_buffer_check_config(const struct lttng_ust_lib_ring_buffer_config *config, - unsigned int switch_timer_interval, - unsigned int read_timer_interval) - lttng_ust_notrace; - -static inline -int lib_ring_buffer_check_config(const struct lttng_ust_lib_ring_buffer_config *config, - unsigned int switch_timer_interval, - unsigned int read_timer_interval __attribute__((unused))) -{ - if (config->alloc == RING_BUFFER_ALLOC_GLOBAL - && config->sync == RING_BUFFER_SYNC_PER_CPU - && switch_timer_interval) - return -EINVAL; - return 0; -} - -#endif /* _LTTNG_RING_BUFFER_CONFIG_H */ diff --git a/src/libringbuffer/shm.c b/src/libringbuffer/shm.c deleted file mode 100644 index a5de019c..00000000 --- a/src/libringbuffer/shm.c +++ /dev/null @@ -1,522 +0,0 @@ -/* - * SPDX-License-Identifier: LGPL-2.1-only - * - * Copyright (C) 2005-2012 Mathieu Desnoyers - */ - -#define _LGPL_SOURCE -#include "shm.h" -#include -#include -#include -#include -#include /* For mode constants */ -#include /* For O_* constants */ -#include -#include -#include -#include -#include -#include -#include - -#ifdef HAVE_LIBNUMA -#include -#include -#endif - -#include - -#include "common/macros.h" -#include "common/ust-fd.h" -#include "mmap.h" - -/* - * Ensure we have the required amount of space available by writing 0 - * into the entire buffer. Not doing so can trigger SIGBUS when going - * beyond the available shm space. - */ -static -int zero_file(int fd, size_t len) -{ - ssize_t retlen; - size_t written = 0; - char *zeropage; - long pagelen; - int ret; - - pagelen = sysconf(_SC_PAGESIZE); - if (pagelen < 0) - return (int) pagelen; - zeropage = calloc(pagelen, 1); - if (!zeropage) - return -ENOMEM; - - while (len > written) { - do { - retlen = write(fd, zeropage, - min_t(size_t, pagelen, len - written)); - } while (retlen == -1UL && errno == EINTR); - if (retlen < 0) { - ret = (int) retlen; - goto error; - } - written += retlen; - } - ret = 0; -error: - free(zeropage); - return ret; -} - -struct shm_object_table *shm_object_table_create(size_t max_nb_obj) -{ - struct shm_object_table *table; - - table = zmalloc(sizeof(struct shm_object_table) + - max_nb_obj * sizeof(table->objects[0])); - if (!table) - return NULL; - table->size = max_nb_obj; - return table; -} - -static -struct shm_object *_shm_object_table_alloc_shm(struct shm_object_table *table, - size_t memory_map_size, - int stream_fd) -{ - int shmfd, waitfd[2], ret, i; - struct shm_object *obj; - char *memory_map; - - if (stream_fd < 0) - return NULL; - if (table->allocated_len >= table->size) - return NULL; - obj = &table->objects[table->allocated_len]; - - /* wait_fd: create pipe */ - ret = pipe(waitfd); - if (ret < 0) { - PERROR("pipe"); - goto error_pipe; - } - for (i = 0; i < 2; i++) { - ret = fcntl(waitfd[i], F_SETFD, FD_CLOEXEC); - if (ret < 0) { - PERROR("fcntl"); - goto error_fcntl; - } - } - /* The write end of the pipe needs to be non-blocking */ - ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK); - if (ret < 0) { - PERROR("fcntl"); - goto error_fcntl; - } - memcpy(obj->wait_fd, waitfd, sizeof(waitfd)); - - /* - * Set POSIX shared memory object size - * - * First, use ftruncate() to set its size, some implementations won't - * allow writes past the size set by ftruncate. - * Then, use write() to fill it with zeros, this allows us to fully - * allocate it and detect a shortage of shm space without dealing with - * a SIGBUS. 
- */ - - shmfd = stream_fd; - ret = ftruncate(shmfd, memory_map_size); - if (ret) { - PERROR("ftruncate"); - goto error_ftruncate; - } - ret = zero_file(shmfd, memory_map_size); - if (ret) { - PERROR("zero_file"); - goto error_zero_file; - } - - /* - * Also ensure the file metadata is synced with the storage by using - * fsync(2). Some platforms don't allow fsync on POSIX shm fds, ignore - * EINVAL accordingly. - */ - ret = fsync(shmfd); - if (ret && errno != EINVAL) { - PERROR("fsync"); - goto error_fsync; - } - obj->shm_fd_ownership = 0; - obj->shm_fd = shmfd; - - /* memory_map: mmap */ - memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE, - MAP_SHARED | LTTNG_MAP_POPULATE, shmfd, 0); - if (memory_map == MAP_FAILED) { - PERROR("mmap"); - goto error_mmap; - } - obj->type = SHM_OBJECT_SHM; - obj->memory_map = memory_map; - obj->memory_map_size = memory_map_size; - obj->allocated_len = 0; - obj->index = table->allocated_len++; - - return obj; - -error_mmap: -error_fsync: -error_ftruncate: -error_zero_file: -error_fcntl: - for (i = 0; i < 2; i++) { - ret = close(waitfd[i]); - if (ret) { - PERROR("close"); - assert(0); - } - } -error_pipe: - return NULL; -} - -static -struct shm_object *_shm_object_table_alloc_mem(struct shm_object_table *table, - size_t memory_map_size) -{ - struct shm_object *obj; - void *memory_map; - int waitfd[2], i, ret; - - if (table->allocated_len >= table->size) - return NULL; - obj = &table->objects[table->allocated_len]; - - memory_map = zmalloc(memory_map_size); - if (!memory_map) - goto alloc_error; - - /* wait_fd: create pipe */ - ret = pipe(waitfd); - if (ret < 0) { - PERROR("pipe"); - goto error_pipe; - } - for (i = 0; i < 2; i++) { - ret = fcntl(waitfd[i], F_SETFD, FD_CLOEXEC); - if (ret < 0) { - PERROR("fcntl"); - goto error_fcntl; - } - } - /* The write end of the pipe needs to be non-blocking */ - ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK); - if (ret < 0) { - PERROR("fcntl"); - goto error_fcntl; - } - memcpy(obj->wait_fd, waitfd, sizeof(waitfd)); - - /* no shm_fd */ - obj->shm_fd = -1; - obj->shm_fd_ownership = 0; - - obj->type = SHM_OBJECT_MEM; - obj->memory_map = memory_map; - obj->memory_map_size = memory_map_size; - obj->allocated_len = 0; - obj->index = table->allocated_len++; - - return obj; - -error_fcntl: - for (i = 0; i < 2; i++) { - ret = close(waitfd[i]); - if (ret) { - PERROR("close"); - assert(0); - } - } -error_pipe: - free(memory_map); -alloc_error: - return NULL; -} - -/* - * libnuma prints errors on the console even for numa_available(). - * Work-around this limitation by using get_mempolicy() directly to - * check whether the kernel supports mempolicy. 
- */ -#ifdef HAVE_LIBNUMA -static bool lttng_is_numa_available(void) -{ - int ret; - - ret = get_mempolicy(NULL, NULL, 0, NULL, 0); - if (ret && errno == ENOSYS) { - return false; - } - return numa_available() > 0; -} -#endif - -struct shm_object *shm_object_table_alloc(struct shm_object_table *table, - size_t memory_map_size, - enum shm_object_type type, - int stream_fd, - int cpu) -{ - struct shm_object *shm_object; -#ifdef HAVE_LIBNUMA - int oldnode = 0, node; - bool numa_avail; - - numa_avail = lttng_is_numa_available(); - if (numa_avail) { - oldnode = numa_preferred(); - if (cpu >= 0) { - node = numa_node_of_cpu(cpu); - if (node >= 0) - numa_set_preferred(node); - } - if (cpu < 0 || node < 0) - numa_set_localalloc(); - } -#endif /* HAVE_LIBNUMA */ - switch (type) { - case SHM_OBJECT_SHM: - shm_object = _shm_object_table_alloc_shm(table, memory_map_size, - stream_fd); - break; - case SHM_OBJECT_MEM: - shm_object = _shm_object_table_alloc_mem(table, memory_map_size); - break; - default: - assert(0); - } -#ifdef HAVE_LIBNUMA - if (numa_avail) - numa_set_preferred(oldnode); -#endif /* HAVE_LIBNUMA */ - return shm_object; -} - -struct shm_object *shm_object_table_append_shm(struct shm_object_table *table, - int shm_fd, int wakeup_fd, uint32_t stream_nr, - size_t memory_map_size) -{ - struct shm_object *obj; - char *memory_map; - int ret; - - if (table->allocated_len >= table->size) - return NULL; - /* streams _must_ be received in sequential order, else fail. */ - if (stream_nr + 1 != table->allocated_len) - return NULL; - - obj = &table->objects[table->allocated_len]; - - /* wait_fd: set write end of the pipe. */ - obj->wait_fd[0] = -1; /* read end is unset */ - obj->wait_fd[1] = wakeup_fd; - obj->shm_fd = shm_fd; - obj->shm_fd_ownership = 1; - - /* The write end of the pipe needs to be non-blocking */ - ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK); - if (ret < 0) { - PERROR("fcntl"); - goto error_fcntl; - } - - /* memory_map: mmap */ - memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE, - MAP_SHARED | LTTNG_MAP_POPULATE, shm_fd, 0); - if (memory_map == MAP_FAILED) { - PERROR("mmap"); - goto error_mmap; - } - obj->type = SHM_OBJECT_SHM; - obj->memory_map = memory_map; - obj->memory_map_size = memory_map_size; - obj->allocated_len = memory_map_size; - obj->index = table->allocated_len++; - - return obj; - -error_fcntl: -error_mmap: - return NULL; -} - -/* - * Passing ownership of mem to object. 
- */ -struct shm_object *shm_object_table_append_mem(struct shm_object_table *table, - void *mem, size_t memory_map_size, int wakeup_fd) -{ - struct shm_object *obj; - int ret; - - if (table->allocated_len >= table->size) - return NULL; - obj = &table->objects[table->allocated_len]; - - obj->wait_fd[0] = -1; /* read end is unset */ - obj->wait_fd[1] = wakeup_fd; - obj->shm_fd = -1; - obj->shm_fd_ownership = 0; - - ret = fcntl(obj->wait_fd[1], F_SETFD, FD_CLOEXEC); - if (ret < 0) { - PERROR("fcntl"); - goto error_fcntl; - } - /* The write end of the pipe needs to be non-blocking */ - ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK); - if (ret < 0) { - PERROR("fcntl"); - goto error_fcntl; - } - - obj->type = SHM_OBJECT_MEM; - obj->memory_map = mem; - obj->memory_map_size = memory_map_size; - obj->allocated_len = memory_map_size; - obj->index = table->allocated_len++; - - return obj; - -error_fcntl: - return NULL; -} - -static -void shmp_object_destroy(struct shm_object *obj, int consumer) -{ - switch (obj->type) { - case SHM_OBJECT_SHM: - { - int ret, i; - - ret = munmap(obj->memory_map, obj->memory_map_size); - if (ret) { - PERROR("umnmap"); - assert(0); - } - - if (obj->shm_fd_ownership) { - /* Delete FDs only if called from app (not consumer). */ - if (!consumer) { - lttng_ust_lock_fd_tracker(); - ret = close(obj->shm_fd); - if (!ret) { - lttng_ust_delete_fd_from_tracker(obj->shm_fd); - } else { - PERROR("close"); - assert(0); - } - lttng_ust_unlock_fd_tracker(); - } else { - ret = close(obj->shm_fd); - if (ret) { - PERROR("close"); - assert(0); - } - } - } - for (i = 0; i < 2; i++) { - if (obj->wait_fd[i] < 0) - continue; - if (!consumer) { - lttng_ust_lock_fd_tracker(); - ret = close(obj->wait_fd[i]); - if (!ret) { - lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]); - } else { - PERROR("close"); - assert(0); - } - lttng_ust_unlock_fd_tracker(); - } else { - ret = close(obj->wait_fd[i]); - if (ret) { - PERROR("close"); - assert(0); - } - } - } - break; - } - case SHM_OBJECT_MEM: - { - int ret, i; - - for (i = 0; i < 2; i++) { - if (obj->wait_fd[i] < 0) - continue; - if (!consumer) { - lttng_ust_lock_fd_tracker(); - ret = close(obj->wait_fd[i]); - if (!ret) { - lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]); - } else { - PERROR("close"); - assert(0); - } - lttng_ust_unlock_fd_tracker(); - } else { - ret = close(obj->wait_fd[i]); - if (ret) { - PERROR("close"); - assert(0); - } - } - } - free(obj->memory_map); - break; - } - default: - assert(0); - } -} - -void shm_object_table_destroy(struct shm_object_table *table, int consumer) -{ - int i; - - for (i = 0; i < table->allocated_len; i++) - shmp_object_destroy(&table->objects[i], consumer); - free(table); -} - -/* - * zalloc_shm - allocate memory within a shm object. - * - * Shared memory is already zeroed by shmget. - * *NOT* multithread-safe (should be protected by mutex). - * Returns a -1, -1 tuple on error. 
- */ -struct shm_ref zalloc_shm(struct shm_object *obj, size_t len) -{ - struct shm_ref ref; - struct shm_ref shm_ref_error = { -1, -1 }; - - if (obj->memory_map_size - obj->allocated_len < len) - return shm_ref_error; - ref.index = obj->index; - ref.offset = obj->allocated_len; - obj->allocated_len += len; - return ref; -} - -void align_shm(struct shm_object *obj, size_t align) -{ - size_t offset_len = lttng_ust_offset_align(obj->allocated_len, align); - obj->allocated_len += offset_len; -} diff --git a/src/libringbuffer/shm.h b/src/libringbuffer/shm.h deleted file mode 100644 index 6e4f7f7b..00000000 --- a/src/libringbuffer/shm.h +++ /dev/null @@ -1,221 +0,0 @@ -/* - * SPDX-License-Identifier: LGPL-2.1-only - * - * Copyright (C) 2011-2012 Mathieu Desnoyers - */ - -#ifndef _LIBRINGBUFFER_SHM_H -#define _LIBRINGBUFFER_SHM_H - -#include -#include -#include -#include "common/logging.h" -#include -#include "shm_types.h" - -/* channel_handle_create - for UST. */ -extern -struct lttng_ust_shm_handle *channel_handle_create(void *data, - uint64_t memory_map_size, int wakeup_fd) - __attribute__((visibility("hidden"))); - -/* channel_handle_add_stream - for UST. */ -extern -int channel_handle_add_stream(struct lttng_ust_shm_handle *handle, - int shm_fd, int wakeup_fd, uint32_t stream_nr, - uint64_t memory_map_size) - __attribute__((visibility("hidden"))); - -unsigned int channel_handle_get_nr_streams(struct lttng_ust_shm_handle *handle) - __attribute__((visibility("hidden"))); - -/* - * Pointer dereferencing. We don't trust the shm_ref, so we validate - * both the index and offset with known boundaries. - * - * "shmp" and "shmp_index" guarantee that it's safe to use the pointer - * target type, even in the occurrence of shm_ref modification by an - * untrusted process having write access to the shm_ref. We return a - * NULL pointer if the ranges are invalid. - */ -static inline -char *_shmp_offset(struct shm_object_table *table, struct shm_ref *ref, - size_t idx, size_t elem_size) -{ - struct shm_object *obj; - size_t objindex, ref_offset; - - objindex = (size_t) ref->index; - if (caa_unlikely(objindex >= table->allocated_len)) - return NULL; - obj = &table->objects[objindex]; - ref_offset = (size_t) ref->offset; - ref_offset += idx * elem_size; - /* Check if part of the element returned would exceed the limits. */ - if (caa_unlikely(ref_offset + elem_size > obj->memory_map_size)) - return NULL; - return &obj->memory_map[ref_offset]; -} - -#define shmp_index(handle, ref, index) \ - ((__typeof__((ref)._type)) _shmp_offset((handle)->table, &(ref)._ref, index, sizeof(*((ref)._type)))) - -#define shmp(handle, ref) shmp_index(handle, ref, 0) - -static inline -void _set_shmp(struct shm_ref *ref, struct shm_ref src) -{ - *ref = src; -} - -#define set_shmp(ref, src) _set_shmp(&(ref)._ref, src) - -struct shm_object_table *shm_object_table_create(size_t max_nb_obj) - __attribute__((visibility("hidden"))); - -struct shm_object *shm_object_table_alloc(struct shm_object_table *table, - size_t memory_map_size, - enum shm_object_type type, - const int stream_fd, - int cpu) - __attribute__((visibility("hidden"))); - -struct shm_object *shm_object_table_append_shm(struct shm_object_table *table, - int shm_fd, int wakeup_fd, uint32_t stream_nr, - size_t memory_map_size) - __attribute__((visibility("hidden"))); - -/* mem ownership is passed to shm_object_table_append_mem(). 
*/ -struct shm_object *shm_object_table_append_mem(struct shm_object_table *table, - void *mem, size_t memory_map_size, int wakeup_fd) - __attribute__((visibility("hidden"))); - -void shm_object_table_destroy(struct shm_object_table *table, int consumer) - __attribute__((visibility("hidden"))); - -/* - * zalloc_shm - allocate memory within a shm object. - * - * Shared memory is already zeroed by shmget. - * *NOT* multithread-safe (should be protected by mutex). - * Returns a -1, -1 tuple on error. - */ -struct shm_ref zalloc_shm(struct shm_object *obj, size_t len) - __attribute__((visibility("hidden"))); - -void align_shm(struct shm_object *obj, size_t align) - __attribute__((visibility("hidden"))); - -static inline -int shm_get_wait_fd(struct lttng_ust_shm_handle *handle, struct shm_ref *ref) -{ - struct shm_object_table *table = handle->table; - struct shm_object *obj; - size_t index; - - index = (size_t) ref->index; - if (caa_unlikely(index >= table->allocated_len)) - return -EPERM; - obj = &table->objects[index]; - return obj->wait_fd[0]; -} - -static inline -int shm_get_wakeup_fd(struct lttng_ust_shm_handle *handle, struct shm_ref *ref) -{ - struct shm_object_table *table = handle->table; - struct shm_object *obj; - size_t index; - - index = (size_t) ref->index; - if (caa_unlikely(index >= table->allocated_len)) - return -EPERM; - obj = &table->objects[index]; - return obj->wait_fd[1]; -} - -static inline -int shm_close_wait_fd(struct lttng_ust_shm_handle *handle, - struct shm_ref *ref) -{ - struct shm_object_table *table = handle->table; - struct shm_object *obj; - int wait_fd; - size_t index; - int ret; - - index = (size_t) ref->index; - if (caa_unlikely(index >= table->allocated_len)) - return -EPERM; - obj = &table->objects[index]; - wait_fd = obj->wait_fd[0]; - if (wait_fd < 0) - return -ENOENT; - obj->wait_fd[0] = -1; - ret = close(wait_fd); - if (ret) { - ret = -errno; - return ret; - } - return 0; -} - -static inline -int shm_close_wakeup_fd(struct lttng_ust_shm_handle *handle, - struct shm_ref *ref) -{ - struct shm_object_table *table = handle->table; - struct shm_object *obj; - int wakeup_fd; - size_t index; - int ret; - - index = (size_t) ref->index; - if (caa_unlikely(index >= table->allocated_len)) - return -EPERM; - obj = &table->objects[index]; - wakeup_fd = obj->wait_fd[1]; - if (wakeup_fd < 0) - return -ENOENT; - obj->wait_fd[1] = -1; - ret = close(wakeup_fd); - if (ret) { - ret = -errno; - return ret; - } - return 0; -} - -static inline -int shm_get_shm_fd(struct lttng_ust_shm_handle *handle, struct shm_ref *ref) -{ - struct shm_object_table *table = handle->table; - struct shm_object *obj; - size_t index; - - index = (size_t) ref->index; - if (caa_unlikely(index >= table->allocated_len)) - return -EPERM; - obj = &table->objects[index]; - return obj->shm_fd; -} - - -static inline -int shm_get_shm_size(struct lttng_ust_shm_handle *handle, struct shm_ref *ref, - uint64_t *size) -{ - struct shm_object_table *table = handle->table; - struct shm_object *obj; - size_t index; - - index = (size_t) ref->index; - if (caa_unlikely(index >= table->allocated_len)) - return -EPERM; - obj = &table->objects[index]; - *size = obj->memory_map_size; - return 0; -} - -#endif /* _LIBRINGBUFFER_SHM_H */ diff --git a/src/libringbuffer/shm_internal.h b/src/libringbuffer/shm_internal.h deleted file mode 100644 index 0e92b451..00000000 --- a/src/libringbuffer/shm_internal.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * SPDX-License-Identifier: LGPL-2.1-only - * - * Copyright (C) 2011-2012 Mathieu 
Desnoyers - */ - -#ifndef _LIBRINGBUFFER_SHM_INTERNAL_H -#define _LIBRINGBUFFER_SHM_INTERNAL_H - -struct shm_ref { - volatile ssize_t index; /* within the object table */ - volatile ssize_t offset; /* within the object */ -}; - -#define DECLARE_SHMP(type, name) \ - union { \ - struct shm_ref _ref; \ - type *_type; \ - } name - -#endif /* _LIBRINGBUFFER_SHM_INTERNAL_H */ diff --git a/src/libringbuffer/shm_types.h b/src/libringbuffer/shm_types.h deleted file mode 100644 index c1ad7b68..00000000 --- a/src/libringbuffer/shm_types.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * SPDX-License-Identifier: LGPL-2.1-only - * - * Copyright (C) 2011-2012 Mathieu Desnoyers - */ - -#ifndef _LIBRINGBUFFER_SHM_TYPES_H -#define _LIBRINGBUFFER_SHM_TYPES_H - -#include -#include -#include -#include "shm_internal.h" - -struct lttng_ust_lib_ring_buffer_channel; - -enum shm_object_type { - SHM_OBJECT_SHM, - SHM_OBJECT_MEM, -}; - -struct shm_object { - enum shm_object_type type; - size_t index; /* within the object table */ - int shm_fd; /* shm fd */ - int wait_fd[2]; /* fd for wait/wakeup */ - char *memory_map; - size_t memory_map_size; - uint64_t allocated_len; - int shm_fd_ownership; -}; - -struct shm_object_table { - size_t size; - size_t allocated_len; - struct shm_object objects[]; -}; - -struct lttng_ust_shm_handle { - struct shm_object_table *table; - DECLARE_SHMP(struct lttng_ust_lib_ring_buffer_channel, chan); -}; - -#endif /* _LIBRINGBUFFER_SHM_TYPES_H */ diff --git a/src/libringbuffer/smp.c b/src/libringbuffer/smp.c deleted file mode 100644 index 39bd5559..00000000 --- a/src/libringbuffer/smp.c +++ /dev/null @@ -1,96 +0,0 @@ -/* - * SPDX-License-Identifier: LGPL-2.1-only - * - * Copyright (C) 2011-2012 Mathieu Desnoyers - * Copyright (C) 2019 Michael Jeanson - */ - -#define _LGPL_SOURCE -#include -#include -#include "smp.h" - -int __num_possible_cpus; - -#if (defined(__GLIBC__) || defined( __UCLIBC__)) -void _get_num_possible_cpus(void) -{ - int result; - - /* On Linux, when some processors are offline - * _SC_NPROCESSORS_CONF counts the offline - * processors, whereas _SC_NPROCESSORS_ONLN - * does not. If we used _SC_NPROCESSORS_ONLN, - * getcpu() could return a value greater than - * this sysconf, in which case the arrays - * indexed by processor would overflow. - */ - result = sysconf(_SC_NPROCESSORS_CONF); - if (result == -1) - return; - __num_possible_cpus = result; -} - -#else - -/* - * The MUSL libc implementation of the _SC_NPROCESSORS_CONF sysconf does not - * return the number of configured CPUs in the system but relies on the cpu - * affinity mask of the current task. - * - * So instead we use a strategy similar to GLIBC's, counting the cpu - * directories in "/sys/devices/system/cpu" and fallback on the value from - * sysconf if it fails. - */ - -#include -#include -#include -#include -#include - -#define __max(a,b) ((a)>(b)?(a):(b)) - -void _get_num_possible_cpus(void) -{ - int result, count = 0; - DIR *cpudir; - struct dirent *entry; - - cpudir = opendir("/sys/devices/system/cpu"); - if (cpudir == NULL) - goto end; - - /* - * Count the number of directories named "cpu" followed by and - * integer. This is the same strategy as glibc uses. 
- */ - while ((entry = readdir(cpudir))) { - if (entry->d_type == DT_DIR && - strncmp(entry->d_name, "cpu", 3) == 0) { - - char *endptr; - unsigned long cpu_num; - - cpu_num = strtoul(entry->d_name + 3, &endptr, 10); - if ((cpu_num < ULONG_MAX) && (endptr != entry->d_name + 3) - && (*endptr == '\0')) { - count++; - } - } - } - -end: - /* - * Get the sysconf value as a fallback. Keep the highest number. - */ - result = __max(sysconf(_SC_NPROCESSORS_CONF), count); - - /* - * If both methods failed, don't store the value. - */ - if (result < 1) - return; - __num_possible_cpus = result; -} -#endif diff --git a/src/libringbuffer/smp.h b/src/libringbuffer/smp.h deleted file mode 100644 index 028a66f7..00000000 --- a/src/libringbuffer/smp.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * SPDX-License-Identifier: LGPL-2.1-only - * - * Copyright (C) 2011-2012 Mathieu Desnoyers - */ - -#ifndef _LIBRINGBUFFER_SMP_H -#define _LIBRINGBUFFER_SMP_H - -#include "getcpu.h" - -/* - * 4kB of per-cpu data available. Enough to hold the control structures, - * but not ring buffers. - */ -#define PER_CPU_MEM_SIZE 4096 - -extern int __num_possible_cpus - __attribute__((visibility("hidden"))); - -extern void _get_num_possible_cpus(void) - __attribute__((visibility("hidden"))); - -static inline -int num_possible_cpus(void) -{ - if (!__num_possible_cpus) - _get_num_possible_cpus(); - return __num_possible_cpus; -} - -#define for_each_possible_cpu(cpu) \ - for ((cpu) = 0; (cpu) < num_possible_cpus(); (cpu)++) - -#endif /* _LIBRINGBUFFER_SMP_H */ diff --git a/src/libringbuffer/vatomic.h b/src/libringbuffer/vatomic.h deleted file mode 100644 index 199d2c77..00000000 --- a/src/libringbuffer/vatomic.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright (C) 2010-2012 Mathieu Desnoyers - */ - -#ifndef _LTTNG_RING_BUFFER_VATOMIC_H -#define _LTTNG_RING_BUFFER_VATOMIC_H - -#include -#include - -/* - * Same data type (long) accessed differently depending on configuration. - * v field is for non-atomic access (protected by mutual exclusion). - * In the fast-path, the ring_buffer_config structure is constant, so the - * compiler can statically select the appropriate branch. - * local_t is used for per-cpu and per-thread buffers. - * atomic_long_t is used for globally shared buffers. - */ -union v_atomic { - long a; /* accessed through uatomic */ - long v; -}; - -static inline -long v_read(const struct lttng_ust_lib_ring_buffer_config *config, union v_atomic *v_a) -{ - assert(config->sync != RING_BUFFER_SYNC_PER_CPU); - return uatomic_read(&v_a->a); -} - -static inline -void v_set(const struct lttng_ust_lib_ring_buffer_config *config, union v_atomic *v_a, - long v) -{ - assert(config->sync != RING_BUFFER_SYNC_PER_CPU); - uatomic_set(&v_a->a, v); -} - -static inline -void v_add(const struct lttng_ust_lib_ring_buffer_config *config, long v, union v_atomic *v_a) -{ - assert(config->sync != RING_BUFFER_SYNC_PER_CPU); - uatomic_add(&v_a->a, v); -} - -static inline -void v_inc(const struct lttng_ust_lib_ring_buffer_config *config, union v_atomic *v_a) -{ - assert(config->sync != RING_BUFFER_SYNC_PER_CPU); - uatomic_inc(&v_a->a); -} - -/* - * Non-atomic decrement. Only used by reader, apply to reader-owned subbuffer. 
- */ -static inline -void _v_dec(const struct lttng_ust_lib_ring_buffer_config *config __attribute__((unused)), union v_atomic *v_a) -{ - --v_a->v; -} - -static inline -long v_cmpxchg(const struct lttng_ust_lib_ring_buffer_config *config, union v_atomic *v_a, - long old, long _new) -{ - assert(config->sync != RING_BUFFER_SYNC_PER_CPU); - return uatomic_cmpxchg(&v_a->a, old, _new); -} - -#endif /* _LTTNG_RING_BUFFER_VATOMIC_H */ diff --git a/tests/unit/libringbuffer/Makefile.am b/tests/unit/libringbuffer/Makefile.am index 1954e454..7b086442 100644 --- a/tests/unit/libringbuffer/Makefile.am +++ b/tests/unit/libringbuffer/Makefile.am @@ -5,7 +5,7 @@ AM_CPPFLAGS += -I$(top_srcdir)/tests/utils noinst_PROGRAMS = test_shm test_shm_SOURCES = shm.c test_shm_LDADD = \ - $(top_builddir)/src/libringbuffer/libringbuffer.la \ + $(top_builddir)/src/common/libringbuffer.la \ $(top_builddir)/src/liblttng-ust-comm/liblttng-ust-comm.la \ $(top_builddir)/src/common/libcommon.la \ $(top_builddir)/tests/utils/libtap.a diff --git a/tests/unit/libringbuffer/shm.c b/tests/unit/libringbuffer/shm.c index c0092170..13c74c3d 100644 --- a/tests/unit/libringbuffer/shm.c +++ b/tests/unit/libringbuffer/shm.c @@ -12,7 +12,7 @@ #include #include -#include "../../../src/libringbuffer/shm.h" +#include "common/ringbuffer/shm.h" #include "common/align.h" #include "tap.h"
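
The shared-memory allocation path relocated above (ringbuffer/shm.c) sizes the object with ftruncate() and then zero-fills it with write(), so that later mmap() accesses cannot fault with SIGBUS when the backing shm space runs short. The following standalone sketch is illustrative only and not part of this patch; it shows the same idea against a throwaway POSIX shm object (link with -lrt on older glibc):

/*
 * Hypothetical demo of the zero-fill strategy: set the size with
 * ftruncate(), then force allocation by writing zeros page by page.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

static int zero_fill(int fd, size_t len)
{
        long page = sysconf(_SC_PAGESIZE);
        size_t written = 0;
        char *zeropage;

        if (page < 0)
                return -1;
        zeropage = calloc(1, (size_t) page);
        if (!zeropage)
                return -1;
        while (written < len) {
                size_t chunk = len - written < (size_t) page ?
                                len - written : (size_t) page;
                ssize_t ret = write(fd, zeropage, chunk);

                if (ret < 0) {
                        if (errno == EINTR)
                                continue;
                        free(zeropage);
                        return -1;
                }
                written += (size_t) ret;
        }
        free(zeropage);
        return 0;
}

int main(void)
{
        const size_t size = 1 << 20;    /* 1 MiB shared memory object */
        int fd = shm_open("/zero-fill-demo", O_CREAT | O_RDWR | O_EXCL, 0600);
        void *map;

        if (fd < 0) {
                perror("shm_open");
                return 1;
        }
        shm_unlink("/zero-fill-demo");  /* name no longer needed, fd stays valid */
        if (ftruncate(fd, size) < 0 || zero_fill(fd, size) < 0) {
                perror("allocate");
                close(fd);
                return 1;
        }
        map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (map == MAP_FAILED) {
                perror("mmap");
                close(fd);
                return 1;
        }
        printf("allocated and zero-filled %zu bytes\n", size);
        munmap(map, size);
        close(fd);
        return 0;
}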
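
The relocated ringbuffer/smp.c counts possible CPUs by scanning /sys/devices/system/cpu for "cpu<N>" directories and keeping the larger of that count and sysconf(_SC_NPROCESSORS_CONF), because MUSL's sysconf reflects only the current affinity mask. A simplified standalone sketch of that strategy follows; it is illustrative only and skips the d_type check and result caching done in the real code:

/*
 * Hypothetical demo: count "cpu<integer>" entries in sysfs, fall back to
 * sysconf(), and keep the highest of the two values.
 */
#include <dirent.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static long count_possible_cpus(void)
{
        long sysconf_count, dir_count = 0;
        DIR *cpudir = opendir("/sys/devices/system/cpu");

        if (cpudir) {
                struct dirent *entry;

                while ((entry = readdir(cpudir))) {
                        char *endptr;
                        unsigned long n;

                        if (strncmp(entry->d_name, "cpu", 3) != 0)
                                continue;
                        n = strtoul(entry->d_name + 3, &endptr, 10);
                        /* Keep only names of the form "cpu<integer>". */
                        if (endptr != entry->d_name + 3 && *endptr == '\0'
                                        && n < ULONG_MAX)
                                dir_count++;
                }
                closedir(cpudir);
        }
        sysconf_count = sysconf(_SC_NPROCESSORS_CONF);
        return sysconf_count > dir_count ? sysconf_count : dir_count;
}

int main(void)
{
        printf("possible cpus: %ld\n", count_possible_cpus());
        return 0;
}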
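
Finally, the shmp()/shmp_index() accessors in the relocated ringbuffer/shm.h never trust a struct shm_ref: the object index and byte offset are validated against the object table and mapping size before any pointer is produced, so a corrupted reference yields NULL instead of an out-of-bounds access. A hypothetical, self-contained sketch of that bounds-checking pattern follows; the demo_* names are invented for illustration and do not exist in lttng-ust:

/*
 * Hypothetical demo: turn an untrusted (index, offset) reference into a
 * pointer only after checking it against the table and mapping bounds.
 */
#include <stddef.h>
#include <stdio.h>

struct demo_object {
        char *memory_map;
        size_t memory_map_size;
};

struct demo_table {
        size_t allocated_len;
        struct demo_object objects[4];
};

struct demo_ref {
        size_t index;   /* object within the table */
        size_t offset;  /* byte offset within the object */
};

static void *demo_deref(struct demo_table *table, struct demo_ref ref,
                size_t elem_size)
{
        struct demo_object *obj;

        if (ref.index >= table->allocated_len)
                return NULL;
        obj = &table->objects[ref.index];
        /* Reject references whose element would overflow the mapping. */
        if (ref.offset > obj->memory_map_size
                        || elem_size > obj->memory_map_size - ref.offset)
                return NULL;
        return obj->memory_map + ref.offset;
}

int main(void)
{
        static char backing[64] = "hello from a fake shared mapping";
        struct demo_table table = {
                .allocated_len = 1,
                .objects = { { .memory_map = backing,
                               .memory_map_size = sizeof(backing) } },
        };
        struct demo_ref ok = { .index = 0, .offset = 0 };
        struct demo_ref bad = { .index = 0, .offset = 60 };

        printf("ok:  %s\n", (char *) demo_deref(&table, ok, 32));
        printf("bad: %p\n", demo_deref(&table, bad, 32));      /* NULL */
        return 0;
}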