/*
- * lttng-context-perf-counters.c
- *
- * LTTng UST performance monitoring counters (perf-counters) integration.
+ * SPDX-License-Identifier: LGPL-2.1-only
*
* Copyright (C) 2009-2014 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
*
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; only
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * LTTng UST performance monitoring counters (perf-counters) integration.
*/
+#define _LGPL_SOURCE
+#include <limits.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
#include <sys/mman.h>
#include <sys/syscall.h>
-#include <linux/perf_event.h>
+#include <lttng/ust-arch.h>
#include <lttng/ust-events.h>
#include <lttng/ust-tracer.h>
-#include <lttng/ringbuffer-config.h>
+#include <lttng/ringbuffer-context.h>
#include <urcu/system.h>
#include <urcu/arch.h>
#include <urcu/rculist.h>
-#include <helper.h>
+#include <ust-helper.h>
#include <urcu/ref.h>
#include <usterr-signal-safe.h>
#include <signal.h>
+#include <urcu/tls-compat.h>
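+/* Local copy of the Linux perf_event ABI definitions. */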
+#include "perf_event.h"
+
+#include "context-internal.h"
#include "lttng-tracer-core.h"
+#include "ust-events-internal.h"
/*
* We use a global perf counter key and iterate on per-thread RCU lists
struct perf_event_mmap_page *pc;
struct cds_list_head thread_field_node; /* Per-field list of thread fields (node) */
struct cds_list_head rcu_field_node; /* RCU per-thread list of fields (node) */
+ int fd; /* Perf FD */
};
struct lttng_perf_counter_thread {
struct lttng_perf_counter_field {
struct perf_event_attr attr;
struct cds_list_head thread_field_list; /* Per-field list of thread fields */
+ char *name;
};
static pthread_key_t perf_counter_key;
+/*
+ * lttng_perf_lock - Protect lttng-ust perf counter data structures
+ *
+ * Nests within the ust_lock, and therefore within the libc dl lock.
+ * The TLS must consequently be fixed up before nesting into this
+ * lock. Nests inside the RCU-bp read-side lock. Protects against
+ * concurrent fork.
+ */
+static pthread_mutex_t ust_perf_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Cancel state when grabbing the ust_perf_mutex. Saved when locking,
+ * restored on unlock. Protected by ust_perf_mutex.
+ */
+static int ust_perf_saved_cancelstate;
+
+/*
+ * Track whether we are tracing from a signal handler nested on an
+ * application thread.
+ */
+static DEFINE_URCU_TLS(int, ust_perf_mutex_nest);
+
+/*
+ * Force a read of the TLS variables (implying a TLS fixup for dlopen).
+ */
+void lttng_ust_fixup_perf_counter_tls(void)
+{
+ asm volatile ("" : : "m" (URCU_TLS(ust_perf_mutex_nest)));
+}
+
+void lttng_perf_lock(void)
+{
+ sigset_t sig_all_blocked, orig_mask;
+ int ret, oldstate;
+
+ ret = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate);
+ if (ret) {
+ ERR("pthread_setcancelstate: %s", strerror(ret));
+ }
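+ /*
+ * Block all signals while the nest count and mutex are updated,
+ * so a signal handler nested on this thread cannot observe them
+ * in an inconsistent state.
+ */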
+ sigfillset(&sig_all_blocked);
+ ret = pthread_sigmask(SIG_SETMASK, &sig_all_blocked, &orig_mask);
+ if (ret) {
+ ERR("pthread_sigmask: %s", strerror(ret));
+ }
+ if (!URCU_TLS(ust_perf_mutex_nest)++) {
+ /*
+ * Ensure the compiler does not move the nest count store past
+ * the pthread_mutex_lock() call, in case it would be marked as
+ * a leaf function.
+ */
+ cmm_barrier();
+ pthread_mutex_lock(&ust_perf_mutex);
+ ust_perf_saved_cancelstate = oldstate;
+ }
+ ret = pthread_sigmask(SIG_SETMASK, &orig_mask, NULL);
+ if (ret) {
+ ERR("pthread_sigmask: %s", strerror(ret));
+ }
+}
+
+void lttng_perf_unlock(void)
+{
+ sigset_t sig_all_blocked, orig_mask;
+ int ret, newstate, oldstate;
+ bool restore_cancel = false;
+
+ sigfillset(&sig_all_blocked);
+ ret = pthread_sigmask(SIG_SETMASK, &sig_all_blocked, &orig_mask);
+ if (ret) {
+ ERR("pthread_sigmask: %s", strerror(ret));
+ }
+ /*
+ * Ensure the compiler does not move the nest count store before
+ * the pthread_sigmask() call above, in case it would be marked
+ * as a leaf function.
+ */
+ cmm_barrier();
+ if (!--URCU_TLS(ust_perf_mutex_nest)) {
+ newstate = ust_perf_saved_cancelstate;
+ restore_cancel = true;
+ pthread_mutex_unlock(&ust_perf_mutex);
+ }
+ ret = pthread_sigmask(SIG_SETMASK, &orig_mask, NULL);
+ if (ret) {
+ ERR("pthread_sigmask: %s", strerror(ret));
+ }
+ if (restore_cancel) {
+ ret = pthread_setcancelstate(newstate, &oldstate);
+ if (ret) {
+ ERR("pthread_setcancelstate: %s", strerror(ret));
+ }
+ }
+}
+
static
-size_t perf_counter_get_size(size_t offset)
+size_t perf_counter_get_size(void *priv __attribute__((unused)),
+ size_t offset)
{
size_t size = 0;
- size += lib_ring_buffer_align(offset, lttng_alignof(uint64_t));
+ size += lttng_ust_lib_ring_buffer_align(offset, lttng_ust_rb_alignof(uint64_t));
size += sizeof(uint64_t);
return size;
}
-#if defined(__x86_64__) || defined(__i386__)
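+/*
+ * Fallback read: read(2) on the perf event file descriptor returns
+ * the current 64-bit counter value.
+ */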
+static
+uint64_t read_perf_counter_syscall(
+ struct lttng_perf_counter_thread_field *thread_field)
+{
+ uint64_t count;
+
+ if (caa_unlikely(thread_field->fd < 0))
+ return 0;
+
+ /* read(2) returns ssize_t; treat errors and short reads as 0. */
+ if (caa_unlikely(read(thread_field->fd, &count, sizeof(count))
+ != (ssize_t) sizeof(count)))
+ return 0;
+
+ return count;
+}
+
+#if defined(LTTNG_UST_ARCH_X86)
static
uint64_t rdpmc(unsigned int counter)
return low | ((uint64_t) high) << 32;
}
-#else /* defined(__x86_64__) || defined(__i386__) */
-
-#error "Perf event counters are only supported on x86 so far."
-
-#endif /* #else defined(__x86_64__) || defined(__i386__) */
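+/*
+ * Check whether the kernel allows user-space rdpmc for this event:
+ * cap_bit0_is_deprecated set means the cap_user_rdpmc field is valid.
+ */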
+static
+bool has_rdpmc(struct perf_event_mmap_page *pc)
+{
+ if (caa_unlikely(!pc->cap_bit0_is_deprecated))
+ return false;
+ /* Since Linux kernel 3.12. */
+ return pc->cap_user_rdpmc;
+}
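+
+/*
+ * Read the counter via rdpmc when permitted, retrying under the
+ * kernel's seqlock (pc->lock) if the event is concurrently
+ * rescheduled, and falling back on the read(2) system call when
+ * rdpmc cannot be used.
+ */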
static
-uint64_t read_perf_counter(struct perf_event_mmap_page *pc)
+uint64_t arch_read_perf_counter(
+ struct lttng_perf_counter_thread_field *thread_field)
{
uint32_t seq, idx;
uint64_t count;
+ struct perf_event_mmap_page *pc = thread_field->pc;
if (caa_unlikely(!pc))
return 0;
cmm_barrier();
idx = pc->index;
- if (idx)
- count = pc->offset + rdpmc(idx - 1);
- else
- count = 0;
-
+ if (caa_likely(has_rdpmc(pc) && idx)) {
+ int64_t pmcval;
+
+ pmcval = rdpmc(idx - 1);
+ /* Sign-extend the pmc register result. */
+ pmcval <<= 64 - pc->pmc_width;
+ pmcval >>= 64 - pc->pmc_width;
+ count = pc->offset + pmcval;
+ } else {
+ /* Fall-back on system call if rdpmc cannot be used. */
+ return read_perf_counter_syscall(thread_field);
+ }
cmm_barrier();
} while (CMM_LOAD_SHARED(pc->lock) != seq);
return count;
}
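+
+/*
+ * Keep the perf fd open only when rdpmc cannot be used, since the
+ * read(2) fallback then needs it.
+ */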
+static
+int arch_perf_keep_fd(struct lttng_perf_counter_thread_field *thread_field)
+{
+ struct perf_event_mmap_page *pc = thread_field->pc;
+
+ if (!pc)
+ return 0;
+ return !has_rdpmc(pc);
+}
+
+#else
+
+/* Generic (slow) implementation using a read system call. */
+static
+uint64_t arch_read_perf_counter(
+ struct lttng_perf_counter_thread_field *thread_field)
+{
+ return read_perf_counter_syscall(thread_field);
+}
+
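+/* The fd must stay open: read(2) is the only way to read the counter. */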
+static
+int arch_perf_keep_fd(struct lttng_perf_counter_thread_field *thread_field)
+{
+ return 1;
+}
+
+#endif
+
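+/*
+ * Thin wrapper around the perf_event_open(2) system call, which has
+ * no wrapper in the C library.
+ */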
static
int sys_perf_event_open(struct perf_event_attr *attr,
pid_t pid, int cpu, int group_fd,
}
static
-struct perf_event_mmap_page *setup_perf(struct perf_event_attr *attr)
+int open_perf_fd(struct perf_event_attr *attr)
{
- void *perf_addr;
- int fd, ret;
+ int fd;
fd = sys_perf_event_open(attr, 0, -1, -1, 0);
if (fd < 0)
- return NULL;
+ return -1;
+
+ return fd;
+}
+
+static
+void close_perf_fd(int fd)
+{
+ int ret;
+
+ if (fd < 0)
+ return;
- perf_addr = mmap(NULL, sizeof(struct perf_event_mmap_page),
- PROT_READ, MAP_SHARED, fd, 0);
- if (perf_addr == MAP_FAILED)
- return NULL;
ret = close(fd);
if (ret) {
perror("Error closing LTTng-UST perf memory mapping FD");
}
- return perf_addr;
+}
+
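+/*
+ * mmap the perf page: the first page of the perf event mapping holds
+ * the perf_event_mmap_page header needed for rdpmc-based reads. The
+ * fd is closed right away when the architecture does not need to
+ * keep it.
+ */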
+static
+void setup_perf(struct lttng_perf_counter_thread_field *thread_field)
+{
+ void *perf_addr;
+
+ perf_addr = mmap(NULL, sizeof(struct perf_event_mmap_page),
+ PROT_READ, MAP_SHARED, thread_field->fd, 0);
+ if (perf_addr == MAP_FAILED)
+ perf_addr = NULL;
+ thread_field->pc = perf_addr;
+
+ if (!arch_perf_keep_fd(thread_field)) {
+ close_perf_fd(thread_field->fd);
+ thread_field->fd = -1;
+ }
}
static
if (!thread_field)
abort();
thread_field->field = perf_field;
- thread_field->pc = setup_perf(&perf_field->attr);
- /* Note: thread_field->pc can be NULL if setup_perf() fails. */
- ust_lock_nocheck();
+ thread_field->fd = open_perf_fd(&perf_field->attr);
+ if (thread_field->fd >= 0)
+ setup_perf(thread_field);
+ /*
+ * Note: thread_field->pc can be NULL if setup_perf() fails.
+ * Also, thread_field->fd can be -1 if open_perf_fd() fails.
+ */
+ lttng_perf_lock();
cds_list_add_rcu(&thread_field->rcu_field_node,
&perf_thread->rcu_field_list);
cds_list_add(&thread_field->thread_field_node,
&perf_field->thread_field_list);
- ust_unlock();
+ lttng_perf_unlock();
skip:
ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
if (ret)
}
static
-uint64_t wrapper_perf_counter_read(struct lttng_ctx_field *field)
+uint64_t wrapper_perf_counter_read(void *priv)
{
struct lttng_perf_counter_field *perf_field;
struct lttng_perf_counter_thread_field *perf_thread_field;
- perf_field = field->u.perf_counter;
+ perf_field = (struct lttng_perf_counter_field *) priv;
perf_thread_field = get_thread_field(perf_field);
- return read_perf_counter(perf_thread_field->pc);
+ return arch_read_perf_counter(perf_thread_field);
}
static
-void perf_counter_record(struct lttng_ctx_field *field,
+void perf_counter_record(void *priv,
struct lttng_ust_lib_ring_buffer_ctx *ctx,
- struct lttng_channel *chan)
+ struct lttng_ust_channel_buffer *chan)
{
uint64_t value;
- value = wrapper_perf_counter_read(field);
- lib_ring_buffer_align_ctx(ctx, lttng_alignof(value));
- chan->ops->event_write(ctx, &value, sizeof(value));
+ value = wrapper_perf_counter_read(priv);
+ chan->ops->event_write(ctx, &value, sizeof(value), lttng_ust_rb_alignof(value));
}
static
-void perf_counter_get_value(struct lttng_ctx_field *field,
- union lttng_ctx_value *value)
+void perf_counter_get_value(void *priv,
+ struct lttng_ust_ctx_value *value)
{
- uint64_t v;
-
- v = wrapper_perf_counter_read(field);
- value->s64 = v;
+ value->u.s64 = wrapper_perf_counter_read(priv);
}
-/* Called with UST lock held */
+/* Called with perf lock held */
static
void lttng_destroy_perf_thread_field(
struct lttng_perf_counter_thread_field *thread_field)
{
+ close_perf_fd(thread_field->fd);
unmap_perf_page(thread_field->pc);
cds_list_del_rcu(&thread_field->rcu_field_node);
cds_list_del(&thread_field->thread_field_node);
struct lttng_perf_counter_thread *perf_thread = _key;
struct lttng_perf_counter_thread_field *pos, *p;
- ust_lock_nocheck();
+ lttng_perf_lock();
cds_list_for_each_entry_safe(pos, p, &perf_thread->rcu_field_list,
rcu_field_node)
lttng_destroy_perf_thread_field(pos);
- ust_unlock();
+ lttng_perf_unlock();
free(perf_thread);
}
/* Called with UST lock held */
static
-void lttng_destroy_perf_counter_field(struct lttng_ctx_field *field)
+void lttng_destroy_perf_counter_ctx_field(void *priv)
{
struct lttng_perf_counter_field *perf_field;
struct lttng_perf_counter_thread_field *pos, *p;
- free((char *) field->event_field.name);
- perf_field = field->u.perf_counter;
+ perf_field = (struct lttng_perf_counter_field *) priv;
+ free(perf_field->name);
/*
- * This put is performed when no threads can concurrently
- * perform a "get" concurrently, thanks to urcu-bp grace
- * period.
+ * This put is performed when no thread can concurrently
+ * perform a "get", thanks to the urcu-bp grace period.
+ * Holding the lttng perf lock protects against concurrent
+ * modification of the per-thread thread field list.
*/
+ lttng_perf_lock();
cds_list_for_each_entry_safe(pos, p, &perf_field->thread_field_list,
thread_field_node)
lttng_destroy_perf_thread_field(pos);
+ lttng_perf_unlock();
free(perf_field);
}
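+
+/*
+ * Whether to set the exclude_kernel attribute: kernel-mode counts
+ * are excluded everywhere except on ARMv7, where excluding them is
+ * not supported.
+ */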
+#ifdef LTTNG_UST_ARCH_ARMV7
+
+static
+int perf_get_exclude_kernel(void)
+{
+ return 0;
+}
+
+#else /* LTTNG_UST_ARCH_ARMV7 */
+
+static
+int perf_get_exclude_kernel(void)
+{
+ return 1;
+}
+
+#endif /* LTTNG_UST_ARCH_ARMV7 */
+
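+/*
+ * All perf counter context fields share this event field type: a
+ * 64-bit, unsigned, base-10, host-endian integer.
+ */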
+static const struct lttng_ust_type_common *ust_type =
+ lttng_ust_static_type_integer(sizeof(uint64_t) * CHAR_BIT,
+ lttng_ust_rb_alignof(uint64_t) * CHAR_BIT,
+ lttng_ust_is_signed_type(uint64_t),
+ BYTE_ORDER, 10);
+
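+/*
+ * A perf counter context is typically requested through lttng-tools,
+ * e.g. (assuming the lttng(1) command-line interface):
+ *
+ *   lttng add-context --userspace --type=perf:thread:instructions
+ *
+ * which reaches lttng_add_perf_counter_to_ctx() with the matching
+ * perf type/config and context field name.
+ */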
/* Called with UST lock held */
int lttng_add_perf_counter_to_ctx(uint32_t type,
uint64_t config,
const char *name,
- struct lttng_ctx **ctx)
+ struct lttng_ust_ctx **ctx)
{
- struct lttng_ctx_field *field;
+ struct lttng_ust_ctx_field ctx_field;
+ struct lttng_ust_event_field *event_field;
struct lttng_perf_counter_field *perf_field;
- struct perf_event_mmap_page *tmp_pc;
char *name_alloc;
int ret;
+ if (lttng_find_context(*ctx, name)) {
+ ret = -EEXIST;
+ goto find_error;
+ }
name_alloc = strdup(name);
if (!name_alloc) {
ret = -ENOMEM;
goto name_alloc_error;
}
+ event_field = zmalloc(sizeof(*event_field));
+ if (!event_field) {
+ ret = -ENOMEM;
+ goto event_field_alloc_error;
+ }
+ event_field->name = name_alloc;
+ event_field->type = ust_type;
+
perf_field = zmalloc(sizeof(*perf_field));
if (!perf_field) {
ret = -ENOMEM;
goto perf_field_alloc_error;
}
- field = lttng_append_context(ctx);
- if (!field) {
- ret = -ENOMEM;
- goto append_context_error;
- }
- if (lttng_find_context(*ctx, name_alloc)) {
- ret = -EEXIST;
- goto find_error;
- }
-
- field->destroy = lttng_destroy_perf_counter_field;
-
- field->event_field.name = name_alloc;
- field->event_field.type.atype = atype_integer;
- field->event_field.type.u.basic.integer.size =
- sizeof(uint64_t) * CHAR_BIT;
- field->event_field.type.u.basic.integer.alignment =
- lttng_alignof(uint64_t) * CHAR_BIT;
- field->event_field.type.u.basic.integer.signedness =
- lttng_is_signed_type(uint64_t);
- field->event_field.type.u.basic.integer.reverse_byte_order = 0;
- field->event_field.type.u.basic.integer.base = 10;
- field->event_field.type.u.basic.integer.encoding = lttng_encode_none;
- field->get_size = perf_counter_get_size;
- field->record = perf_counter_record;
- field->get_value = perf_counter_get_value;
-
perf_field->attr.type = type;
perf_field->attr.config = config;
- perf_field->attr.exclude_kernel = 1;
+ perf_field->attr.exclude_kernel = perf_get_exclude_kernel();
CDS_INIT_LIST_HEAD(&perf_field->thread_field_list);
- field->u.perf_counter = perf_field;
+ perf_field->name = name_alloc;
/* Ensure that this perf counter can be used in this process. */
- tmp_pc = setup_perf(&perf_field->attr);
- if (!tmp_pc) {
+ ret = open_perf_fd(&perf_field->attr);
+ if (ret < 0) {
ret = -ENODEV;
goto setup_error;
}
- unmap_perf_page(tmp_pc);
+ close_perf_fd(ret);
- /*
- * Contexts can only be added before tracing is started, so we
- * don't have to synchronize against concurrent threads using
- * the field here.
- */
+ ctx_field.event_field = event_field;
+ ctx_field.get_size = perf_counter_get_size;
+ ctx_field.record = perf_counter_record;
+ ctx_field.get_value = perf_counter_get_value;
+ ctx_field.destroy = lttng_destroy_perf_counter_ctx_field;
+ ctx_field.priv = perf_field;
- lttng_context_update(*ctx);
+ ret = lttng_ust_context_append(ctx, &ctx_field);
+ if (ret) {
+ ret = -ENOMEM;
+ goto append_context_error;
+ }
return 0;
-setup_error:
-find_error:
- lttng_remove_context_field(ctx, field);
append_context_error:
+setup_error:
free(perf_field);
perf_field_alloc_error:
+ free(event_field);
+event_field_alloc_error:
free(name_alloc);
name_alloc_error:
+find_error:
return ret;
}