Fix: pass private data to context callbacks
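
The context field callbacks (get_size, record, get_value, destroy) now
receive the field's private data as a void *priv argument instead of a
struct lttng_ctx_field pointer. The perf counter context keeps its
state in a struct lttng_perf_counter_field, registered through
lttng_ust_context_append() with ctx_field.priv pointing to it.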
diff --git a/liblttng-ust/lttng-context-perf-counters.c b/liblttng-ust/lttng-context-perf-counters.c
index d6bd41cc9e20a170dc73f5d76d473781cdd1e737..6f6209112a81b4bf681eae06f5fe3e40c37d6c78 100644
--- a/liblttng-ust/lttng-context-perf-counters.c
+++ b/liblttng-ust/lttng-context-perf-counters.c
@@ -1,44 +1,40 @@
 /*
- * lttng-context-perf-counters.c
- *
- * LTTng UST performance monitoring counters (perf-counters) integration.
+ * SPDX-License-Identifier: LGPL-2.1-only
  *
  * Copyright (C) 2009-2014 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
  *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; only
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * LTTng UST performance monitoring counters (perf-counters) integration.
  */
 
+#define _LGPL_SOURCE
+#include <limits.h>
 #include <sys/types.h>
 #include <unistd.h>
 #include <string.h>
 #include <stdlib.h>
 #include <stdio.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
 #include <sys/mman.h>
 #include <sys/syscall.h>
-#include <linux/perf_event.h>
+#include <lttng/ust-arch.h>
 #include <lttng/ust-events.h>
 #include <lttng/ust-tracer.h>
-#include <lttng/ringbuffer-config.h>
+#include <lttng/ringbuffer-context.h>
 #include <urcu/system.h>
 #include <urcu/arch.h>
 #include <urcu/rculist.h>
-#include <helper.h>
+#include <ust-helper.h>
 #include <urcu/ref.h>
 #include <usterr-signal-safe.h>
 #include <signal.h>
+#include <urcu/tls-compat.h>
+#include "perf_event.h"
+
+#include "context-internal.h"
 #include "lttng-tracer-core.h"
+#include "ust-events-internal.h"
 
 /*
  * We use a global perf counter key and iterate on per-thread RCU lists
@@ -55,6 +51,7 @@ struct lttng_perf_counter_thread_field {
        struct perf_event_mmap_page *pc;
        struct cds_list_head thread_field_node; /* Per-field list of thread fields (node) */
        struct cds_list_head rcu_field_node;    /* RCU per-thread list of fields (node) */
+       int fd;                                 /* Perf FD */
 };
 
 struct lttng_perf_counter_thread {
@@ -64,21 +61,131 @@ struct lttng_perf_counter_thread {
 struct lttng_perf_counter_field {
        struct perf_event_attr attr;
        struct cds_list_head thread_field_list; /* Per-field list of thread fields */
+       char *name;
 };
 
 static pthread_key_t perf_counter_key;
 
+/*
+ * lttng_perf_lock - Protect lttng-ust perf counter data structures
+ *
+ * Nests within the ust_lock, and therefore within the libc dl lock.
+ * The TLS must therefore be fixed up before nesting into this lock.
+ * Nests inside the RCU bp read-side lock. Protects against concurrent
+ * fork.
+ */
+static pthread_mutex_t ust_perf_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Cancel state when grabbing the ust_perf_mutex. Saved when locking,
+ * restored on unlock. Protected by ust_perf_mutex.
+ */
+static int ust_perf_saved_cancelstate;
+
+/*
+ * Track whether we are tracing from a signal handler nested on an
+ * application thread.
+ */
+static DEFINE_URCU_TLS(int, ust_perf_mutex_nest);
+
+/*
+ * Force a read (implying a TLS fixup for dlopen) of the TLS variables.
+ */
+void lttng_ust_fixup_perf_counter_tls(void)
+{
+       asm volatile ("" : : "m" (URCU_TLS(ust_perf_mutex_nest)));
+}
+
+void lttng_perf_lock(void)
+{
+       sigset_t sig_all_blocked, orig_mask;
+       int ret, oldstate;
+
+       ret = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate);
+       if (ret) {
+               ERR("pthread_setcancelstate: %s", strerror(ret));
+       }
+       sigfillset(&sig_all_blocked);
+       ret = pthread_sigmask(SIG_SETMASK, &sig_all_blocked, &orig_mask);
+       if (ret) {
+               ERR("pthread_sigmask: %s", strerror(ret));
+       }
+       if (!URCU_TLS(ust_perf_mutex_nest)++) {
+               /*
+                * Ensure the compiler doesn't move the store after the close()
+                * call, in case close() would be marked as leaf.
+                */
+               cmm_barrier();
+               pthread_mutex_lock(&ust_perf_mutex);
+               ust_perf_saved_cancelstate = oldstate;
+       }
+       ret = pthread_sigmask(SIG_SETMASK, &orig_mask, NULL);
+       if (ret) {
+               ERR("pthread_sigmask: %s", strerror(ret));
+       }
+}
+
+void lttng_perf_unlock(void)
+{
+       sigset_t sig_all_blocked, orig_mask;
+       int ret, newstate, oldstate;
+       bool restore_cancel = false;
+
+       sigfillset(&sig_all_blocked);
+       ret = pthread_sigmask(SIG_SETMASK, &sig_all_blocked, &orig_mask);
+       if (ret) {
+               ERR("pthread_sigmask: %s", strerror(ret));
+       }
+       /*
+        * Ensure the compiler doesn't move the store before the close()
+        * call, in case close() would be marked as leaf.
+        */
+       cmm_barrier();
+       if (!--URCU_TLS(ust_perf_mutex_nest)) {
+               newstate = ust_perf_saved_cancelstate;
+               restore_cancel = true;
+               pthread_mutex_unlock(&ust_perf_mutex);
+       }
+       ret = pthread_sigmask(SIG_SETMASK, &orig_mask, NULL);
+       if (ret) {
+               ERR("pthread_sigmask: %s", strerror(ret));
+       }
+       if (restore_cancel) {
+               ret = pthread_setcancelstate(newstate, &oldstate);
+               if (ret) {
+                       ERR("pthread_setcancelstate: %s", strerror(ret));
+               }
+       }
+}
+
 static
-size_t perf_counter_get_size(size_t offset)
+size_t perf_counter_get_size(void *priv __attribute__((unused)),
+               size_t offset)
 {
        size_t size = 0;
 
-       size += lib_ring_buffer_align(offset, lttng_alignof(uint64_t));
+       size += lttng_ust_lib_ring_buffer_align(offset, lttng_ust_rb_alignof(uint64_t));
        size += sizeof(uint64_t);
        return size;
 }
 
-#if defined(__x86_64__) || defined(__i386__)
+static
+uint64_t read_perf_counter_syscall(
+               struct lttng_perf_counter_thread_field *thread_field)
+{
+       uint64_t count;
+
+       if (caa_unlikely(thread_field->fd < 0))
+               return 0;
+
+       if (caa_unlikely(read(thread_field->fd, &count, sizeof(count))
+                               < sizeof(count)))
+               return 0;
+
+       return count;
+}
+
+#if defined(LTTNG_UST_ARCH_X86)
 
 static
 uint64_t rdpmc(unsigned int counter)
@@ -90,17 +197,22 @@ uint64_t rdpmc(unsigned int counter)
        return low | ((uint64_t) high) << 32;
 }
 
-#else /* defined(__x86_64__) || defined(__i386__) */
-
-#error "Perf event counters are only supported on x86 so far."
-
-#endif /* #else defined(__x86_64__) || defined(__i386__) */
+static
+bool has_rdpmc(struct perf_event_mmap_page *pc)
+{
+       if (caa_unlikely(!pc->cap_bit0_is_deprecated))
+               return false;
+       /* Since Linux kernel 3.12. */
+       return pc->cap_user_rdpmc;
+}
 
 static
-uint64_t read_perf_counter(struct perf_event_mmap_page *pc)
+uint64_t arch_read_perf_counter(
+               struct lttng_perf_counter_thread_field *thread_field)
 {
        uint32_t seq, idx;
        uint64_t count;
+       struct perf_event_mmap_page *pc = thread_field->pc;
 
        if (caa_unlikely(!pc))
                return 0;
@@ -110,17 +222,52 @@ uint64_t read_perf_counter(struct perf_event_mmap_page *pc)
                cmm_barrier();
 
                idx = pc->index;
-               if (idx)
-                       count = pc->offset + rdpmc(idx - 1);
-               else
-                       count = 0;
-
+               if (caa_likely(has_rdpmc(pc) && idx)) {
+                       int64_t pmcval;
+
+                       pmcval = rdpmc(idx - 1);
+                       /* Sign-extend the pmc register result. */
+                       pmcval <<= 64 - pc->pmc_width;
+                       pmcval >>= 64 - pc->pmc_width;
+                       count = pc->offset + pmcval;
+               } else {
+                       /* Fall-back on system call if rdpmc cannot be used. */
+                       return read_perf_counter_syscall(thread_field);
+               }
                cmm_barrier();
        } while (CMM_LOAD_SHARED(pc->lock) != seq);
 
        return count;
 }
 
+static
+int arch_perf_keep_fd(struct lttng_perf_counter_thread_field *thread_field)
+{
+       struct perf_event_mmap_page *pc = thread_field->pc;
+
+       if (!pc)
+               return 0;
+       return !has_rdpmc(pc);
+}
+
+#else
+
+/* Generic (slow) implementation using a read system call. */
+static
+uint64_t arch_read_perf_counter(
+               struct lttng_perf_counter_thread_field *thread_field)
+{
+       return read_perf_counter_syscall(thread_field);
+}
+
+static
+int arch_perf_keep_fd(struct lttng_perf_counter_thread_field *thread_field)
+{
+       return 1;
+}
+
+#endif
+
 static
 int sys_perf_event_open(struct perf_event_attr *attr,
                pid_t pid, int cpu, int group_fd,
@@ -131,21 +278,45 @@ int sys_perf_event_open(struct perf_event_attr *attr,
 }
 
 static
-struct perf_event_mmap_page *setup_perf(struct perf_event_attr *attr)
+int open_perf_fd(struct perf_event_attr *attr)
 {
-       void *perf_addr;
        int fd;
 
        fd = sys_perf_event_open(attr, 0, -1, -1, 0);
        if (fd < 0)
-               return NULL;
+               return -1;
+
+       return fd;
+}
+
+static
+void close_perf_fd(int fd)
+{
+       int ret;
+
+       if (fd < 0)
+               return;
+
+       ret = close(fd);
+       if (ret) {
+               perror("Error closing LTTng-UST perf memory mapping FD");
+       }
+}
+
+static void setup_perf(struct lttng_perf_counter_thread_field *thread_field)
+{
+       void *perf_addr;
 
        perf_addr = mmap(NULL, sizeof(struct perf_event_mmap_page),
-                       PROT_READ, MAP_SHARED, fd, 0);
+                       PROT_READ, MAP_SHARED, thread_field->fd, 0);
        if (perf_addr == MAP_FAILED)
-               return NULL;
-       close(fd);
-       return perf_addr;
+               perf_addr = NULL;
+       thread_field->pc = perf_addr;
+
+       if (!arch_perf_keep_fd(thread_field)) {
+               close_perf_fd(thread_field->fd);
+               thread_field->fd = -1;
+       }
 }
 
 static
@@ -218,14 +389,19 @@ struct lttng_perf_counter_thread_field *
        if (!thread_field)
                abort();
        thread_field->field = perf_field;
-       thread_field->pc = setup_perf(&perf_field->attr);
-       /* Note: thread_field->pc can be NULL if setup_perf() fails. */
-       ust_lock_nocheck();
+       thread_field->fd = open_perf_fd(&perf_field->attr);
+       if (thread_field->fd >= 0)
+               setup_perf(thread_field);
+       /*
+        * Note: thread_field->pc can be NULL if setup_perf() fails.
+        * Also, thread_field->fd can be -1 if open_perf_fd() fails.
+        */
+       lttng_perf_lock();
        cds_list_add_rcu(&thread_field->rcu_field_node,
                        &perf_thread->rcu_field_list);
        cds_list_add(&thread_field->thread_field_node,
                        &perf_field->thread_field_list);
-       ust_unlock();
+       lttng_perf_unlock();
 skip:
        ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
        if (ret)
@@ -253,43 +429,40 @@ struct lttng_perf_counter_thread_field *
 }
 
 static
-uint64_t wrapper_perf_counter_read(struct lttng_ctx_field *field)
+uint64_t wrapper_perf_counter_read(void *priv)
 {
        struct lttng_perf_counter_field *perf_field;
        struct lttng_perf_counter_thread_field *perf_thread_field;
 
-       perf_field = field->u.perf_counter;
+       perf_field = (struct lttng_perf_counter_field *) priv;
        perf_thread_field = get_thread_field(perf_field);
-       return read_perf_counter(perf_thread_field->pc);
+       return arch_read_perf_counter(perf_thread_field);
 }
 
 static
-void perf_counter_record(struct lttng_ctx_field *field,
+void perf_counter_record(void *priv,
                 struct lttng_ust_lib_ring_buffer_ctx *ctx,
-                struct lttng_channel *chan)
+                struct lttng_ust_channel_buffer *chan)
 {
        uint64_t value;
 
-       value = wrapper_perf_counter_read(field);
-       lib_ring_buffer_align_ctx(ctx, lttng_alignof(value));
-       chan->ops->event_write(ctx, &value, sizeof(value));
+       value = wrapper_perf_counter_read(priv);
+       chan->ops->event_write(ctx, &value, sizeof(value), lttng_ust_rb_alignof(value));
 }
 
 static
-void perf_counter_get_value(struct lttng_ctx_field *field,
-               union lttng_ctx_value *value)
+void perf_counter_get_value(void *priv,
+               struct lttng_ust_ctx_value *value)
 {
-       uint64_t v;
-
-       v = wrapper_perf_counter_read(field);
-       value->s64 = v;
+       value->u.s64 = wrapper_perf_counter_read(priv);
 }
 
-/* Called with UST lock held */
+/* Called with perf lock held */
 static
 void lttng_destroy_perf_thread_field(
                struct lttng_perf_counter_thread_field *thread_field)
 {
+       close_perf_fd(thread_field->fd);
        unmap_perf_page(thread_field->pc);
        cds_list_del_rcu(&thread_field->rcu_field_node);
        cds_list_del(&thread_field->thread_field_node);
@@ -302,113 +475,133 @@ void lttng_destroy_perf_thread_key(void *_key)
        struct lttng_perf_counter_thread *perf_thread = _key;
        struct lttng_perf_counter_thread_field *pos, *p;
 
-       ust_lock_nocheck();
+       lttng_perf_lock();
        cds_list_for_each_entry_safe(pos, p, &perf_thread->rcu_field_list,
                        rcu_field_node)
                lttng_destroy_perf_thread_field(pos);
-       ust_unlock();
+       lttng_perf_unlock();
        free(perf_thread);
 }
 
 /* Called with UST lock held */
 static
-void lttng_destroy_perf_counter_field(struct lttng_ctx_field *field)
+void lttng_destroy_perf_counter_ctx_field(void *priv)
 {
        struct lttng_perf_counter_field *perf_field;
        struct lttng_perf_counter_thread_field *pos, *p;
 
-       free((char *) field->event_field.name);
-       perf_field = field->u.perf_counter;
+       perf_field = (struct lttng_perf_counter_field *) priv;
+       free(perf_field->name);
        /*
        * This put is performed when no threads can concurrently
        * perform a "get", thanks to urcu-bp grace
-        * period.
+        * period. Holding the lttng perf lock protects against
+        * concurrent modification of the per-thread thread field
+        * list.
         */
+       lttng_perf_lock();
        cds_list_for_each_entry_safe(pos, p, &perf_field->thread_field_list,
                        thread_field_node)
                lttng_destroy_perf_thread_field(pos);
+       lttng_perf_unlock();
        free(perf_field);
 }
 
+#ifdef LTTNG_UST_ARCH_ARMV7
+
+static
+int perf_get_exclude_kernel(void)
+{
+       return 0;
+}
+
+#else /* LTTNG_UST_ARCH_ARMV7 */
+
+static
+int perf_get_exclude_kernel(void)
+{
+       return 1;
+}
+
+#endif /* LTTNG_UST_ARCH_ARMV7 */
+
+static const struct lttng_ust_type_common *ust_type =
+       lttng_ust_static_type_integer(sizeof(uint64_t) * CHAR_BIT,
+                       lttng_ust_rb_alignof(uint64_t) * CHAR_BIT,
+                       lttng_ust_is_signed_type(uint64_t),
+                       BYTE_ORDER, 10);
+
 /* Called with UST lock held */
 int lttng_add_perf_counter_to_ctx(uint32_t type,
                                uint64_t config,
                                const char *name,
-                               struct lttng_ctx **ctx)
+                               struct lttng_ust_ctx **ctx)
 {
-       struct lttng_ctx_field *field;
+       struct lttng_ust_ctx_field ctx_field;
+       struct lttng_ust_event_field *event_field;
        struct lttng_perf_counter_field *perf_field;
-       struct perf_event_mmap_page *tmp_pc;
        char *name_alloc;
        int ret;
 
+       if (lttng_find_context(*ctx, name)) {
+               ret = -EEXIST;
+               goto find_error;
+       }
        name_alloc = strdup(name);
        if (!name_alloc) {
                ret = -ENOMEM;
                goto name_alloc_error;
        }
+       event_field = zmalloc(sizeof(*event_field));
+       if (!event_field) {
+               ret = -ENOMEM;
+               goto event_field_alloc_error;
+       }
+       event_field->name = name_alloc;
+       event_field->type = ust_type;
+
        perf_field = zmalloc(sizeof(*perf_field));
        if (!perf_field) {
                ret = -ENOMEM;
                goto perf_field_alloc_error;
        }
-       field = lttng_append_context(ctx);
-       if (!field) {
-               ret = -ENOMEM;
-               goto append_context_error;
-       }
-       if (lttng_find_context(*ctx, name_alloc)) {
-               ret = -EEXIST;
-               goto find_error;
-       }
-
-       field->destroy = lttng_destroy_perf_counter_field;
-
-       field->event_field.name = name_alloc;
-       field->event_field.type.atype = atype_integer;
-       field->event_field.type.u.basic.integer.size =
-                       sizeof(uint64_t) * CHAR_BIT;
-       field->event_field.type.u.basic.integer.alignment =
-                       lttng_alignof(uint64_t) * CHAR_BIT;
-       field->event_field.type.u.basic.integer.signedness =
-                       lttng_is_signed_type(uint64_t);
-       field->event_field.type.u.basic.integer.reverse_byte_order = 0;
-       field->event_field.type.u.basic.integer.base = 10;
-       field->event_field.type.u.basic.integer.encoding = lttng_encode_none;
-       field->get_size = perf_counter_get_size;
-       field->record = perf_counter_record;
-       field->get_value = perf_counter_get_value;
-
        perf_field->attr.type = type;
        perf_field->attr.config = config;
-       perf_field->attr.exclude_kernel = 1;
+       perf_field->attr.exclude_kernel = perf_get_exclude_kernel();
        CDS_INIT_LIST_HEAD(&perf_field->thread_field_list);
-       field->u.perf_counter = perf_field;
+       perf_field->name = name_alloc;
 
        /* Ensure that this perf counter can be used in this process. */
-       tmp_pc = setup_perf(&perf_field->attr);
-       if (!tmp_pc) {
+       ret = open_perf_fd(&perf_field->attr);
+       if (ret < 0) {
                ret = -ENODEV;
                goto setup_error;
        }
-       unmap_perf_page(tmp_pc);
+       close_perf_fd(ret);
 
-       /*
-        * Contexts can only be added before tracing is started, so we
-        * don't have to synchronize against concurrent threads using
-        * the field here.
-        */
+       ctx_field.event_field = event_field;
+       ctx_field.get_size = perf_counter_get_size;
+       ctx_field.record = perf_counter_record;
+       ctx_field.get_value = perf_counter_get_value;
+       ctx_field.destroy = lttng_destroy_perf_counter_ctx_field;
+       ctx_field.priv = perf_field;
 
+       ret = lttng_ust_context_append(ctx, &ctx_field);
+       if (ret) {
+               ret = -ENOMEM;
+               goto append_context_error;
+       }
        return 0;
 
-setup_error:
-find_error:
-       lttng_remove_context_field(ctx, field);
 append_context_error:
+setup_error:
        free(perf_field);
 perf_field_alloc_error:
+       free(event_field);
+event_field_alloc_error:
        free(name_alloc);
 name_alloc_error:
+find_error:
        return ret;
 }
 
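A note on the locking introduced by this patch: lttng_perf_lock() and
lttng_perf_unlock() rely on a per-thread nest count so that only the
outermost caller takes the mutex, which keeps a tracing signal handler
firing on the lock owner's thread from self-deadlocking. A stripped-down
sketch of that idiom follows; the demo_* names are hypothetical, and the
signal masking and cancel-state handling done by the real code are
omitted.

#include <pthread.h>
#include <stdio.h>

/* Sketch of the nest-counted locking idiom used by lttng_perf_lock():
 * only the outermost call acquires the mutex; nested calls on the same
 * thread (e.g. from a signal handler) just bump the counter. */
static pthread_mutex_t demo_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int demo_nest;

static void demo_lock(void)
{
	if (!demo_nest++)
		pthread_mutex_lock(&demo_mutex);
}

static void demo_unlock(void)
{
	if (!--demo_nest)
		pthread_mutex_unlock(&demo_mutex);
}

int main(void)
{
	demo_lock();
	demo_lock();		/* nested: no second mutex acquisition */
	demo_unlock();
	demo_unlock();		/* outermost: releases the mutex */
	printf("nest count back to %d\n", demo_nest);
	return 0;
}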
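Similarly, the rdpmc fast path adds pc->offset to a raw counter value
that is only pc->pmc_width bits wide, so the patch sign-extends it with
a shift pair before the addition. A standalone sketch of that operation,
with a made-up width and sample value:

#include <stdint.h>
#include <stdio.h>

/* Sign-extend a pmc_width-bit raw counter value, mirroring the shift
 * pair in arch_read_perf_counter(). Relies on arithmetic right shift
 * of signed integers, as the patch itself does. */
static int64_t pmc_sign_extend(uint64_t raw, unsigned int pmc_width)
{
	int64_t pmcval = (int64_t) raw;

	pmcval <<= 64 - pmc_width;
	pmcval >>= 64 - pmc_width;
	return pmcval;
}

int main(void)
{
	/* Hypothetical 48-bit PMC with the top bit set: the raw value
	 * 0xfffffffffff0 reads back as -16 once sign-extended. */
	printf("%lld\n", (long long) pmc_sign_extend(0xfffffffffff0ULL, 48));
	return 0;
}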