Add perf context support for ARMv7
[lttng-ust.git] / liblttng-ust / lttng-context-perf-counters.c
/*
 * lttng-context-perf-counters.c
 *
 * LTTng UST performance monitoring counters (perf-counters) integration.
 *
 * Copyright (C) 2009-2014 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; only
 * version 2.1 of the License.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include <lttng/ust-events.h>
#include <lttng/ust-tracer.h>
#include <lttng/ringbuffer-config.h>
#include <urcu/system.h>
#include <urcu/arch.h>
#include <urcu/rculist.h>
#include <helper.h>
#include <urcu/ref.h>
#include <usterr-signal-safe.h>
#include <signal.h>
#include "lttng-tracer-core.h"

/*
 * We use a global perf counter key and iterate on per-thread RCU lists
 * of fields in the fast path, even though, strictly speaking, this is
 * not what would provide the best fast-path complexity, to ensure
 * teardown of sessions vs thread exit is handled racelessly.
 *
 * Updates and traversals of thread_list are protected by UST lock.
 * Updates to rcu_field_list are protected by UST lock.
 */

struct lttng_perf_counter_thread_field {
	struct lttng_perf_counter_field *field;	/* Back reference */
	struct perf_event_mmap_page *pc;
	struct cds_list_head thread_field_node;	/* Per-field list of thread fields (node) */
	struct cds_list_head rcu_field_node;	/* RCU per-thread list of fields (node) */
	int fd;					/* Perf FD */
};

struct lttng_perf_counter_thread {
	struct cds_list_head rcu_field_list;	/* RCU per-thread list of fields */
};

struct lttng_perf_counter_field {
	struct perf_event_attr attr;
	struct cds_list_head thread_field_list;	/* Per-field list of thread fields */
};

static pthread_key_t perf_counter_key;

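/*
 * The perf counter context field records a single 64-bit value,
 * aligned on the natural alignment of uint64_t.
 */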
static
size_t perf_counter_get_size(struct lttng_ctx_field *field, size_t offset)
{
	size_t size = 0;

	size += lib_ring_buffer_align(offset, lttng_alignof(uint64_t));
	size += sizeof(uint64_t);
	return size;
}

#if defined(__x86_64__) || defined(__i386__)

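/*
 * Read performance monitoring counter "counter" in user space with the
 * x86 rdpmc instruction, which returns the low/high halves of the
 * count in EAX/EDX.
 */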
static
uint64_t rdpmc(unsigned int counter)
{
	unsigned int low, high;

	asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));

	return low | ((uint64_t) high) << 32;
}

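/*
 * On x86, the counter is read entirely in user space through the
 * mmap'd perf_event_mmap_page, so the perf file descriptor is not
 * needed after setup and can be closed (see setup_perf()).
 */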
static bool arch_perf_use_read(void)
{
	return false;
}

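/*
 * Lock-free read of the counter: pc->lock is a sequence count updated
 * by the kernel whenever the event state changes; if it changed while
 * we were reading, retry. pc->index is the hardware counter number
 * plus one, or 0 if the event is not currently running on this CPU,
 * and pc->offset holds the count accumulated while the event was
 * scheduled out.
 */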
static
uint64_t read_perf_counter(
		struct lttng_perf_counter_thread_field *thread_field)
{
	uint32_t seq, idx;
	uint64_t count;
	struct perf_event_mmap_page *pc = thread_field->pc;

	if (caa_unlikely(!pc))
		return 0;

	do {
		seq = CMM_LOAD_SHARED(pc->lock);
		cmm_barrier();

		idx = pc->index;
		if (idx)
			count = pc->offset + rdpmc(idx - 1);
		else
			count = 0;

		cmm_barrier();
	} while (CMM_LOAD_SHARED(pc->lock) != seq);

	return count;
}

#elif defined(__ARM_ARCH_7A__)

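/*
 * ARMv7 has no user-space equivalent of rdpmc here, so the counter
 * value is obtained with a read(2) on the perf event file descriptor,
 * which must therefore be kept open for the lifetime of the thread
 * field.
 */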
static bool arch_perf_use_read(void)
{
	return true;
}

static
uint64_t read_perf_counter(
		struct lttng_perf_counter_thread_field *thread_field)
{
	uint64_t count;
	ssize_t len;

	if (caa_unlikely(thread_field->fd < 0))
		return 0;

	/*
	 * Compare as ssize_t: comparing the return value of read()
	 * directly against sizeof(count) would convert a -1 error
	 * return to a huge unsigned value and miss the error.
	 */
	len = read(thread_field->fd, &count, sizeof(count));
	if (caa_unlikely(len < (ssize_t) sizeof(count)))
		return 0;

	return count;
}

#else /* defined(__x86_64__) || defined(__i386__) || defined(__ARM_ARCH_7A__) */

#error "Perf event counters are only supported on x86 and ARMv7 so far."

#endif /* #else defined(__x86_64__) || defined(__i386__) || defined(__ARM_ARCH_7A__) */

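/*
 * glibc does not provide a wrapper for the perf_event_open system
 * call, so invoke it directly through syscall(2).
 */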
static
int sys_perf_event_open(struct perf_event_attr *attr,
		pid_t pid, int cpu, int group_fd,
		unsigned long flags)
{
	return syscall(SYS_perf_event_open, attr, pid, cpu,
			group_fd, flags);
}

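/*
 * Open a counter measuring the calling thread (pid 0) on any CPU
 * (cpu -1), without an event group, so the count follows the thread
 * across CPU migrations.
 */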
static
int open_perf_fd(struct perf_event_attr *attr)
{
	int fd;

	fd = sys_perf_event_open(attr, 0, -1, -1, 0);
	if (fd < 0)
		return -1;

	return fd;
}

static
void close_perf_fd(int fd)
{
	int ret;

	if (fd < 0)
		return;

	ret = close(fd);
	if (ret) {
		perror("Error closing LTTng-UST perf counter FD");
	}
}

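/*
 * Map the first page of the perf event file descriptor to access the
 * perf_event_mmap_page header. On architectures that read the counter
 * through rdpmc, the file descriptor is no longer needed once the
 * page is mapped, so it is closed here.
 */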
static
struct perf_event_mmap_page *setup_perf(
		struct lttng_perf_counter_thread_field *thread_field)
{
	void *perf_addr;

	perf_addr = mmap(NULL, sizeof(struct perf_event_mmap_page),
			PROT_READ, MAP_SHARED, thread_field->fd, 0);
	if (perf_addr == MAP_FAILED)
		perf_addr = NULL;

	if (!arch_perf_use_read()) {
		close_perf_fd(thread_field->fd);
		thread_field->fd = -1;
	}

	return perf_addr;
}

static
void unmap_perf_page(struct perf_event_mmap_page *pc)
{
	int ret;

	if (!pc)
		return;
	ret = munmap(pc, sizeof(struct perf_event_mmap_page));
	if (ret < 0) {
		PERROR("Error in munmap");
		abort();
	}
}

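/*
 * Lazily allocate the per-thread state on first use. All signals are
 * blocked around the check-and-set of the thread-specific data: a
 * signal handler hitting a tracepoint on this thread may already have
 * allocated the state, hence the re-check once signals are disabled.
 */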
static
struct lttng_perf_counter_thread *alloc_perf_counter_thread(void)
{
	struct lttng_perf_counter_thread *perf_thread;
	sigset_t newmask, oldmask;
	int ret;

	ret = sigfillset(&newmask);
	if (ret)
		abort();
	ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
	if (ret)
		abort();
	/* Check again with signals disabled */
	perf_thread = pthread_getspecific(perf_counter_key);
	if (perf_thread)
		goto skip;
	perf_thread = zmalloc(sizeof(*perf_thread));
	if (!perf_thread)
		abort();
	CDS_INIT_LIST_HEAD(&perf_thread->rcu_field_list);
	ret = pthread_setspecific(perf_counter_key, perf_thread);
	if (ret)
		abort();
skip:
	ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
	if (ret)
		abort();
	return perf_thread;
}

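/*
 * Create the per-thread counter for this context field on first use
 * by the calling thread: open the perf fd, map the counter page, and
 * link the new node into both the per-field and per-thread lists
 * under the UST lock. Signals are blocked for the same reason as in
 * alloc_perf_counter_thread().
 */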
static
struct lttng_perf_counter_thread_field *
	add_thread_field(struct lttng_perf_counter_field *perf_field,
		struct lttng_perf_counter_thread *perf_thread)
{
	struct lttng_perf_counter_thread_field *thread_field;
	sigset_t newmask, oldmask;
	int ret;

	ret = sigfillset(&newmask);
	if (ret)
		abort();
	ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
	if (ret)
		abort();
	/* Check again with signals disabled */
	cds_list_for_each_entry_rcu(thread_field, &perf_thread->rcu_field_list,
			rcu_field_node) {
		if (thread_field->field == perf_field)
			goto skip;
	}
	thread_field = zmalloc(sizeof(*thread_field));
	if (!thread_field)
		abort();
	thread_field->field = perf_field;
	thread_field->fd = open_perf_fd(&perf_field->attr);
	if (thread_field->fd >= 0)
		thread_field->pc = setup_perf(thread_field);
	/*
	 * Note: thread_field->pc can be NULL if setup_perf() fails.
	 * Also, thread_field->fd can be -1 if open_perf_fd() fails.
	 */
	ust_lock_nocheck();
	cds_list_add_rcu(&thread_field->rcu_field_node,
			&perf_thread->rcu_field_list);
	cds_list_add(&thread_field->thread_field_node,
			&perf_field->thread_field_list);
	ust_unlock();
skip:
	ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
	if (ret)
		abort();
	return thread_field;
}

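/*
 * Fast-path lookup: walk the RCU per-thread list of fields to find
 * the thread field matching this context field, creating it on first
 * use.
 */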
static
struct lttng_perf_counter_thread_field *
	get_thread_field(struct lttng_perf_counter_field *field)
{
	struct lttng_perf_counter_thread *perf_thread;
	struct lttng_perf_counter_thread_field *thread_field;

	perf_thread = pthread_getspecific(perf_counter_key);
	if (!perf_thread)
		perf_thread = alloc_perf_counter_thread();
	cds_list_for_each_entry_rcu(thread_field, &perf_thread->rcu_field_list,
			rcu_field_node) {
		if (thread_field->field == field)
			return thread_field;
	}
	/* perf_counter_thread_field not found, need to add one */
	return add_thread_field(field, perf_thread);
}

static
uint64_t wrapper_perf_counter_read(struct lttng_ctx_field *field)
{
	struct lttng_perf_counter_field *perf_field;
	struct lttng_perf_counter_thread_field *perf_thread_field;

	perf_field = field->u.perf_counter;
	perf_thread_field = get_thread_field(perf_field);
	return read_perf_counter(perf_thread_field);
}

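/*
 * Tracing fast-path callback: sample the counter and write the 64-bit
 * value into the ring buffer at the field's alignment.
 */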
static
void perf_counter_record(struct lttng_ctx_field *field,
		struct lttng_ust_lib_ring_buffer_ctx *ctx,
		struct lttng_channel *chan)
{
	uint64_t value;

	value = wrapper_perf_counter_read(field);
	lib_ring_buffer_align_ctx(ctx, lttng_alignof(value));
	chan->ops->event_write(ctx, &value, sizeof(value));
}

static
void perf_counter_get_value(struct lttng_ctx_field *field,
		struct lttng_ctx_value *value)
{
	uint64_t v;

	v = wrapper_perf_counter_read(field);
	value->u.s64 = v;
}

/* Called with UST lock held */
static
void lttng_destroy_perf_thread_field(
		struct lttng_perf_counter_thread_field *thread_field)
{
	close_perf_fd(thread_field->fd);
	unmap_perf_page(thread_field->pc);
	cds_list_del_rcu(&thread_field->rcu_field_node);
	cds_list_del(&thread_field->thread_field_node);
	free(thread_field);
}

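/*
 * Thread-specific data destructor, invoked by pthread at thread exit:
 * tear down all of the thread's counters under the UST lock so thread
 * exit cannot race with session teardown.
 */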
static
void lttng_destroy_perf_thread_key(void *_key)
{
	struct lttng_perf_counter_thread *perf_thread = _key;
	struct lttng_perf_counter_thread_field *pos, *p;

	ust_lock_nocheck();
	cds_list_for_each_entry_safe(pos, p, &perf_thread->rcu_field_list,
			rcu_field_node)
		lttng_destroy_perf_thread_field(pos);
	ust_unlock();
	free(perf_thread);
}

/* Called with UST lock held */
static
void lttng_destroy_perf_counter_field(struct lttng_ctx_field *field)
{
	struct lttng_perf_counter_field *perf_field;
	struct lttng_perf_counter_thread_field *pos, *p;

	free((char *) field->event_field.name);
	perf_field = field->u.perf_counter;
	/*
	 * This teardown is performed when no thread can concurrently
	 * perform a "get" on the field anymore, thanks to the urcu-bp
	 * grace period.
	 */
	cds_list_for_each_entry_safe(pos, p, &perf_field->thread_field_list,
			thread_field_node)
		lttng_destroy_perf_thread_field(pos);
	free(perf_field);
}

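/*
 * On ARMv7, many PMU implementations cannot filter events by
 * privilege level, and requesting exclude_kernel would make
 * perf_event_open() fail there, so kernel-mode events are included in
 * the count on that architecture.
 */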
#ifdef __ARM_ARCH_7A__

static
int perf_get_exclude_kernel(void)
{
	return 0;
}

#else /* __ARM_ARCH_7A__ */

static
int perf_get_exclude_kernel(void)
{
	return 1;
}

#endif /* __ARM_ARCH_7A__ */

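/*
 * Append a perf counter context field described as a 64-bit base-10
 * integer, probe-opening the counter once to verify that it is usable
 * in this process. For example (hypothetical context name), CPU
 * cycles could be added with:
 *
 *	ret = lttng_add_perf_counter_to_ctx(PERF_TYPE_HARDWARE,
 *			PERF_COUNT_HW_CPU_CYCLES,
 *			"perf_thread_cpu_cycles", &ctx);
 */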
/* Called with UST lock held */
int lttng_add_perf_counter_to_ctx(uint32_t type,
		uint64_t config,
		const char *name,
		struct lttng_ctx **ctx)
{
	struct lttng_ctx_field *field;
	struct lttng_perf_counter_field *perf_field;
	char *name_alloc;
	int ret;

	name_alloc = strdup(name);
	if (!name_alloc) {
		ret = -ENOMEM;
		goto name_alloc_error;
	}
	perf_field = zmalloc(sizeof(*perf_field));
	if (!perf_field) {
		ret = -ENOMEM;
		goto perf_field_alloc_error;
	}
	field = lttng_append_context(ctx);
	if (!field) {
		ret = -ENOMEM;
		goto append_context_error;
	}
	if (lttng_find_context(*ctx, name_alloc)) {
		ret = -EEXIST;
		goto find_error;
	}

	field->destroy = lttng_destroy_perf_counter_field;

	field->event_field.name = name_alloc;
	field->event_field.type.atype = atype_integer;
	field->event_field.type.u.basic.integer.size =
			sizeof(uint64_t) * CHAR_BIT;
	field->event_field.type.u.basic.integer.alignment =
			lttng_alignof(uint64_t) * CHAR_BIT;
	field->event_field.type.u.basic.integer.signedness =
			lttng_is_signed_type(uint64_t);
	field->event_field.type.u.basic.integer.reverse_byte_order = 0;
	field->event_field.type.u.basic.integer.base = 10;
	field->event_field.type.u.basic.integer.encoding = lttng_encode_none;
	field->get_size = perf_counter_get_size;
	field->record = perf_counter_record;
	field->get_value = perf_counter_get_value;

	perf_field->attr.type = type;
	perf_field->attr.config = config;
	perf_field->attr.exclude_kernel = perf_get_exclude_kernel();
	CDS_INIT_LIST_HEAD(&perf_field->thread_field_list);
	field->u.perf_counter = perf_field;

	/* Ensure that this perf counter can be used in this process. */
	ret = open_perf_fd(&perf_field->attr);
	if (ret < 0) {
		ret = -ENODEV;
		goto setup_error;
	}
	close_perf_fd(ret);

	/*
	 * Contexts can only be added before tracing is started, so we
	 * don't have to synchronize against concurrent threads using
	 * the field here.
	 */

	lttng_context_update(*ctx);
	return 0;

setup_error:
find_error:
	lttng_remove_context_field(ctx, field);
append_context_error:
	free(perf_field);
perf_field_alloc_error:
	free(name_alloc);
name_alloc_error:
	return ret;
}

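/*
 * Library init/exit: create the thread-specific data key whose
 * destructor tears down per-thread counter state at thread exit, and
 * delete it on library unload.
 */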
int lttng_perf_counter_init(void)
{
	int ret;

	ret = pthread_key_create(&perf_counter_key,
			lttng_destroy_perf_thread_key);
	if (ret)
		ret = -ret;
	return ret;
}

void lttng_perf_counter_exit(void)
{
	int ret;

	ret = pthread_key_delete(perf_counter_key);
	if (ret) {
		errno = ret;
		PERROR("Error in pthread_key_delete");
	}
}