Fix: perf counters: sign-extend pmc register
lttng-ust.git: liblttng-ust/lttng-context-perf-counters.c
/*
 * lttng-context-perf-counters.c
 *
 * LTTng UST performance monitoring counters (perf-counters) integration.
 *
 * Copyright (C) 2009-2014 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; only
 * version 2.1 of the License.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
#include <limits.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include <lttng/ust-events.h>
#include <lttng/ust-tracer.h>
#include <lttng/ringbuffer-config.h>
#include <urcu/system.h>
#include <urcu/arch.h>
#include <urcu/rculist.h>
#include <helper.h>
#include <urcu/ref.h>
#include <usterr-signal-safe.h>
#include <signal.h>
#include "lttng-tracer-core.h"

/*
 * We use a global perf counter key and iterate on per-thread RCU lists
 * of fields in the fast path, even though this is not, strictly
 * speaking, the layout with the best fast-path complexity, to ensure
 * that teardown of sessions vs. thread exit is handled racelessly.
 *
 * Updates and traversals of thread_field_list are protected by the UST lock.
 * Updates to rcu_field_list are protected by the UST lock.
 */

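/*
 * Data-structure overview: each perf counter context field owns one
 * struct lttng_perf_counter_field. Every thread that records the field
 * lazily creates a struct lttng_perf_counter_thread_field, linked both
 * into the field's thread_field_list and into the thread's
 * rcu_field_list (reachable through the perf_counter_key TLS key).
 */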
struct lttng_perf_counter_thread_field {
        struct lttng_perf_counter_field *field; /* Back reference */
        struct perf_event_mmap_page *pc;
        struct cds_list_head thread_field_node; /* Per-field list of thread fields (node) */
        struct cds_list_head rcu_field_node;    /* RCU per-thread list of fields (node) */
        int fd;                                 /* Perf FD */
};

struct lttng_perf_counter_thread {
        struct cds_list_head rcu_field_list;    /* RCU per-thread list of fields */
};

struct lttng_perf_counter_field {
        struct perf_event_attr attr;
        struct cds_list_head thread_field_list; /* Per-field list of thread fields */
};

static pthread_key_t perf_counter_key;

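/*
 * Size (including implicit alignment padding) taken by this context
 * field in the ring buffer: one aligned uint64_t counter value.
 */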
static
size_t perf_counter_get_size(struct lttng_ctx_field *field, size_t offset)
{
        size_t size = 0;

        size += lib_ring_buffer_align(offset, lttng_alignof(uint64_t));
        size += sizeof(uint64_t);
        return size;
}

#if defined(__x86_64__) || defined(__i386__)

static
uint64_t rdpmc(unsigned int counter)
{
        unsigned int low, high;

        asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));

        return low | ((uint64_t) high) << 32;
}

static bool arch_perf_use_read(void)
{
        return false;
}

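/*
 * On x86, the counter is read directly from user space with the rdpmc
 * instruction, following the self-monitoring protocol documented in
 * <linux/perf_event.h> for the mmap'd perf_event_mmap_page: retry the
 * read whenever pc->lock changes, and treat pc->index == 0 as "no
 * hardware counter currently scheduled". The raw counter is only
 * pc->pmc_width bits wide, so it is sign-extended before being added
 * to pc->offset. For example, with pmc_width == 48, shifting left and
 * then arithmetically right by 16 bits makes bit 47 the sign bit of
 * the resulting 64-bit value.
 */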
static
uint64_t read_perf_counter(
                struct lttng_perf_counter_thread_field *thread_field)
{
        uint32_t seq, idx;
        uint64_t count;
        struct perf_event_mmap_page *pc = thread_field->pc;

        if (caa_unlikely(!pc))
                return 0;

        do {
                seq = CMM_LOAD_SHARED(pc->lock);
                cmm_barrier();

                idx = pc->index;
                if (idx) {
                        int64_t pmcval;

                        pmcval = rdpmc(idx - 1);
                        /* Sign-extend the pmc register result. */
                        pmcval <<= 64 - pc->pmc_width;
                        pmcval >>= 64 - pc->pmc_width;
                        count = pc->offset + pmcval;
                } else {
                        count = 0;
                }
                cmm_barrier();
        } while (CMM_LOAD_SHARED(pc->lock) != seq);

        return count;
}

#elif defined (__ARM_ARCH_7A__)

static bool arch_perf_use_read(void)
{
        return true;
}

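/*
 * On ARMv7, this implementation does not read the PMU directly from
 * user space; the counter value is obtained with a read(2) on the perf
 * event file descriptor, which therefore stays open for the lifetime
 * of the thread field (see arch_perf_use_read() above).
 */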
static
uint64_t read_perf_counter(
                struct lttng_perf_counter_thread_field *thread_field)
{
        uint64_t count;

        if (caa_unlikely(thread_field->fd < 0))
                return 0;

        /*
         * Compare against a signed value: read() returns a ssize_t, and
         * an unsigned comparison would let a -1 error return slip through.
         */
        if (caa_unlikely(read(thread_field->fd, &count, sizeof(count))
                        < (ssize_t) sizeof(count)))
                return 0;

        return count;
}

#else /* defined(__x86_64__) || defined(__i386__) || defined(__ARM_ARCH_7A__) */

#error "Perf event counters are only supported on x86 and ARMv7 so far."

#endif /* #else defined(__x86_64__) || defined(__i386__) || defined(__ARM_ARCH_7A__) */

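/*
 * glibc does not provide a wrapper for perf_event_open(2), so the
 * system call is issued directly through syscall(2).
 */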
static
int sys_perf_event_open(struct perf_event_attr *attr,
                pid_t pid, int cpu, int group_fd,
                unsigned long flags)
{
        return syscall(SYS_perf_event_open, attr, pid, cpu,
                        group_fd, flags);
}

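/*
 * pid == 0 and cpu == -1 measure the calling thread/process on any
 * CPU; no group leader and no flags are used.
 */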
static
int open_perf_fd(struct perf_event_attr *attr)
{
        int fd;

        fd = sys_perf_event_open(attr, 0, -1, -1, 0);
        if (fd < 0)
                return -1;

        return fd;
}

static
void close_perf_fd(int fd)
{
        int ret;

        if (fd < 0)
                return;

        ret = close(fd);
        if (ret) {
                perror("Error closing LTTng-UST perf event FD");
        }
}

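/*
 * Map the first page of the perf event FD to obtain the
 * perf_event_mmap_page self-monitoring header. On architectures that
 * read the counter directly (x86), the FD itself is no longer needed
 * once the page is mapped, so it is closed right away.
 */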
static
struct perf_event_mmap_page *setup_perf(
                struct lttng_perf_counter_thread_field *thread_field)
{
        void *perf_addr;

        perf_addr = mmap(NULL, sizeof(struct perf_event_mmap_page),
                        PROT_READ, MAP_SHARED, thread_field->fd, 0);
        if (perf_addr == MAP_FAILED)
                perf_addr = NULL;

        if (!arch_perf_use_read()) {
                close_perf_fd(thread_field->fd);
                thread_field->fd = -1;
        }

        return perf_addr;
}

static
void unmap_perf_page(struct perf_event_mmap_page *pc)
{
        int ret;

        if (!pc)
                return;
        ret = munmap(pc, sizeof(struct perf_event_mmap_page));
        if (ret < 0) {
                PERROR("Error in munmap");
                abort();
        }
}

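/*
 * Allocate the per-thread state on first use. All signals are blocked
 * while the TLS key is installed, presumably so that a tracing signal
 * handler running on this thread cannot observe (or race with) a
 * half-initialized perf_counter_key value.
 */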
static
struct lttng_perf_counter_thread *alloc_perf_counter_thread(void)
{
        struct lttng_perf_counter_thread *perf_thread;
        sigset_t newmask, oldmask;
        int ret;

        ret = sigfillset(&newmask);
        if (ret)
                abort();
        ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
        if (ret)
                abort();
        /* Check again with signals disabled */
        perf_thread = pthread_getspecific(perf_counter_key);
        if (perf_thread)
                goto skip;
        perf_thread = zmalloc(sizeof(*perf_thread));
        if (!perf_thread)
                abort();
        CDS_INIT_LIST_HEAD(&perf_thread->rcu_field_list);
        ret = pthread_setspecific(perf_counter_key, perf_thread);
        if (ret)
                abort();
skip:
        ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
        if (ret)
                abort();
        return perf_thread;
}

static
struct lttng_perf_counter_thread_field *
        add_thread_field(struct lttng_perf_counter_field *perf_field,
                struct lttng_perf_counter_thread *perf_thread)
{
        struct lttng_perf_counter_thread_field *thread_field;
        sigset_t newmask, oldmask;
        int ret;

        ret = sigfillset(&newmask);
        if (ret)
                abort();
        ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
        if (ret)
                abort();
        /* Check again with signals disabled */
        cds_list_for_each_entry_rcu(thread_field, &perf_thread->rcu_field_list,
                        rcu_field_node) {
                if (thread_field->field == perf_field)
                        goto skip;
        }
        thread_field = zmalloc(sizeof(*thread_field));
        if (!thread_field)
                abort();
        thread_field->field = perf_field;
        thread_field->fd = open_perf_fd(&perf_field->attr);
        if (thread_field->fd >= 0)
                thread_field->pc = setup_perf(thread_field);
        /*
         * Note: thread_field->pc can be NULL if setup_perf() fails.
         * Also, thread_field->fd can be -1 if open_perf_fd() fails.
         */
        ust_lock_nocheck();
        cds_list_add_rcu(&thread_field->rcu_field_node,
                        &perf_thread->rcu_field_list);
        cds_list_add(&thread_field->thread_field_node,
                        &perf_field->thread_field_list);
        ust_unlock();
skip:
        ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
        if (ret)
                abort();
        return thread_field;
}

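/*
 * Fast-path lookup: find (or lazily create) the calling thread's
 * counter instance for this context field. The RCU list traversal does
 * not take the UST lock.
 */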
static
struct lttng_perf_counter_thread_field *
        get_thread_field(struct lttng_perf_counter_field *field)
{
        struct lttng_perf_counter_thread *perf_thread;
        struct lttng_perf_counter_thread_field *thread_field;

        perf_thread = pthread_getspecific(perf_counter_key);
        if (!perf_thread)
                perf_thread = alloc_perf_counter_thread();
        cds_list_for_each_entry_rcu(thread_field, &perf_thread->rcu_field_list,
                        rcu_field_node) {
                if (thread_field->field == field)
                        return thread_field;
        }
        /* perf_counter_thread_field not found, need to add one */
        return add_thread_field(field, perf_thread);
}

static
uint64_t wrapper_perf_counter_read(struct lttng_ctx_field *field)
{
        struct lttng_perf_counter_field *perf_field;
        struct lttng_perf_counter_thread_field *perf_thread_field;

        perf_field = field->u.perf_counter;
        perf_thread_field = get_thread_field(perf_field);
        return read_perf_counter(perf_thread_field);
}

static
void perf_counter_record(struct lttng_ctx_field *field,
                struct lttng_ust_lib_ring_buffer_ctx *ctx,
                struct lttng_channel *chan)
{
        uint64_t value;

        value = wrapper_perf_counter_read(field);
        lib_ring_buffer_align_ctx(ctx, lttng_alignof(value));
        chan->ops->event_write(ctx, &value, sizeof(value));
}

static
void perf_counter_get_value(struct lttng_ctx_field *field,
                struct lttng_ctx_value *value)
{
        uint64_t v;

        v = wrapper_perf_counter_read(field);
        value->u.s64 = v;
}

/* Called with UST lock held */
static
void lttng_destroy_perf_thread_field(
                struct lttng_perf_counter_thread_field *thread_field)
{
        close_perf_fd(thread_field->fd);
        unmap_perf_page(thread_field->pc);
        cds_list_del_rcu(&thread_field->rcu_field_node);
        cds_list_del(&thread_field->thread_field_node);
        free(thread_field);
}

static
void lttng_destroy_perf_thread_key(void *_key)
{
        struct lttng_perf_counter_thread *perf_thread = _key;
        struct lttng_perf_counter_thread_field *pos, *p;

        ust_lock_nocheck();
        cds_list_for_each_entry_safe(pos, p, &perf_thread->rcu_field_list,
                        rcu_field_node)
                lttng_destroy_perf_thread_field(pos);
        ust_unlock();
        free(perf_thread);
}

/* Called with UST lock held */
static
void lttng_destroy_perf_counter_field(struct lttng_ctx_field *field)
{
        struct lttng_perf_counter_field *perf_field;
        struct lttng_perf_counter_thread_field *pos, *p;

        free((char *) field->event_field.name);
        perf_field = field->u.perf_counter;
        /*
         * This cleanup is performed when no thread can concurrently
         * perform a "get" anymore, thanks to the urcu-bp grace period.
         */
        cds_list_for_each_entry_safe(pos, p, &perf_field->thread_field_list,
                        thread_field_node)
                lttng_destroy_perf_thread_field(pos);
        free(perf_field);
}

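/*
 * Whether the counter should exclude kernel-mode events. ARMv7 is
 * special-cased to count kernel events as well; other architectures
 * restrict the counter to user space.
 */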
#ifdef __ARM_ARCH_7A__

static
int perf_get_exclude_kernel(void)
{
        return 0;
}

#else /* __ARM_ARCH_7A__ */

static
int perf_get_exclude_kernel(void)
{
        return 1;
}

#endif /* __ARM_ARCH_7A__ */

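/*
 * Add a perf counter context field to a context. The type and config
 * values follow the perf_event_attr convention from
 * <linux/perf_event.h>, e.g. type == PERF_TYPE_HARDWARE with
 * config == PERF_COUNT_HW_CPU_CYCLES for a CPU cycle counter.
 */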
/* Called with UST lock held */
int lttng_add_perf_counter_to_ctx(uint32_t type,
                uint64_t config,
                const char *name,
                struct lttng_ctx **ctx)
{
        struct lttng_ctx_field *field;
        struct lttng_perf_counter_field *perf_field;
        char *name_alloc;
        int ret;

        name_alloc = strdup(name);
        if (!name_alloc) {
                ret = -ENOMEM;
                goto name_alloc_error;
        }
        perf_field = zmalloc(sizeof(*perf_field));
        if (!perf_field) {
                ret = -ENOMEM;
                goto perf_field_alloc_error;
        }
        field = lttng_append_context(ctx);
        if (!field) {
                ret = -ENOMEM;
                goto append_context_error;
        }
        if (lttng_find_context(*ctx, name_alloc)) {
                ret = -EEXIST;
                goto find_error;
        }

        field->destroy = lttng_destroy_perf_counter_field;

        field->event_field.name = name_alloc;
        field->event_field.type.atype = atype_integer;
        field->event_field.type.u.basic.integer.size =
                        sizeof(uint64_t) * CHAR_BIT;
        field->event_field.type.u.basic.integer.alignment =
                        lttng_alignof(uint64_t) * CHAR_BIT;
        field->event_field.type.u.basic.integer.signedness =
                        lttng_is_signed_type(uint64_t);
        field->event_field.type.u.basic.integer.reverse_byte_order = 0;
        field->event_field.type.u.basic.integer.base = 10;
        field->event_field.type.u.basic.integer.encoding = lttng_encode_none;
        field->get_size = perf_counter_get_size;
        field->record = perf_counter_record;
        field->get_value = perf_counter_get_value;

        perf_field->attr.type = type;
        perf_field->attr.config = config;
        perf_field->attr.exclude_kernel = perf_get_exclude_kernel();
        CDS_INIT_LIST_HEAD(&perf_field->thread_field_list);
        field->u.perf_counter = perf_field;

        /* Ensure that this perf counter can be used in this process. */
        ret = open_perf_fd(&perf_field->attr);
        if (ret < 0) {
                ret = -ENODEV;
                goto setup_error;
        }
        close_perf_fd(ret);

        /*
         * Contexts can only be added before tracing is started, so we
         * don't have to synchronize against concurrent threads using
         * the field here.
         */

        lttng_context_update(*ctx);
        return 0;

setup_error:
find_error:
        lttng_remove_context_field(ctx, field);
append_context_error:
        free(perf_field);
perf_field_alloc_error:
        free(name_alloc);
name_alloc_error:
        return ret;
}

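/*
 * Library init/exit: create and delete the TLS key whose destructor
 * (lttng_destroy_perf_thread_key) tears down per-thread counter state
 * when a traced thread exits.
 */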
int lttng_perf_counter_init(void)
{
        int ret;

        ret = pthread_key_create(&perf_counter_key,
                        lttng_destroy_perf_thread_key);
        if (ret)
                ret = -ret;
        return ret;
}

void lttng_perf_counter_exit(void)
{
        int ret;

        ret = pthread_key_delete(perf_counter_key);
        if (ret) {
                errno = ret;
                PERROR("Error in pthread_key_delete");
        }
}