Fix: eliminate timestamp overlap between packets
[lttng-ust.git] / libringbuffer / ring_buffer_frontend.c
CommitLineData
852c2936
MD
1/*
2 * ring_buffer_frontend.c
3 *
e92f3e28
MD
4 * Copyright (C) 2005-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; only
9 * version 2.1 of the License.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 *
852c2936
MD
20 *
21 * Ring buffer wait-free buffer synchronization. Producer-consumer and flight
22 * recorder (overwrite) modes. See thesis:
23 *
24 * Desnoyers, Mathieu (2009), "Low-Impact Operating System Tracing", Ph.D.
25 * dissertation, Ecole Polytechnique de Montreal.
26 * http://www.lttng.org/pub/thesis/desnoyers-dissertation-2009-12.pdf
27 *
28 * - Algorithm presentation in Chapter 5:
29 * "Lockless Multi-Core High-Throughput Buffering".
30 * - Algorithm formal verification in Section 8.6:
31 * "Formal verification of LTTng"
32 *
33 * Author:
34 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
35 *
36 * Inspired from LTT and RelayFS:
37 * Karim Yaghmour <karim@opersys.com>
38 * Tom Zanussi <zanussi@us.ibm.com>
39 * Bob Wisniewski <bob@watson.ibm.com>
40 * And from K42 :
41 * Bob Wisniewski <bob@watson.ibm.com>
42 *
43 * Buffer reader semantic :
44 *
45 * - get_subbuf_size
46 * while buffer is not finalized and empty
47 * - get_subbuf
48 * - if return value != 0, continue
49 * - splice one subbuffer worth of data to a pipe
50 * - splice the data from pipe to disk/network
51 * - put_subbuf
852c2936
MD
52 */
53
5ad63a16 54#define _GNU_SOURCE
a6352fd4 55#include <sys/types.h>
431d5cf0
MD
56#include <sys/mman.h>
57#include <sys/stat.h>
03d2d293 58#include <unistd.h>
431d5cf0 59#include <fcntl.h>
03d2d293
MD
60#include <signal.h>
61#include <time.h>
14641deb 62#include <urcu/compiler.h>
a6352fd4 63#include <urcu/ref.h>
8c90a710 64#include <urcu/tls-compat.h>
35897f8b 65#include <helper.h>
14641deb 66
a6352fd4 67#include "smp.h"
4318ae1b 68#include <lttng/ringbuffer-config.h>
2fed87ae 69#include "vatomic.h"
4931a13e
MD
70#include "backend.h"
71#include "frontend.h"
a6352fd4 72#include "shm.h"
f645cfa7 73#include "tlsfixup.h"
bdcf8d82 74#include "../liblttng-ust/compat.h" /* For ENODATA */
852c2936 75
431d5cf0
MD
76#ifndef max
77#define max(a, b) ((a) > (b) ? (a) : (b))
78#endif
79
64493e4f
MD
80/* Print DBG() messages about events lost only every 1048576 hits */
81#define DBG_PRINT_NR_LOST (1UL << 20)
82
34a91bdb
MD
/*
 * Real-time signals reserved by the ring buffer timers. The compound
 * expressions are parenthesized so that the macros expand safely inside
 * any surrounding expression (e.g. next to '*' or a cast).
 */
#define LTTNG_UST_RB_SIG_FLUSH		SIGRTMIN
#define LTTNG_UST_RB_SIG_READ		(SIGRTMIN + 1)
#define LTTNG_UST_RB_SIG_TEARDOWN	(SIGRTMIN + 2)
03d2d293
MD
86#define CLOCKID CLOCK_MONOTONIC
87
2432c3c9
MD
88/*
89 * Use POSIX SHM: shm_open(3) and shm_unlink(3).
90 * close(2) to close the fd returned by shm_open.
91 * shm_unlink releases the shared memory object name.
92 * ftruncate(2) sets the size of the memory object.
93 * mmap/munmap maps the shared memory obj to a virtual address in the
94 * calling process (should be done both in libust and consumer).
95 * See shm_overview(7) for details.
96 * Pass file descriptor returned by shm_open(3) to ltt-sessiond through
97 * a UNIX socket.
98 *
99 * Since we don't need to access the object using its name, we can
100 * immediately shm_unlink(3) it, and only keep the handle with its file
101 * descriptor.
102 */
103
852c2936
MD
/*
 * Internal structure representing offsets to use at a sub-buffer switch.
 * The four one-bit flags record which switch steps are required.
 */
struct switch_offsets {
	unsigned long begin;
	unsigned long end;
	unsigned long old;
	size_t pre_header_padding;
	size_t size;
	unsigned int switch_new_start:1;
	unsigned int switch_new_end:1;
	unsigned int switch_old_start:1;
	unsigned int switch_old_end:1;
};
113
8c90a710 114DEFINE_URCU_TLS(unsigned int, lib_ring_buffer_nesting);
852c2936 115
cb7378b3
MD
116/*
117 * wakeup_fd_mutex protects wakeup fd use by timer from concurrent
118 * close.
119 */
120static pthread_mutex_t wakeup_fd_mutex = PTHREAD_MUTEX_INITIALIZER;
121
852c2936
MD
122static
123void lib_ring_buffer_print_errors(struct channel *chan,
009769ca
MD
124 struct lttng_ust_lib_ring_buffer *buf, int cpu,
125 struct lttng_ust_shm_handle *handle);
852c2936 126
03d2d293
MD
/*
 * Timer teardown races with the freeing of the private data passed to
 * the timer signals. All ring buffer signals are handled by a single
 * thread, which provides a synchronization point between the handling
 * of each signal.
 * Protected by the lock within the structure.
 */
struct timer_signal_data {
	/* thread id managing signals */
	pthread_t tid;
	/* non-zero once the signal thread has been set up */
	int setup_done;
	/* set by the signal thread when it handles a teardown signal */
	int qs_done;
	pthread_mutex_t lock;
};
139
64bf51a6
MD
140static struct timer_signal_data timer_signal = {
141 .tid = 0,
142 .setup_done = 0,
143 .qs_done = 0,
144 .lock = PTHREAD_MUTEX_INITIALIZER,
145};
03d2d293 146
852c2936
MD
147/**
148 * lib_ring_buffer_reset - Reset ring buffer to initial values.
149 * @buf: Ring buffer.
150 *
151 * Effectively empty the ring buffer. Should be called when the buffer is not
152 * used for writing. The ring buffer can be opened for reading, but the reader
153 * should not be using the iterator concurrently with reset. The previous
154 * current iterator record is reset.
155 */
4cfec15c 156void lib_ring_buffer_reset(struct lttng_ust_lib_ring_buffer *buf,
38fae1d3 157 struct lttng_ust_shm_handle *handle)
852c2936 158{
1d498196 159 struct channel *chan = shmp(handle, buf->backend.chan);
4cfec15c 160 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
161 unsigned int i;
162
163 /*
164 * Reset iterator first. It will put the subbuffer if it currently holds
165 * it.
166 */
852c2936
MD
167 v_set(config, &buf->offset, 0);
168 for (i = 0; i < chan->backend.num_subbuf; i++) {
4746ae29
MD
169 v_set(config, &shmp_index(handle, buf->commit_hot, i)->cc, 0);
170 v_set(config, &shmp_index(handle, buf->commit_hot, i)->seq, 0);
171 v_set(config, &shmp_index(handle, buf->commit_cold, i)->cc_sb, 0);
852c2936 172 }
a6352fd4
MD
173 uatomic_set(&buf->consumed, 0);
174 uatomic_set(&buf->record_disabled, 0);
852c2936 175 v_set(config, &buf->last_tsc, 0);
1d498196 176 lib_ring_buffer_backend_reset(&buf->backend, handle);
852c2936
MD
177 /* Don't reset number of active readers */
178 v_set(config, &buf->records_lost_full, 0);
179 v_set(config, &buf->records_lost_wrap, 0);
180 v_set(config, &buf->records_lost_big, 0);
181 v_set(config, &buf->records_count, 0);
182 v_set(config, &buf->records_overrun, 0);
183 buf->finalized = 0;
184}
852c2936
MD
185
186/**
187 * channel_reset - Reset channel to initial values.
188 * @chan: Channel.
189 *
190 * Effectively empty the channel. Should be called when the channel is not used
191 * for writing. The channel can be opened for reading, but the reader should not
192 * be using the iterator concurrently with reset. The previous current iterator
193 * record is reset.
194 */
195void channel_reset(struct channel *chan)
196{
197 /*
198 * Reset iterators first. Will put the subbuffer if held for reading.
199 */
a6352fd4 200 uatomic_set(&chan->record_disabled, 0);
852c2936
MD
201 /* Don't reset commit_count_mask, still valid */
202 channel_backend_reset(&chan->backend);
203 /* Don't reset switch/read timer interval */
204 /* Don't reset notifiers and notifier enable bits */
205 /* Don't reset reader reference count */
206}
852c2936
MD
207
208/*
209 * Must be called under cpu hotplug protection.
210 */
4cfec15c 211int lib_ring_buffer_create(struct lttng_ust_lib_ring_buffer *buf,
a6352fd4 212 struct channel_backend *chanb, int cpu,
38fae1d3 213 struct lttng_ust_shm_handle *handle,
1d498196 214 struct shm_object *shmobj)
852c2936 215{
4cfec15c 216 const struct lttng_ust_lib_ring_buffer_config *config = &chanb->config;
14641deb 217 struct channel *chan = caa_container_of(chanb, struct channel, backend);
a3f61e7f 218 void *priv = channel_get_private(chan);
852c2936 219 size_t subbuf_header_size;
2fed87ae 220 uint64_t tsc;
852c2936
MD
221 int ret;
222
223 /* Test for cpu hotplug */
224 if (buf->backend.allocated)
225 return 0;
226
a6352fd4 227 ret = lib_ring_buffer_backend_create(&buf->backend, &chan->backend,
1d498196 228 cpu, handle, shmobj);
852c2936
MD
229 if (ret)
230 return ret;
231
1d498196
MD
232 align_shm(shmobj, __alignof__(struct commit_counters_hot));
233 set_shmp(buf->commit_hot,
234 zalloc_shm(shmobj,
235 sizeof(struct commit_counters_hot) * chan->backend.num_subbuf));
236 if (!shmp(handle, buf->commit_hot)) {
852c2936
MD
237 ret = -ENOMEM;
238 goto free_chanbuf;
239 }
240
1d498196
MD
241 align_shm(shmobj, __alignof__(struct commit_counters_cold));
242 set_shmp(buf->commit_cold,
243 zalloc_shm(shmobj,
244 sizeof(struct commit_counters_cold) * chan->backend.num_subbuf));
245 if (!shmp(handle, buf->commit_cold)) {
852c2936
MD
246 ret = -ENOMEM;
247 goto free_commit;
248 }
249
852c2936
MD
250 /*
251 * Write the subbuffer header for first subbuffer so we know the total
252 * duration of data gathering.
253 */
254 subbuf_header_size = config->cb.subbuffer_header_size();
255 v_set(config, &buf->offset, subbuf_header_size);
4746ae29 256 subbuffer_id_clear_noref(config, &shmp_index(handle, buf->backend.buf_wsb, 0)->id);
1d498196
MD
257 tsc = config->cb.ring_buffer_clock_read(shmp(handle, buf->backend.chan));
258 config->cb.buffer_begin(buf, tsc, 0, handle);
4746ae29 259 v_add(config, subbuf_header_size, &shmp_index(handle, buf->commit_hot, 0)->cc);
852c2936
MD
260
261 if (config->cb.buffer_create) {
1d498196 262 ret = config->cb.buffer_create(buf, priv, cpu, chanb->name, handle);
852c2936
MD
263 if (ret)
264 goto free_init;
265 }
852c2936 266 buf->backend.allocated = 1;
852c2936
MD
267 return 0;
268
269 /* Error handling */
270free_init:
a6352fd4 271 /* commit_cold will be freed by shm teardown */
852c2936 272free_commit:
a6352fd4 273 /* commit_hot will be freed by shm teardown */
852c2936 274free_chanbuf:
852c2936
MD
275 return ret;
276}
277
03d2d293
MD
278static
279void lib_ring_buffer_channel_switch_timer(int sig, siginfo_t *si, void *uc)
852c2936 280{
03d2d293
MD
281 const struct lttng_ust_lib_ring_buffer_config *config;
282 struct lttng_ust_shm_handle *handle;
283 struct channel *chan;
284 int cpu;
285
286 assert(CMM_LOAD_SHARED(timer_signal.tid) == pthread_self());
287
288 chan = si->si_value.sival_ptr;
289 handle = chan->handle;
290 config = &chan->backend.config;
852c2936 291
34a91bdb 292 DBG("Switch timer for channel %p\n", chan);
03d2d293 293
4dcd7e80
MD
294 /*
295 * Only flush buffers periodically if readers are active.
296 */
cb7378b3 297 pthread_mutex_lock(&wakeup_fd_mutex);
03d2d293
MD
298 if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
299 for_each_possible_cpu(cpu) {
300 struct lttng_ust_lib_ring_buffer *buf =
301 shmp(handle, chan->backend.buf[cpu].shmp);
4dcd7e80
MD
302 if (uatomic_read(&buf->active_readers))
303 lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE,
304 chan->handle);
03d2d293
MD
305 }
306 } else {
307 struct lttng_ust_lib_ring_buffer *buf =
308 shmp(handle, chan->backend.buf[0].shmp);
309
34a91bdb
MD
310 if (uatomic_read(&buf->active_readers))
311 lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE,
312 chan->handle);
313 }
314 pthread_mutex_unlock(&wakeup_fd_mutex);
315 return;
316}
317
318static
319void lib_ring_buffer_channel_do_read(struct channel *chan)
320{
321 const struct lttng_ust_lib_ring_buffer_config *config;
322 struct lttng_ust_shm_handle *handle;
323 int cpu;
324
325 handle = chan->handle;
326 config = &chan->backend.config;
327
328 /*
329 * Only flush buffers periodically if readers are active.
330 */
331 pthread_mutex_lock(&wakeup_fd_mutex);
332 if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
333 for_each_possible_cpu(cpu) {
334 struct lttng_ust_lib_ring_buffer *buf =
335 shmp(handle, chan->backend.buf[cpu].shmp);
336
337 if (uatomic_read(&buf->active_readers)
338 && lib_ring_buffer_poll_deliver(config, buf,
339 chan, handle)) {
340 lib_ring_buffer_wakeup(buf, handle);
341 }
342 }
343 } else {
344 struct lttng_ust_lib_ring_buffer *buf =
345 shmp(handle, chan->backend.buf[0].shmp);
346
347 if (uatomic_read(&buf->active_readers)
348 && lib_ring_buffer_poll_deliver(config, buf,
349 chan, handle)) {
350 lib_ring_buffer_wakeup(buf, handle);
351 }
03d2d293 352 }
cb7378b3 353 pthread_mutex_unlock(&wakeup_fd_mutex);
34a91bdb
MD
354}
355
356static
357void lib_ring_buffer_channel_read_timer(int sig, siginfo_t *si, void *uc)
358{
359 struct channel *chan;
360
361 assert(CMM_LOAD_SHARED(timer_signal.tid) == pthread_self());
362 chan = si->si_value.sival_ptr;
363 DBG("Read timer for channel %p\n", chan);
364 lib_ring_buffer_channel_do_read(chan);
03d2d293
MD
365 return;
366}
367
368static
369void rb_setmask(sigset_t *mask)
370{
371 int ret;
372
373 ret = sigemptyset(mask);
374 if (ret) {
375 PERROR("sigemptyset");
376 }
34a91bdb
MD
377 ret = sigaddset(mask, LTTNG_UST_RB_SIG_FLUSH);
378 if (ret) {
379 PERROR("sigaddset");
380 }
381 ret = sigaddset(mask, LTTNG_UST_RB_SIG_READ);
03d2d293
MD
382 if (ret) {
383 PERROR("sigaddset");
384 }
385 ret = sigaddset(mask, LTTNG_UST_RB_SIG_TEARDOWN);
386 if (ret) {
387 PERROR("sigaddset");
388 }
389}
390
391static
392void *sig_thread(void *arg)
393{
394 sigset_t mask;
395 siginfo_t info;
396 int signr;
397
398 /* Only self thread will receive signal mask. */
399 rb_setmask(&mask);
400 CMM_STORE_SHARED(timer_signal.tid, pthread_self());
401
402 for (;;) {
403 signr = sigwaitinfo(&mask, &info);
404 if (signr == -1) {
34a91bdb
MD
405 if (errno != EINTR)
406 PERROR("sigwaitinfo");
03d2d293
MD
407 continue;
408 }
34a91bdb 409 if (signr == LTTNG_UST_RB_SIG_FLUSH) {
03d2d293
MD
410 lib_ring_buffer_channel_switch_timer(info.si_signo,
411 &info, NULL);
34a91bdb
MD
412 } else if (signr == LTTNG_UST_RB_SIG_READ) {
413 lib_ring_buffer_channel_read_timer(info.si_signo,
414 &info, NULL);
03d2d293
MD
415 } else if (signr == LTTNG_UST_RB_SIG_TEARDOWN) {
416 cmm_smp_mb();
417 CMM_STORE_SHARED(timer_signal.qs_done, 1);
418 cmm_smp_mb();
419 } else {
420 ERR("Unexptected signal %d\n", info.si_signo);
421 }
422 }
423 return NULL;
424}
425
426/*
03d2d293
MD
427 * Ensure only a single thread listens on the timer signal.
428 */
429static
430void lib_ring_buffer_setup_timer_thread(void)
431{
432 pthread_t thread;
433 int ret;
434
64bf51a6 435 pthread_mutex_lock(&timer_signal.lock);
03d2d293 436 if (timer_signal.setup_done)
64bf51a6 437 goto end;
03d2d293
MD
438
439 ret = pthread_create(&thread, NULL, &sig_thread, NULL);
440 if (ret) {
441 errno = ret;
442 PERROR("pthread_create");
443 }
444 ret = pthread_detach(thread);
445 if (ret) {
446 errno = ret;
447 PERROR("pthread_detach");
448 }
449 timer_signal.setup_done = 1;
64bf51a6
MD
450end:
451 pthread_mutex_unlock(&timer_signal.lock);
852c2936
MD
452}
453
03d2d293 454/*
64bf51a6 455 * Wait for signal-handling thread quiescent state.
03d2d293 456 */
64bf51a6
MD
457static
458void lib_ring_buffer_wait_signal_thread_qs(unsigned int signr)
459{
460 sigset_t pending_set;
461 int ret;
462
463 /*
464 * We need to be the only thread interacting with the thread
465 * that manages signals for teardown synchronization.
466 */
467 pthread_mutex_lock(&timer_signal.lock);
468
469 /*
470 * Ensure we don't have any signal queued for this channel.
471 */
472 for (;;) {
473 ret = sigemptyset(&pending_set);
474 if (ret == -1) {
475 PERROR("sigemptyset");
476 }
477 ret = sigpending(&pending_set);
478 if (ret == -1) {
479 PERROR("sigpending");
480 }
481 if (!sigismember(&pending_set, signr))
482 break;
483 caa_cpu_relax();
484 }
485
486 /*
487 * From this point, no new signal handler will be fired that
488 * would try to access "chan". However, we still need to wait
489 * for any currently executing handler to complete.
490 */
491 cmm_smp_mb();
492 CMM_STORE_SHARED(timer_signal.qs_done, 0);
493 cmm_smp_mb();
494
495 /*
496 * Kill with LTTNG_UST_RB_SIG_TEARDOWN, so signal management
497 * thread wakes up.
498 */
499 kill(getpid(), LTTNG_UST_RB_SIG_TEARDOWN);
500
501 while (!CMM_LOAD_SHARED(timer_signal.qs_done))
502 caa_cpu_relax();
503 cmm_smp_mb();
504
505 pthread_mutex_unlock(&timer_signal.lock);
506}
507
03d2d293
MD
508static
509void lib_ring_buffer_channel_switch_timer_start(struct channel *chan)
852c2936 510{
03d2d293
MD
511 struct sigevent sev;
512 struct itimerspec its;
513 int ret;
852c2936 514
03d2d293 515 if (!chan->switch_timer_interval || chan->switch_timer_enabled)
852c2936
MD
516 return;
517
03d2d293
MD
518 chan->switch_timer_enabled = 1;
519
520 lib_ring_buffer_setup_timer_thread();
521
522 sev.sigev_notify = SIGEV_SIGNAL;
34a91bdb 523 sev.sigev_signo = LTTNG_UST_RB_SIG_FLUSH;
03d2d293
MD
524 sev.sigev_value.sival_ptr = chan;
525 ret = timer_create(CLOCKID, &sev, &chan->switch_timer);
526 if (ret == -1) {
527 PERROR("timer_create");
528 }
529
530 its.it_value.tv_sec = chan->switch_timer_interval / 1000000;
531 its.it_value.tv_nsec = chan->switch_timer_interval % 1000000;
532 its.it_interval.tv_sec = its.it_value.tv_sec;
533 its.it_interval.tv_nsec = its.it_value.tv_nsec;
534
535 ret = timer_settime(chan->switch_timer, 0, &its, NULL);
536 if (ret == -1) {
537 PERROR("timer_settime");
538 }
539}
540
03d2d293
MD
541static
542void lib_ring_buffer_channel_switch_timer_stop(struct channel *chan)
543{
34a91bdb 544 int ret;
03d2d293
MD
545
546 if (!chan->switch_timer_interval || !chan->switch_timer_enabled)
547 return;
548
549 ret = timer_delete(chan->switch_timer);
550 if (ret == -1) {
551 PERROR("timer_delete");
552 }
553
64bf51a6 554 lib_ring_buffer_wait_signal_thread_qs(LTTNG_UST_RB_SIG_FLUSH);
03d2d293
MD
555
556 chan->switch_timer = 0;
557 chan->switch_timer_enabled = 0;
852c2936
MD
558}
559
34a91bdb
MD
560static
561void lib_ring_buffer_channel_read_timer_start(struct channel *chan)
852c2936 562{
4cfec15c 563 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
34a91bdb
MD
564 struct sigevent sev;
565 struct itimerspec its;
566 int ret;
852c2936 567
34a91bdb
MD
568 if (config->wakeup != RING_BUFFER_WAKEUP_BY_TIMER
569 || !chan->read_timer_interval || chan->read_timer_enabled)
570 return;
852c2936 571
34a91bdb 572 chan->read_timer_enabled = 1;
852c2936 573
34a91bdb 574 lib_ring_buffer_setup_timer_thread();
852c2936 575
34a91bdb
MD
576 sev.sigev_notify = SIGEV_SIGNAL;
577 sev.sigev_signo = LTTNG_UST_RB_SIG_READ;
578 sev.sigev_value.sival_ptr = chan;
579 ret = timer_create(CLOCKID, &sev, &chan->read_timer);
580 if (ret == -1) {
581 PERROR("timer_create");
582 }
852c2936 583
34a91bdb
MD
584 its.it_value.tv_sec = chan->read_timer_interval / 1000000;
585 its.it_value.tv_nsec = chan->read_timer_interval % 1000000;
586 its.it_interval.tv_sec = its.it_value.tv_sec;
587 its.it_interval.tv_nsec = its.it_value.tv_nsec;
852c2936 588
34a91bdb
MD
589 ret = timer_settime(chan->read_timer, 0, &its, NULL);
590 if (ret == -1) {
591 PERROR("timer_settime");
592 }
852c2936
MD
593}
594
34a91bdb
MD
595static
596void lib_ring_buffer_channel_read_timer_stop(struct channel *chan)
852c2936 597{
4cfec15c 598 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
34a91bdb 599 int ret;
852c2936
MD
600
601 if (config->wakeup != RING_BUFFER_WAKEUP_BY_TIMER
34a91bdb 602 || !chan->read_timer_interval || !chan->read_timer_enabled)
852c2936
MD
603 return;
604
34a91bdb
MD
605 ret = timer_delete(chan->read_timer);
606 if (ret == -1) {
607 PERROR("timer_delete");
608 }
609
852c2936
MD
610 /*
611 * do one more check to catch data that has been written in the last
612 * timer period.
613 */
34a91bdb
MD
614 lib_ring_buffer_channel_do_read(chan);
615
64bf51a6 616 lib_ring_buffer_wait_signal_thread_qs(LTTNG_UST_RB_SIG_READ);
34a91bdb
MD
617
618 chan->read_timer = 0;
619 chan->read_timer_enabled = 0;
852c2936
MD
620}
621
/*
 * Stop both periodic timers of @chan: the switch timer first, then the
 * read timer. @handle is unused.
 */
static void channel_unregister_notifiers(struct channel *chan,
		struct lttng_ust_shm_handle *handle)
{
	lib_ring_buffer_channel_switch_timer_stop(chan);
	lib_ring_buffer_channel_read_timer_stop(chan);
}
628
009769ca
MD
629static void channel_print_errors(struct channel *chan,
630 struct lttng_ust_shm_handle *handle)
631{
632 const struct lttng_ust_lib_ring_buffer_config *config =
633 &chan->backend.config;
634 int cpu;
635
636 if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
637 for_each_possible_cpu(cpu) {
638 struct lttng_ust_lib_ring_buffer *buf =
639 shmp(handle, chan->backend.buf[cpu].shmp);
640 lib_ring_buffer_print_errors(chan, buf, cpu, handle);
641 }
642 } else {
643 struct lttng_ust_lib_ring_buffer *buf =
644 shmp(handle, chan->backend.buf[0].shmp);
645
646 lib_ring_buffer_print_errors(chan, buf, -1, handle);
647 }
648}
649
650static void channel_free(struct channel *chan,
651 struct lttng_ust_shm_handle *handle)
852c2936 652{
74d81a6c 653 channel_backend_free(&chan->backend, handle);
431d5cf0 654 /* chan is freed by shm teardown */
1d498196
MD
655 shm_object_table_destroy(handle->table);
656 free(handle);
852c2936
MD
657}
658
659/**
660 * channel_create - Create channel.
661 * @config: ring buffer instance configuration
662 * @name: name of the channel
a3f61e7f
MD
663 * @priv_data: ring buffer client private data area pointer (output)
664 * @priv_data_size: length, in bytes, of the private data area.
d028eddb 665 * @priv_data_init: initialization data for private data.
852c2936
MD
666 * @buf_addr: pointer the the beginning of the preallocated buffer contiguous
667 * address mapping. It is used only by RING_BUFFER_STATIC
668 * configuration. It can be set to NULL for other backends.
669 * @subbuf_size: subbuffer size
670 * @num_subbuf: number of subbuffers
671 * @switch_timer_interval: Time interval (in us) to fill sub-buffers with
672 * padding to let readers get those sub-buffers.
673 * Used for live streaming.
674 * @read_timer_interval: Time interval (in us) to wake up pending readers.
675 *
676 * Holds cpu hotplug.
677 * Returns NULL on failure.
678 */
4cfec15c 679struct lttng_ust_shm_handle *channel_create(const struct lttng_ust_lib_ring_buffer_config *config,
a3f61e7f
MD
680 const char *name,
681 void **priv_data,
682 size_t priv_data_align,
683 size_t priv_data_size,
d028eddb 684 void *priv_data_init,
a3f61e7f 685 void *buf_addr, size_t subbuf_size,
852c2936 686 size_t num_subbuf, unsigned int switch_timer_interval,
74d81a6c 687 unsigned int read_timer_interval)
852c2936 688{
24622edc 689 int ret;
a3f61e7f 690 size_t shmsize, chansize;
852c2936 691 struct channel *chan;
38fae1d3 692 struct lttng_ust_shm_handle *handle;
1d498196 693 struct shm_object *shmobj;
74d81a6c
MD
694 unsigned int nr_streams;
695
696 if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
697 nr_streams = num_possible_cpus();
698 else
699 nr_streams = 1;
852c2936
MD
700
701 if (lib_ring_buffer_check_config(config, switch_timer_interval,
702 read_timer_interval))
703 return NULL;
704
38fae1d3 705 handle = zmalloc(sizeof(struct lttng_ust_shm_handle));
431d5cf0
MD
706 if (!handle)
707 return NULL;
708
1d498196
MD
709 /* Allocate table for channel + per-cpu buffers */
710 handle->table = shm_object_table_create(1 + num_possible_cpus());
711 if (!handle->table)
712 goto error_table_alloc;
852c2936 713
1d498196
MD
714 /* Calculate the shm allocation layout */
715 shmsize = sizeof(struct channel);
c1fca457 716 shmsize += offset_align(shmsize, __alignof__(struct lttng_ust_lib_ring_buffer_shmp));
74d81a6c 717 shmsize += sizeof(struct lttng_ust_lib_ring_buffer_shmp) * nr_streams;
a3f61e7f 718 chansize = shmsize;
74d81a6c
MD
719 if (priv_data_align)
720 shmsize += offset_align(shmsize, priv_data_align);
a3f61e7f 721 shmsize += priv_data_size;
a6352fd4 722
74d81a6c
MD
723 /* Allocate normal memory for channel (not shared) */
724 shmobj = shm_object_table_alloc(handle->table, shmsize, SHM_OBJECT_MEM);
b5a14697
MD
725 if (!shmobj)
726 goto error_append;
57773204 727 /* struct channel is at object 0, offset 0 (hardcoded) */
a3f61e7f 728 set_shmp(handle->chan, zalloc_shm(shmobj, chansize));
57773204
MD
729 assert(handle->chan._ref.index == 0);
730 assert(handle->chan._ref.offset == 0);
1d498196 731 chan = shmp(handle, handle->chan);
a6352fd4 732 if (!chan)
1d498196 733 goto error_append;
74d81a6c 734 chan->nr_streams = nr_streams;
a6352fd4 735
a3f61e7f
MD
736 /* space for private data */
737 if (priv_data_size) {
738 DECLARE_SHMP(void, priv_data_alloc);
739
740 align_shm(shmobj, priv_data_align);
741 chan->priv_data_offset = shmobj->allocated_len;
742 set_shmp(priv_data_alloc, zalloc_shm(shmobj, priv_data_size));
743 if (!shmp(handle, priv_data_alloc))
744 goto error_append;
745 *priv_data = channel_get_private(chan);
d028eddb 746 memcpy(*priv_data, priv_data_init, priv_data_size);
a3f61e7f
MD
747 } else {
748 chan->priv_data_offset = -1;
74d81a6c
MD
749 if (priv_data)
750 *priv_data = NULL;
a3f61e7f
MD
751 }
752
753 ret = channel_backend_init(&chan->backend, name, config,
1d498196 754 subbuf_size, num_subbuf, handle);
852c2936 755 if (ret)
1d498196 756 goto error_backend_init;
852c2936 757
03d2d293 758 chan->handle = handle;
852c2936 759 chan->commit_count_mask = (~0UL >> chan->backend.num_subbuf_order);
852c2936 760
34a91bdb
MD
761 chan->switch_timer_interval = switch_timer_interval;
762 chan->read_timer_interval = read_timer_interval;
03d2d293 763 lib_ring_buffer_channel_switch_timer_start(chan);
34a91bdb 764 lib_ring_buffer_channel_read_timer_start(chan);
852c2936 765
431d5cf0 766 return handle;
852c2936 767
1d498196
MD
768error_backend_init:
769error_append:
770 shm_object_table_destroy(handle->table);
771error_table_alloc:
431d5cf0 772 free(handle);
852c2936
MD
773 return NULL;
774}
852c2936 775
74d81a6c 776struct lttng_ust_shm_handle *channel_handle_create(void *data,
ff0f5728
MD
777 uint64_t memory_map_size,
778 int wakeup_fd)
193183fb 779{
38fae1d3 780 struct lttng_ust_shm_handle *handle;
193183fb
MD
781 struct shm_object *object;
782
38fae1d3 783 handle = zmalloc(sizeof(struct lttng_ust_shm_handle));
193183fb
MD
784 if (!handle)
785 return NULL;
786
787 /* Allocate table for channel + per-cpu buffers */
788 handle->table = shm_object_table_create(1 + num_possible_cpus());
789 if (!handle->table)
790 goto error_table_alloc;
791 /* Add channel object */
74d81a6c 792 object = shm_object_table_append_mem(handle->table, data,
ff0f5728 793 memory_map_size, wakeup_fd);
193183fb
MD
794 if (!object)
795 goto error_table_object;
57773204
MD
796 /* struct channel is at object 0, offset 0 (hardcoded) */
797 handle->chan._ref.index = 0;
798 handle->chan._ref.offset = 0;
193183fb
MD
799 return handle;
800
801error_table_object:
802 shm_object_table_destroy(handle->table);
803error_table_alloc:
804 free(handle);
805 return NULL;
806}
807
38fae1d3 808int channel_handle_add_stream(struct lttng_ust_shm_handle *handle,
74d81a6c
MD
809 int shm_fd, int wakeup_fd, uint32_t stream_nr,
810 uint64_t memory_map_size)
193183fb
MD
811{
812 struct shm_object *object;
813
814 /* Add stream object */
74d81a6c
MD
815 object = shm_object_table_append_shm(handle->table,
816 shm_fd, wakeup_fd, stream_nr,
817 memory_map_size);
193183fb 818 if (!object)
74d81a6c 819 return -EINVAL;
193183fb
MD
820 return 0;
821}
822
74d81a6c
MD
823unsigned int channel_handle_get_nr_streams(struct lttng_ust_shm_handle *handle)
824{
825 assert(handle->table);
826 return handle->table->allocated_len - 1;
827}
828
/* Release @chan: currently a plain wrapper around channel_free(). */
static
void channel_release(struct channel *chan, struct lttng_ust_shm_handle *handle)
{
	channel_free(chan, handle);
}
834
/**
 * channel_destroy - Destroy channel.
 * @chan: channel to destroy
 * @handle: shm handle of the channel; freed by this call
 * @consumer: non-zero when called on the consumer side
 *
 * Holds cpu hotplug.
 * On the consumer side, the timers are stopped and the buffer errors
 * are printed first (the consumer also takes care of finalizing and
 * switching the buffers). The channel is then released in all cases.
 * Note that when readers have completed data consumption of finalized
 * channels, get_subbuf() will return -ENODATA. They should release
 * their handle at that point.
 */
void channel_destroy(struct channel *chan, struct lttng_ust_shm_handle *handle,
		int consumer)
{
	if (consumer) {
		channel_unregister_notifiers(chan, handle);
		/* The consumer prints errors. */
		channel_print_errors(chan, handle);
	}

	/*
	 * sessiond/consumer are keeping a reference on the shm file
	 * descriptor directly. No need to refcount.
	 */
	channel_release(chan, handle);
}
852c2936 867
4cfec15c
MD
868struct lttng_ust_lib_ring_buffer *channel_get_ring_buffer(
869 const struct lttng_ust_lib_ring_buffer_config *config,
1d498196 870 struct channel *chan, int cpu,
38fae1d3 871 struct lttng_ust_shm_handle *handle,
74d81a6c
MD
872 int *shm_fd, int *wait_fd,
873 int *wakeup_fd,
874 uint64_t *memory_map_size)
852c2936 875{
381c0f1e
MD
876 struct shm_ref *ref;
877
878 if (config->alloc == RING_BUFFER_ALLOC_GLOBAL) {
74d81a6c 879 cpu = 0;
381c0f1e 880 } else {
e095d803
MD
881 if (cpu >= num_possible_cpus())
882 return NULL;
381c0f1e 883 }
74d81a6c
MD
884 ref = &chan->backend.buf[cpu].shmp._ref;
885 *shm_fd = shm_get_shm_fd(handle, ref);
886 *wait_fd = shm_get_wait_fd(handle, ref);
887 *wakeup_fd = shm_get_wakeup_fd(handle, ref);
888 if (shm_get_shm_size(handle, ref, memory_map_size))
889 return NULL;
890 return shmp(handle, chan->backend.buf[cpu].shmp);
852c2936 891}
852c2936 892
ff0f5728
MD
893int ring_buffer_channel_close_wait_fd(const struct lttng_ust_lib_ring_buffer_config *config,
894 struct channel *chan,
895 struct lttng_ust_shm_handle *handle)
896{
897 struct shm_ref *ref;
898
899 ref = &handle->chan._ref;
900 return shm_close_wait_fd(handle, ref);
901}
902
903int ring_buffer_channel_close_wakeup_fd(const struct lttng_ust_lib_ring_buffer_config *config,
904 struct channel *chan,
905 struct lttng_ust_shm_handle *handle)
906{
907 struct shm_ref *ref;
908
909 ref = &handle->chan._ref;
910 return shm_close_wakeup_fd(handle, ref);
911}
912
913int ring_buffer_stream_close_wait_fd(const struct lttng_ust_lib_ring_buffer_config *config,
74d81a6c
MD
914 struct channel *chan,
915 struct lttng_ust_shm_handle *handle,
916 int cpu)
852c2936 917{
74d81a6c
MD
918 struct shm_ref *ref;
919
920 if (config->alloc == RING_BUFFER_ALLOC_GLOBAL) {
921 cpu = 0;
922 } else {
923 if (cpu >= num_possible_cpus())
924 return -EINVAL;
824f40b8 925 }
74d81a6c
MD
926 ref = &chan->backend.buf[cpu].shmp._ref;
927 return shm_close_wait_fd(handle, ref);
928}
929
ff0f5728 930int ring_buffer_stream_close_wakeup_fd(const struct lttng_ust_lib_ring_buffer_config *config,
74d81a6c
MD
931 struct channel *chan,
932 struct lttng_ust_shm_handle *handle,
933 int cpu)
934{
935 struct shm_ref *ref;
cb7378b3 936 int ret;
74d81a6c
MD
937
938 if (config->alloc == RING_BUFFER_ALLOC_GLOBAL) {
939 cpu = 0;
940 } else {
941 if (cpu >= num_possible_cpus())
942 return -EINVAL;
943 }
944 ref = &chan->backend.buf[cpu].shmp._ref;
cb7378b3
MD
945 pthread_mutex_lock(&wakeup_fd_mutex);
946 ret = shm_close_wakeup_fd(handle, ref);
947 pthread_mutex_unlock(&wakeup_fd_mutex);
948 return ret;
74d81a6c
MD
949}
950
951int lib_ring_buffer_open_read(struct lttng_ust_lib_ring_buffer *buf,
952 struct lttng_ust_shm_handle *handle)
953{
a6352fd4 954 if (uatomic_cmpxchg(&buf->active_readers, 0, 1) != 0)
852c2936 955 return -EBUSY;
a6352fd4 956 cmm_smp_mb();
852c2936
MD
957 return 0;
958}
852c2936 959
4cfec15c 960void lib_ring_buffer_release_read(struct lttng_ust_lib_ring_buffer *buf,
74d81a6c 961 struct lttng_ust_shm_handle *handle)
852c2936 962{
1d498196 963 struct channel *chan = shmp(handle, buf->backend.chan);
852c2936 964
a6352fd4
MD
965 CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1);
966 cmm_smp_mb();
967 uatomic_dec(&buf->active_readers);
852c2936
MD
968}
969
970/**
971 * lib_ring_buffer_snapshot - save subbuffer position snapshot (for read)
972 * @buf: ring buffer
973 * @consumed: consumed count indicating the position where to read
974 * @produced: produced count, indicates position when to stop reading
975 *
976 * Returns -ENODATA if buffer is finalized, -EAGAIN if there is currently no
977 * data to read at consumed position, or 0 if the get operation succeeds.
852c2936
MD
978 */
979
4cfec15c 980int lib_ring_buffer_snapshot(struct lttng_ust_lib_ring_buffer *buf,
1d498196 981 unsigned long *consumed, unsigned long *produced,
38fae1d3 982 struct lttng_ust_shm_handle *handle)
852c2936 983{
1d498196 984 struct channel *chan = shmp(handle, buf->backend.chan);
4cfec15c 985 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
986 unsigned long consumed_cur, write_offset;
987 int finalized;
988
14641deb 989 finalized = CMM_ACCESS_ONCE(buf->finalized);
852c2936
MD
990 /*
991 * Read finalized before counters.
992 */
a6352fd4
MD
993 cmm_smp_rmb();
994 consumed_cur = uatomic_read(&buf->consumed);
852c2936
MD
995 /*
996 * No need to issue a memory barrier between consumed count read and
997 * write offset read, because consumed count can only change
998 * concurrently in overwrite mode, and we keep a sequence counter
999 * identifier derived from the write offset to check we are getting
1000 * the same sub-buffer we are expecting (the sub-buffers are atomically
1001 * "tagged" upon writes, tags are checked upon read).
1002 */
1003 write_offset = v_read(config, &buf->offset);
1004
1005 /*
1006 * Check that we are not about to read the same subbuffer in
1007 * which the writer head is.
1008 */
1009 if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed_cur, chan)
1010 == 0)
1011 goto nodata;
1012
1013 *consumed = consumed_cur;
1014 *produced = subbuf_trunc(write_offset, chan);
1015
1016 return 0;
1017
1018nodata:
1019 /*
1020 * The memory barriers __wait_event()/wake_up_interruptible() take care
1021 * of "raw_spin_is_locked" memory ordering.
1022 */
1023 if (finalized)
1024 return -ENODATA;
852c2936
MD
1025 else
1026 return -EAGAIN;
1027}
852c2936
MD
1028
1029/**
d1a996f5 1030 * lib_ring_buffer_move_consumer - move consumed counter forward
852c2936
MD
1031 * @buf: ring buffer
1032 * @consumed_new: new consumed count value
1033 */
4cfec15c 1034void lib_ring_buffer_move_consumer(struct lttng_ust_lib_ring_buffer *buf,
1d498196 1035 unsigned long consumed_new,
38fae1d3 1036 struct lttng_ust_shm_handle *handle)
852c2936 1037{
4cfec15c 1038 struct lttng_ust_lib_ring_buffer_backend *bufb = &buf->backend;
1d498196 1039 struct channel *chan = shmp(handle, bufb->chan);
852c2936
MD
1040 unsigned long consumed;
1041
74d81a6c 1042 CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1);
852c2936
MD
1043
1044 /*
1045 * Only push the consumed value forward.
1046 * If the consumed cmpxchg fails, this is because we have been pushed by
1047 * the writer in flight recorder mode.
1048 */
a6352fd4 1049 consumed = uatomic_read(&buf->consumed);
852c2936 1050 while ((long) consumed - (long) consumed_new < 0)
a6352fd4
MD
1051 consumed = uatomic_cmpxchg(&buf->consumed, consumed,
1052 consumed_new);
852c2936 1053}
852c2936
MD
1054
1055/**
1056 * lib_ring_buffer_get_subbuf - get exclusive access to subbuffer for reading
1057 * @buf: ring buffer
1058 * @consumed: consumed count indicating the position where to read
1059 *
1060 * Returns -ENODATA if buffer is finalized, -EAGAIN if there is currently no
1061 * data to read at consumed position, or 0 if the get operation succeeds.
852c2936 1062 */
4cfec15c 1063int lib_ring_buffer_get_subbuf(struct lttng_ust_lib_ring_buffer *buf,
1d498196 1064 unsigned long consumed,
38fae1d3 1065 struct lttng_ust_shm_handle *handle)
852c2936 1066{
1d498196 1067 struct channel *chan = shmp(handle, buf->backend.chan);
4cfec15c 1068 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
1069 unsigned long consumed_cur, consumed_idx, commit_count, write_offset;
1070 int ret;
1071 int finalized;
1072
1073retry:
14641deb 1074 finalized = CMM_ACCESS_ONCE(buf->finalized);
852c2936
MD
1075 /*
1076 * Read finalized before counters.
1077 */
a6352fd4
MD
1078 cmm_smp_rmb();
1079 consumed_cur = uatomic_read(&buf->consumed);
852c2936 1080 consumed_idx = subbuf_index(consumed, chan);
4746ae29 1081 commit_count = v_read(config, &shmp_index(handle, buf->commit_cold, consumed_idx)->cc_sb);
852c2936
MD
1082 /*
1083 * Make sure we read the commit count before reading the buffer
1084 * data and the write offset. Correct consumed offset ordering
1085 * wrt commit count is insured by the use of cmpxchg to update
1086 * the consumed offset.
852c2936 1087 */
a6352fd4
MD
1088 /*
1089 * Local rmb to match the remote wmb to read the commit count
1090 * before the buffer data and the write offset.
1091 */
1092 cmm_smp_rmb();
852c2936
MD
1093
1094 write_offset = v_read(config, &buf->offset);
1095
1096 /*
1097 * Check that the buffer we are getting is after or at consumed_cur
1098 * position.
1099 */
1100 if ((long) subbuf_trunc(consumed, chan)
1101 - (long) subbuf_trunc(consumed_cur, chan) < 0)
1102 goto nodata;
1103
1104 /*
1105 * Check that the subbuffer we are trying to consume has been
1106 * already fully committed.
1107 */
1108 if (((commit_count - chan->backend.subbuf_size)
1109 & chan->commit_count_mask)
6c81e331 1110 - (buf_trunc(consumed, chan)
852c2936
MD
1111 >> chan->backend.num_subbuf_order)
1112 != 0)
1113 goto nodata;
1114
1115 /*
1116 * Check that we are not about to read the same subbuffer in
1117 * which the writer head is.
1118 */
6c81e331 1119 if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed, chan)
852c2936
MD
1120 == 0)
1121 goto nodata;
1122
1123 /*
1124 * Failure to get the subbuffer causes a busy-loop retry without going
1125 * to a wait queue. These are caused by short-lived race windows where
1126 * the writer is getting access to a subbuffer we were trying to get
1127 * access to. Also checks that the "consumed" buffer count we are
1128 * looking for matches the one contained in the subbuffer id.
1129 */
1130 ret = update_read_sb_index(config, &buf->backend, &chan->backend,
1d498196
MD
1131 consumed_idx, buf_trunc_val(consumed, chan),
1132 handle);
852c2936
MD
1133 if (ret)
1134 goto retry;
1135 subbuffer_id_clear_noref(config, &buf->backend.buf_rsb.id);
1136
1137 buf->get_subbuf_consumed = consumed;
1138 buf->get_subbuf = 1;
1139
1140 return 0;
1141
1142nodata:
1143 /*
1144 * The memory barriers __wait_event()/wake_up_interruptible() take care
1145 * of "raw_spin_is_locked" memory ordering.
1146 */
1147 if (finalized)
1148 return -ENODATA;
852c2936
MD
1149 else
1150 return -EAGAIN;
1151}
852c2936
MD
1152
1153/**
1154 * lib_ring_buffer_put_subbuf - release exclusive subbuffer access
1155 * @buf: ring buffer
1156 */
4cfec15c 1157void lib_ring_buffer_put_subbuf(struct lttng_ust_lib_ring_buffer *buf,
38fae1d3 1158 struct lttng_ust_shm_handle *handle)
852c2936 1159{
4cfec15c 1160 struct lttng_ust_lib_ring_buffer_backend *bufb = &buf->backend;
1d498196 1161 struct channel *chan = shmp(handle, bufb->chan);
4cfec15c 1162 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
1163 unsigned long read_sb_bindex, consumed_idx, consumed;
1164
74d81a6c 1165 CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1);
852c2936
MD
1166
1167 if (!buf->get_subbuf) {
1168 /*
1169 * Reader puts a subbuffer it did not get.
1170 */
1171 CHAN_WARN_ON(chan, 1);
1172 return;
1173 }
1174 consumed = buf->get_subbuf_consumed;
1175 buf->get_subbuf = 0;
1176
1177 /*
1178 * Clear the records_unread counter. (overruns counter)
1179 * Can still be non-zero if a file reader simply grabbed the data
1180 * without using iterators.
1181 * Can be below zero if an iterator is used on a snapshot more than
1182 * once.
1183 */
1184 read_sb_bindex = subbuffer_id_get_index(config, bufb->buf_rsb.id);
1185 v_add(config, v_read(config,
4746ae29 1186 &shmp(handle, shmp_index(handle, bufb->array, read_sb_bindex)->shmp)->records_unread),
852c2936 1187 &bufb->records_read);
4746ae29 1188 v_set(config, &shmp(handle, shmp_index(handle, bufb->array, read_sb_bindex)->shmp)->records_unread, 0);
852c2936
MD
1189 CHAN_WARN_ON(chan, config->mode == RING_BUFFER_OVERWRITE
1190 && subbuffer_id_is_noref(config, bufb->buf_rsb.id));
1191 subbuffer_id_set_noref(config, &bufb->buf_rsb.id);
1192
1193 /*
1194 * Exchange the reader subbuffer with the one we put in its place in the
1195 * writer subbuffer table. Expect the original consumed count. If
1196 * update_read_sb_index fails, this is because the writer updated the
1197 * subbuffer concurrently. We should therefore keep the subbuffer we
1198 * currently have: it has become invalid to try reading this sub-buffer
1199 * consumed count value anyway.
1200 */
1201 consumed_idx = subbuf_index(consumed, chan);
1202 update_read_sb_index(config, &buf->backend, &chan->backend,
1d498196
MD
1203 consumed_idx, buf_trunc_val(consumed, chan),
1204 handle);
852c2936
MD
1205 /*
1206 * update_read_sb_index return value ignored. Don't exchange sub-buffer
1207 * if the writer concurrently updated it.
1208 */
1209}
852c2936
MD
1210
1211/*
1212 * cons_offset is an iterator on all subbuffer offsets between the reader
1213 * position and the writer position. (inclusive)
1214 */
1215static
4cfec15c 1216void lib_ring_buffer_print_subbuffer_errors(struct lttng_ust_lib_ring_buffer *buf,
852c2936
MD
1217 struct channel *chan,
1218 unsigned long cons_offset,
1d498196 1219 int cpu,
38fae1d3 1220 struct lttng_ust_shm_handle *handle)
852c2936 1221{
4cfec15c 1222 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
1223 unsigned long cons_idx, commit_count, commit_count_sb;
1224
1225 cons_idx = subbuf_index(cons_offset, chan);
4746ae29
MD
1226 commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, cons_idx)->cc);
1227 commit_count_sb = v_read(config, &shmp_index(handle, buf->commit_cold, cons_idx)->cc_sb);
852c2936
MD
1228
1229 if (subbuf_offset(commit_count, chan) != 0)
4d3c9523 1230 DBG("ring buffer %s, cpu %d: "
852c2936
MD
1231 "commit count in subbuffer %lu,\n"
1232 "expecting multiples of %lu bytes\n"
1233 " [ %lu bytes committed, %lu bytes reader-visible ]\n",
1234 chan->backend.name, cpu, cons_idx,
1235 chan->backend.subbuf_size,
1236 commit_count, commit_count_sb);
1237
4d3c9523 1238 DBG("ring buffer: %s, cpu %d: %lu bytes committed\n",
852c2936
MD
1239 chan->backend.name, cpu, commit_count);
1240}
1241
1242static
4cfec15c 1243void lib_ring_buffer_print_buffer_errors(struct lttng_ust_lib_ring_buffer *buf,
852c2936 1244 struct channel *chan,
1d498196 1245 void *priv, int cpu,
38fae1d3 1246 struct lttng_ust_shm_handle *handle)
852c2936 1247{
4cfec15c 1248 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
1249 unsigned long write_offset, cons_offset;
1250
852c2936
MD
1251 /*
1252 * No need to order commit_count, write_offset and cons_offset reads
1253 * because we execute at teardown when no more writer nor reader
1254 * references are left.
1255 */
1256 write_offset = v_read(config, &buf->offset);
a6352fd4 1257 cons_offset = uatomic_read(&buf->consumed);
852c2936 1258 if (write_offset != cons_offset)
4d3c9523 1259 DBG("ring buffer %s, cpu %d: "
852c2936
MD
1260 "non-consumed data\n"
1261 " [ %lu bytes written, %lu bytes read ]\n",
1262 chan->backend.name, cpu, write_offset, cons_offset);
1263
a6352fd4 1264 for (cons_offset = uatomic_read(&buf->consumed);
852c2936
MD
1265 (long) (subbuf_trunc((unsigned long) v_read(config, &buf->offset),
1266 chan)
1267 - cons_offset) > 0;
1268 cons_offset = subbuf_align(cons_offset, chan))
1269 lib_ring_buffer_print_subbuffer_errors(buf, chan, cons_offset,
1d498196 1270 cpu, handle);
852c2936
MD
1271}
1272
1273static
1274void lib_ring_buffer_print_errors(struct channel *chan,
009769ca
MD
1275 struct lttng_ust_lib_ring_buffer *buf, int cpu,
1276 struct lttng_ust_shm_handle *handle)
852c2936 1277{
4cfec15c 1278 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
a3f61e7f 1279 void *priv = channel_get_private(chan);
852c2936 1280
a1360615
MD
1281 if (!strcmp(chan->backend.name, "relay-metadata-mmap")) {
1282 DBG("ring buffer %s: %lu records written, "
1283 "%lu records overrun\n",
1284 chan->backend.name,
1285 v_read(config, &buf->records_count),
1286 v_read(config, &buf->records_overrun));
1287 } else {
1288 DBG("ring buffer %s, cpu %d: %lu records written, "
1289 "%lu records overrun\n",
1290 chan->backend.name, cpu,
1291 v_read(config, &buf->records_count),
1292 v_read(config, &buf->records_overrun));
1293
1294 if (v_read(config, &buf->records_lost_full)
1295 || v_read(config, &buf->records_lost_wrap)
1296 || v_read(config, &buf->records_lost_big))
1297 DBG("ring buffer %s, cpu %d: records were lost. Caused by:\n"
1298 " [ %lu buffer full, %lu nest buffer wrap-around, "
1299 "%lu event too big ]\n",
1300 chan->backend.name, cpu,
1301 v_read(config, &buf->records_lost_full),
1302 v_read(config, &buf->records_lost_wrap),
1303 v_read(config, &buf->records_lost_big));
1304 }
1d498196 1305 lib_ring_buffer_print_buffer_errors(buf, chan, priv, cpu, handle);
852c2936
MD
1306}
1307
1308/*
1309 * lib_ring_buffer_switch_old_start: Populate old subbuffer header.
1310 *
1311 * Only executed when the buffer is finalized, in SWITCH_FLUSH.
1312 */
1313static
4cfec15c 1314void lib_ring_buffer_switch_old_start(struct lttng_ust_lib_ring_buffer *buf,
852c2936
MD
1315 struct channel *chan,
1316 struct switch_offsets *offsets,
2fed87ae 1317 uint64_t tsc,
38fae1d3 1318 struct lttng_ust_shm_handle *handle)
852c2936 1319{
4cfec15c 1320 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
1321 unsigned long oldidx = subbuf_index(offsets->old, chan);
1322 unsigned long commit_count;
1323
1d498196 1324 config->cb.buffer_begin(buf, tsc, oldidx, handle);
852c2936
MD
1325
1326 /*
1327 * Order all writes to buffer before the commit count update that will
1328 * determine that the subbuffer is full.
1329 */
a6352fd4 1330 cmm_smp_wmb();
852c2936 1331 v_add(config, config->cb.subbuffer_header_size(),
4746ae29
MD
1332 &shmp_index(handle, buf->commit_hot, oldidx)->cc);
1333 commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, oldidx)->cc);
852c2936
MD
1334 /* Check if the written buffer has to be delivered */
1335 lib_ring_buffer_check_deliver(config, buf, chan, offsets->old,
969771a1 1336 commit_count, oldidx, handle, tsc);
852c2936
MD
1337 lib_ring_buffer_write_commit_counter(config, buf, chan, oldidx,
1338 offsets->old, commit_count,
1d498196
MD
1339 config->cb.subbuffer_header_size(),
1340 handle);
852c2936
MD
1341}
1342
1343/*
1344 * lib_ring_buffer_switch_old_end: switch old subbuffer
1345 *
1346 * Note : offset_old should never be 0 here. It is ok, because we never perform
1347 * buffer switch on an empty subbuffer in SWITCH_ACTIVE mode. The caller
1348 * increments the offset_old value when doing a SWITCH_FLUSH on an empty
1349 * subbuffer.
1350 */
1351static
4cfec15c 1352void lib_ring_buffer_switch_old_end(struct lttng_ust_lib_ring_buffer *buf,
852c2936
MD
1353 struct channel *chan,
1354 struct switch_offsets *offsets,
2fed87ae 1355 uint64_t tsc,
38fae1d3 1356 struct lttng_ust_shm_handle *handle)
852c2936 1357{
4cfec15c 1358 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
1359 unsigned long oldidx = subbuf_index(offsets->old - 1, chan);
1360 unsigned long commit_count, padding_size, data_size;
1361
1362 data_size = subbuf_offset(offsets->old - 1, chan) + 1;
1363 padding_size = chan->backend.subbuf_size - data_size;
1d498196
MD
1364 subbuffer_set_data_size(config, &buf->backend, oldidx, data_size,
1365 handle);
852c2936
MD
1366
1367 /*
1368 * Order all writes to buffer before the commit count update that will
1369 * determine that the subbuffer is full.
1370 */
a6352fd4 1371 cmm_smp_wmb();
4746ae29
MD
1372 v_add(config, padding_size, &shmp_index(handle, buf->commit_hot, oldidx)->cc);
1373 commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, oldidx)->cc);
852c2936 1374 lib_ring_buffer_check_deliver(config, buf, chan, offsets->old - 1,
969771a1 1375 commit_count, oldidx, handle, tsc);
852c2936
MD
1376 lib_ring_buffer_write_commit_counter(config, buf, chan, oldidx,
1377 offsets->old, commit_count,
1d498196 1378 padding_size, handle);
852c2936
MD
1379}
1380
1381/*
1382 * lib_ring_buffer_switch_new_start: Populate new subbuffer.
1383 *
1384 * This code can be executed unordered : writers may already have written to the
1385 * sub-buffer before this code gets executed, caution. The commit makes sure
1386 * that this code is executed before the deliver of this sub-buffer.
1387 */
1388static
4cfec15c 1389void lib_ring_buffer_switch_new_start(struct lttng_ust_lib_ring_buffer *buf,
852c2936
MD
1390 struct channel *chan,
1391 struct switch_offsets *offsets,
2fed87ae 1392 uint64_t tsc,
38fae1d3 1393 struct lttng_ust_shm_handle *handle)
852c2936 1394{
4cfec15c 1395 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
1396 unsigned long beginidx = subbuf_index(offsets->begin, chan);
1397 unsigned long commit_count;
1398
1d498196 1399 config->cb.buffer_begin(buf, tsc, beginidx, handle);
852c2936
MD
1400
1401 /*
1402 * Order all writes to buffer before the commit count update that will
1403 * determine that the subbuffer is full.
1404 */
a6352fd4 1405 cmm_smp_wmb();
852c2936 1406 v_add(config, config->cb.subbuffer_header_size(),
4746ae29
MD
1407 &shmp_index(handle, buf->commit_hot, beginidx)->cc);
1408 commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, beginidx)->cc);
852c2936
MD
1409 /* Check if the written buffer has to be delivered */
1410 lib_ring_buffer_check_deliver(config, buf, chan, offsets->begin,
969771a1 1411 commit_count, beginidx, handle, tsc);
852c2936
MD
1412 lib_ring_buffer_write_commit_counter(config, buf, chan, beginidx,
1413 offsets->begin, commit_count,
1d498196
MD
1414 config->cb.subbuffer_header_size(),
1415 handle);
852c2936
MD
1416}
1417
1418/*
1419 * lib_ring_buffer_switch_new_end: finish switching current subbuffer
1420 *
1421 * The only remaining threads could be the ones with pending commits. They will
1422 * have to do the deliver themselves.
1423 */
1424static
4cfec15c 1425void lib_ring_buffer_switch_new_end(struct lttng_ust_lib_ring_buffer *buf,
1d498196
MD
1426 struct channel *chan,
1427 struct switch_offsets *offsets,
2fed87ae 1428 uint64_t tsc,
38fae1d3 1429 struct lttng_ust_shm_handle *handle)
852c2936 1430{
4cfec15c 1431 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
1432 unsigned long endidx = subbuf_index(offsets->end - 1, chan);
1433 unsigned long commit_count, padding_size, data_size;
1434
1435 data_size = subbuf_offset(offsets->end - 1, chan) + 1;
1436 padding_size = chan->backend.subbuf_size - data_size;
1d498196
MD
1437 subbuffer_set_data_size(config, &buf->backend, endidx, data_size,
1438 handle);
852c2936
MD
1439
1440 /*
1441 * Order all writes to buffer before the commit count update that will
1442 * determine that the subbuffer is full.
1443 */
a6352fd4 1444 cmm_smp_wmb();
4746ae29
MD
1445 v_add(config, padding_size, &shmp_index(handle, buf->commit_hot, endidx)->cc);
1446 commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, endidx)->cc);
852c2936 1447 lib_ring_buffer_check_deliver(config, buf, chan, offsets->end - 1,
1d498196 1448 commit_count, endidx, handle);
852c2936
MD
1449 lib_ring_buffer_write_commit_counter(config, buf, chan, endidx,
1450 offsets->end, commit_count,
1d498196 1451 padding_size, handle);
852c2936
MD
1452}
1453
1454/*
1455 * Returns :
1456 * 0 if ok
1457 * !0 if execution must be aborted.
1458 */
1459static
1460int lib_ring_buffer_try_switch_slow(enum switch_mode mode,
4cfec15c 1461 struct lttng_ust_lib_ring_buffer *buf,
852c2936
MD
1462 struct channel *chan,
1463 struct switch_offsets *offsets,
cfaee541
MD
1464 uint64_t *tsc,
1465 struct lttng_ust_shm_handle *handle)
852c2936 1466{
4cfec15c 1467 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
cfaee541 1468 unsigned long off, reserve_commit_diff;
852c2936
MD
1469
1470 offsets->begin = v_read(config, &buf->offset);
1471 offsets->old = offsets->begin;
1472 offsets->switch_old_start = 0;
1473 off = subbuf_offset(offsets->begin, chan);
1474
1475 *tsc = config->cb.ring_buffer_clock_read(chan);
1476
1477 /*
1478 * Ensure we flush the header of an empty subbuffer when doing the
1479 * finalize (SWITCH_FLUSH). This ensures that we end up knowing the
1480 * total data gathering duration even if there were no records saved
1481 * after the last buffer switch.
1482 * In SWITCH_ACTIVE mode, switch the buffer when it contains events.
1483 * SWITCH_ACTIVE only flushes the current subbuffer, dealing with end of
1484 * subbuffer header as appropriate.
1485 * The next record that reserves space will be responsible for
1486 * populating the following subbuffer header. We choose not to populate
1487 * the next subbuffer header here because we want to be able to use
a6352fd4
MD
1488 * SWITCH_ACTIVE for periodical buffer flush, which must
1489 * guarantee that all the buffer content (records and header
1490 * timestamps) are visible to the reader. This is required for
1491 * quiescence guarantees for the fusion merge.
852c2936 1492 */
cfaee541
MD
1493 if (mode != SWITCH_FLUSH && !off)
1494 return -1; /* we do not have to switch : buffer is empty */
1495
1496 if (caa_unlikely(off == 0)) {
1497 unsigned long sb_index, commit_count;
1498
1499 /*
1500 * We are performing a SWITCH_FLUSH. At this stage, there are no
1501 * concurrent writes into the buffer.
1502 *
1503 * The client does not save any header information. Don't
1504 * switch empty subbuffer on finalize, because it is invalid to
1505 * deliver a completely empty subbuffer.
1506 */
1507 if (!config->cb.subbuffer_header_size())
1508 return -1;
1509
1510 /* Test new buffer integrity */
1511 sb_index = subbuf_index(offsets->begin, chan);
1512 commit_count = v_read(config,
1513 &shmp_index(handle, buf->commit_cold,
1514 sb_index)->cc_sb);
1515 reserve_commit_diff =
1516 (buf_trunc(offsets->begin, chan)
1517 >> chan->backend.num_subbuf_order)
1518 - (commit_count & chan->commit_count_mask);
1519 if (caa_likely(reserve_commit_diff == 0)) {
1520 /* Next subbuffer not being written to. */
1521 if (caa_unlikely(config->mode != RING_BUFFER_OVERWRITE &&
1522 subbuf_trunc(offsets->begin, chan)
1523 - subbuf_trunc((unsigned long)
1524 uatomic_read(&buf->consumed), chan)
1525 >= chan->backend.buf_size)) {
1526 /*
1527 * We do not overwrite non consumed buffers
1528 * and we are full : don't switch.
1529 */
852c2936 1530 return -1;
cfaee541
MD
1531 } else {
1532 /*
1533 * Next subbuffer not being written to, and we
1534 * are either in overwrite mode or the buffer is
1535 * not full. It's safe to write in this new
1536 * subbuffer.
1537 */
1538 }
1539 } else {
852c2936 1540 /*
cfaee541
MD
1541 * Next subbuffer reserve offset does not match the
1542 * commit offset. Don't perform switch in
1543 * producer-consumer and overwrite mode. Caused by
1544 * either a writer OOPS or too many nested writes over a
1545 * reserve/commit pair.
852c2936 1546 */
cfaee541 1547 return -1;
852c2936 1548 }
cfaee541
MD
1549
1550 /*
1551 * Need to write the subbuffer start header on finalize.
1552 */
1553 offsets->switch_old_start = 1;
1554 }
1555 offsets->begin = subbuf_align(offsets->begin, chan);
852c2936
MD
1556 /* Note: old points to the next subbuf at offset 0 */
1557 offsets->end = offsets->begin;
1558 return 0;
1559}
1560
1561/*
1562 * Force a sub-buffer switch. This operation is completely reentrant : can be
1563 * called while tracing is active with absolutely no lock held.
1564 *
1565 * Note, however, that as a v_cmpxchg is used for some atomic
1566 * operations, this function must be called from the CPU which owns the buffer
1567 * for a ACTIVE flush.
1568 */
4cfec15c 1569void lib_ring_buffer_switch_slow(struct lttng_ust_lib_ring_buffer *buf, enum switch_mode mode,
38fae1d3 1570 struct lttng_ust_shm_handle *handle)
852c2936 1571{
1d498196 1572 struct channel *chan = shmp(handle, buf->backend.chan);
4cfec15c 1573 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
1574 struct switch_offsets offsets;
1575 unsigned long oldidx;
2fed87ae 1576 uint64_t tsc;
852c2936
MD
1577
1578 offsets.size = 0;
1579
1580 /*
1581 * Perform retryable operations.
1582 */
1583 do {
1584 if (lib_ring_buffer_try_switch_slow(mode, buf, chan, &offsets,
cfaee541 1585 &tsc, handle))
852c2936
MD
1586 return; /* Switch not needed */
1587 } while (v_cmpxchg(config, &buf->offset, offsets.old, offsets.end)
1588 != offsets.old);
1589
1590 /*
1591 * Atomically update last_tsc. This update races against concurrent
1592 * atomic updates, but the race will always cause supplementary full TSC
1593 * records, never the opposite (missing a full TSC record when it would
1594 * be needed).
1595 */
1596 save_last_tsc(config, buf, tsc);
1597
1598 /*
1599 * Push the reader if necessary
1600 */
1601 lib_ring_buffer_reserve_push_reader(buf, chan, offsets.old);
1602
1603 oldidx = subbuf_index(offsets.old, chan);
1d498196 1604 lib_ring_buffer_clear_noref(config, &buf->backend, oldidx, handle);
852c2936
MD
1605
1606 /*
1607 * May need to populate header start on SWITCH_FLUSH.
1608 */
1609 if (offsets.switch_old_start) {
1d498196 1610 lib_ring_buffer_switch_old_start(buf, chan, &offsets, tsc, handle);
852c2936
MD
1611 offsets.old += config->cb.subbuffer_header_size();
1612 }
1613
1614 /*
1615 * Switch old subbuffer.
1616 */
1d498196 1617 lib_ring_buffer_switch_old_end(buf, chan, &offsets, tsc, handle);
852c2936 1618}
852c2936
MD
1619
1620/*
1621 * Returns :
1622 * 0 if ok
1623 * -ENOSPC if event size is too large for packet.
1624 * -ENOBUFS if there is currently not enough space in buffer for the event.
1625 * -EIO if data cannot be written into the buffer for any other reason.
1626 */
1627static
4cfec15c 1628int lib_ring_buffer_try_reserve_slow(struct lttng_ust_lib_ring_buffer *buf,
852c2936
MD
1629 struct channel *chan,
1630 struct switch_offsets *offsets,
4cfec15c 1631 struct lttng_ust_lib_ring_buffer_ctx *ctx)
852c2936 1632{
4cfec15c 1633 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
38fae1d3 1634 struct lttng_ust_shm_handle *handle = ctx->handle;
1793b0b5 1635 unsigned long reserve_commit_diff, offset_cmp;
852c2936 1636
1793b0b5
MD
1637retry:
1638 offsets->begin = offset_cmp = v_read(config, &buf->offset);
852c2936
MD
1639 offsets->old = offsets->begin;
1640 offsets->switch_new_start = 0;
1641 offsets->switch_new_end = 0;
1642 offsets->switch_old_end = 0;
1643 offsets->pre_header_padding = 0;
1644
1645 ctx->tsc = config->cb.ring_buffer_clock_read(chan);
1646 if ((int64_t) ctx->tsc == -EIO)
1647 return -EIO;
1648
1649 if (last_tsc_overflow(config, buf, ctx->tsc))
1650 ctx->rflags |= RING_BUFFER_RFLAG_FULL_TSC;
1651
b5a3dfa5 1652 if (caa_unlikely(subbuf_offset(offsets->begin, ctx->chan) == 0)) {
852c2936
MD
1653 offsets->switch_new_start = 1; /* For offsets->begin */
1654 } else {
1655 offsets->size = config->cb.record_header_size(config, chan,
1656 offsets->begin,
1657 &offsets->pre_header_padding,
1658 ctx);
1659 offsets->size +=
1660 lib_ring_buffer_align(offsets->begin + offsets->size,
1661 ctx->largest_align)
1662 + ctx->data_size;
b5a3dfa5 1663 if (caa_unlikely(subbuf_offset(offsets->begin, chan) +
852c2936
MD
1664 offsets->size > chan->backend.subbuf_size)) {
1665 offsets->switch_old_end = 1; /* For offsets->old */
1666 offsets->switch_new_start = 1; /* For offsets->begin */
1667 }
1668 }
b5a3dfa5 1669 if (caa_unlikely(offsets->switch_new_start)) {
1793b0b5 1670 unsigned long sb_index, commit_count;
852c2936
MD
1671
1672 /*
1673 * We are typically not filling the previous buffer completely.
1674 */
b5a3dfa5 1675 if (caa_likely(offsets->switch_old_end))
852c2936
MD
1676 offsets->begin = subbuf_align(offsets->begin, chan);
1677 offsets->begin = offsets->begin
1678 + config->cb.subbuffer_header_size();
1679 /* Test new buffer integrity */
1680 sb_index = subbuf_index(offsets->begin, chan);
1793b0b5
MD
1681 /*
1682 * Read buf->offset before buf->commit_cold[sb_index].cc_sb.
1683 * lib_ring_buffer_check_deliver() has the matching
1684 * memory barriers required around commit_cold cc_sb
1685 * updates to ensure reserve and commit counter updates
1686 * are not seen reordered when updated by another CPU.
1687 */
1688 cmm_smp_rmb();
1689 commit_count = v_read(config,
1690 &shmp_index(handle, buf->commit_cold,
1691 sb_index)->cc_sb);
1692 /* Read buf->commit_cold[sb_index].cc_sb before buf->offset. */
1693 cmm_smp_rmb();
1694 if (caa_unlikely(offset_cmp != v_read(config, &buf->offset))) {
1695 /*
1696 * The reserve counter have been concurrently updated
1697 * while we read the commit counter. This means the
1698 * commit counter we read might not match buf->offset
1699 * due to concurrent update. We therefore need to retry.
1700 */
1701 goto retry;
1702 }
852c2936
MD
1703 reserve_commit_diff =
1704 (buf_trunc(offsets->begin, chan)
1705 >> chan->backend.num_subbuf_order)
1793b0b5 1706 - (commit_count & chan->commit_count_mask);
b5a3dfa5 1707 if (caa_likely(reserve_commit_diff == 0)) {
852c2936 1708 /* Next subbuffer not being written to. */
b5a3dfa5 1709 if (caa_unlikely(config->mode != RING_BUFFER_OVERWRITE &&
852c2936
MD
1710 subbuf_trunc(offsets->begin, chan)
1711 - subbuf_trunc((unsigned long)
a6352fd4 1712 uatomic_read(&buf->consumed), chan)
852c2936 1713 >= chan->backend.buf_size)) {
64493e4f
MD
1714 unsigned long nr_lost;
1715
852c2936
MD
1716 /*
1717 * We do not overwrite non consumed buffers
1718 * and we are full : record is lost.
1719 */
64493e4f 1720 nr_lost = v_read(config, &buf->records_lost_full);
852c2936 1721 v_inc(config, &buf->records_lost_full);
64493e4f
MD
1722 if ((nr_lost & (DBG_PRINT_NR_LOST - 1)) == 0) {
1723 DBG("%lu or more records lost in (%s:%d) (buffer full)\n",
1724 nr_lost + 1, chan->backend.name,
1725 buf->backend.cpu);
1726 }
852c2936
MD
1727 return -ENOBUFS;
1728 } else {
1729 /*
1730 * Next subbuffer not being written to, and we
1731 * are either in overwrite mode or the buffer is
1732 * not full. It's safe to write in this new
1733 * subbuffer.
1734 */
1735 }
1736 } else {
64493e4f
MD
1737 unsigned long nr_lost;
1738
852c2936
MD
1739 /*
1740 * Next subbuffer reserve offset does not match the
1793b0b5
MD
1741 * commit offset, and this did not involve update to the
1742 * reserve counter. Drop record in producer-consumer and
852c2936
MD
1743 * overwrite mode. Caused by either a writer OOPS or too
1744 * many nested writes over a reserve/commit pair.
1745 */
64493e4f 1746 nr_lost = v_read(config, &buf->records_lost_wrap);
852c2936 1747 v_inc(config, &buf->records_lost_wrap);
64493e4f
MD
1748 if ((nr_lost & (DBG_PRINT_NR_LOST - 1)) == 0) {
1749 DBG("%lu or more records lost in (%s:%d) (wrap-around)\n",
1750 nr_lost + 1, chan->backend.name,
1751 buf->backend.cpu);
1752 }
852c2936
MD
1753 return -EIO;
1754 }
1755 offsets->size =
1756 config->cb.record_header_size(config, chan,
1757 offsets->begin,
1758 &offsets->pre_header_padding,
1759 ctx);
1760 offsets->size +=
1761 lib_ring_buffer_align(offsets->begin + offsets->size,
1762 ctx->largest_align)
1763 + ctx->data_size;
b5a3dfa5 1764 if (caa_unlikely(subbuf_offset(offsets->begin, chan)
852c2936 1765 + offsets->size > chan->backend.subbuf_size)) {
64493e4f
MD
1766 unsigned long nr_lost;
1767
852c2936
MD
1768 /*
1769 * Record too big for subbuffers, report error, don't
1770 * complete the sub-buffer switch.
1771 */
64493e4f 1772 nr_lost = v_read(config, &buf->records_lost_big);
852c2936 1773 v_inc(config, &buf->records_lost_big);
64493e4f
MD
1774 if ((nr_lost & (DBG_PRINT_NR_LOST - 1)) == 0) {
1775 DBG("%lu or more records lost in (%s:%d) record size "
1776 " of %zu bytes is too large for buffer\n",
1777 nr_lost + 1, chan->backend.name,
1778 buf->backend.cpu, offsets->size);
1779 }
852c2936
MD
1780 return -ENOSPC;
1781 } else {
1782 /*
1783 * We just made a successful buffer switch and the
1784 * record fits in the new subbuffer. Let's write.
1785 */
1786 }
1787 } else {
1788 /*
1789 * Record fits in the current buffer and we are not on a switch
1790 * boundary. It's safe to write.
1791 */
1792 }
1793 offsets->end = offsets->begin + offsets->size;
1794
b5a3dfa5 1795 if (caa_unlikely(subbuf_offset(offsets->end, chan) == 0)) {
852c2936
MD
1796 /*
1797 * The offset_end will fall at the very beginning of the next
1798 * subbuffer.
1799 */
1800 offsets->switch_new_end = 1; /* For offsets->begin */
1801 }
1802 return 0;
1803}
1804
1805/**
1806 * lib_ring_buffer_reserve_slow - Atomic slot reservation in a buffer.
1807 * @ctx: ring buffer context.
1808 *
1809 * Return : -NOBUFS if not enough space, -ENOSPC if event size too large,
1810 * -EIO for other errors, else returns 0.
1811 * It will take care of sub-buffer switching.
1812 */
4cfec15c 1813int lib_ring_buffer_reserve_slow(struct lttng_ust_lib_ring_buffer_ctx *ctx)
852c2936
MD
1814{
1815 struct channel *chan = ctx->chan;
38fae1d3 1816 struct lttng_ust_shm_handle *handle = ctx->handle;
4cfec15c
MD
1817 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
1818 struct lttng_ust_lib_ring_buffer *buf;
852c2936
MD
1819 struct switch_offsets offsets;
1820 int ret;
1821
1822 if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
1d498196 1823 buf = shmp(handle, chan->backend.buf[ctx->cpu].shmp);
852c2936 1824 else
1d498196 1825 buf = shmp(handle, chan->backend.buf[0].shmp);
852c2936
MD
1826 ctx->buf = buf;
1827
1828 offsets.size = 0;
1829
1830 do {
1831 ret = lib_ring_buffer_try_reserve_slow(buf, chan, &offsets,
1832 ctx);
b5a3dfa5 1833 if (caa_unlikely(ret))
852c2936 1834 return ret;
b5a3dfa5 1835 } while (caa_unlikely(v_cmpxchg(config, &buf->offset, offsets.old,
852c2936
MD
1836 offsets.end)
1837 != offsets.old));
1838
1839 /*
1840 * Atomically update last_tsc. This update races against concurrent
1841 * atomic updates, but the race will always cause supplementary full TSC
1842 * records, never the opposite (missing a full TSC record when it would
1843 * be needed).
1844 */
1845 save_last_tsc(config, buf, ctx->tsc);
1846
1847 /*
1848 * Push the reader if necessary
1849 */
1850 lib_ring_buffer_reserve_push_reader(buf, chan, offsets.end - 1);
1851
1852 /*
1853 * Clear noref flag for this subbuffer.
1854 */
1855 lib_ring_buffer_clear_noref(config, &buf->backend,
1d498196
MD
1856 subbuf_index(offsets.end - 1, chan),
1857 handle);
852c2936
MD
1858
1859 /*
1860 * Switch old subbuffer if needed.
1861 */
b5a3dfa5 1862 if (caa_unlikely(offsets.switch_old_end)) {
852c2936 1863 lib_ring_buffer_clear_noref(config, &buf->backend,
1d498196
MD
1864 subbuf_index(offsets.old - 1, chan),
1865 handle);
1866 lib_ring_buffer_switch_old_end(buf, chan, &offsets, ctx->tsc, handle);
852c2936
MD
1867 }
1868
1869 /*
1870 * Populate new subbuffer.
1871 */
b5a3dfa5 1872 if (caa_unlikely(offsets.switch_new_start))
1d498196 1873 lib_ring_buffer_switch_new_start(buf, chan, &offsets, ctx->tsc, handle);
852c2936 1874
b5a3dfa5 1875 if (caa_unlikely(offsets.switch_new_end))
1d498196 1876 lib_ring_buffer_switch_new_end(buf, chan, &offsets, ctx->tsc, handle);
852c2936
MD
1877
1878 ctx->slot_size = offsets.size;
1879 ctx->pre_offset = offsets.begin;
1880 ctx->buf_offset = offsets.begin + offsets.pre_header_padding;
1881 return 0;
1882}
f645cfa7
MD
1883
1884/*
1885 * Force a read (imply TLS fixup for dlopen) of TLS variables.
1886 */
1887void lttng_fixup_ringbuffer_tls(void)
1888{
8c90a710 1889 asm volatile ("" : : "m" (URCU_TLS(lib_ring_buffer_nesting)));
f645cfa7 1890}
03d2d293
MD
1891
1892void lib_ringbuffer_signal_init(void)
1893{
1894 sigset_t mask;
1895 int ret;
1896
1897 /*
1898 * Block signal for entire process, so only our thread processes
1899 * it.
1900 */
1901 rb_setmask(&mask);
1902 ret = pthread_sigmask(SIG_BLOCK, &mask, NULL);
1903 if (ret) {
1904 errno = ret;
1905 PERROR("pthread_sigmask");
1906 }
1907}
This page took 0.119116 seconds and 4 git commands to generate.