libust/buffers.c

   1 /*
   2  * buffers.c
   3  * LTTng userspace tracer buffering system
   4  *
   5  * Copyright (C) 2009 - Pierre-Marc Fournier (pierre-marc dot fournier at polymtl dot ca)
   6  * Copyright (C) 2008-2011 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
   7  *
   8  * This library is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * This library is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with this library; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
  21  */
  22
  23 /*
  24  * Note: this code does not support the ref/noref flag and reader-owned
  25  * subbuffer scheme needed for flight recorder mode.
  26  */
  27
  28 #include <unistd.h>
  29 #include <sys/mman.h>
  30 #include <sys/ipc.h>
  31 #include <sys/shm.h>
  32 #include <fcntl.h>
  33 #include <stdlib.h>
  34
  35 #include <ust/clock.h>
  36
  37 #include "buffers.h"
  38 #include "channels.h"
  39 #include "tracer.h"
  40 #include "tracercore.h"
  41 #include "usterr_signal_safe.h"
  42
  43 struct ltt_reserve_switch_offsets {
  44         long begin, end, old;
  45         long begin_switch, end_switch_current, end_switch_old;
  46         size_t before_hdr_pad, size;
  47 };
  48
  49
  50 static DEFINE_MUTEX(ust_buffers_channels_mutex);
  51 static CDS_LIST_HEAD(ust_buffers_channels);
  52
  53 static void ltt_force_switch(struct ust_buffer *buf,
  54                 enum force_switch_mode mode);
  55
  56 static int get_n_cpus(void)
  57 {
  58         int result;
  59         static int n_cpus = 0;
  60
  61         if(!n_cpus) {
  62                 /* On Linux, when some processors are offline
  63                  * _SC_NPROCESSORS_CONF counts the offline
  64                  * processors, whereas _SC_NPROCESSORS_ONLN
  65                  * does not. If we used _SC_NPROCESSORS_ONLN,
  66                  * getcpu() could return a value greater than
  67                  * this sysconf, in which case the arrays
  68                  * indexed by processor would overflow.
  69                  */
  70                 result = sysconf(_SC_NPROCESSORS_CONF);
  71                 if(result == -1) {
  72                         return -1;
  73                 }
  74
  75                 n_cpus = result;
  76         }
  77
  78         return n_cpus;
  79 }
  80
  81 /**
  82  * _ust_buffers_strncpy_fixup - Fix an incomplete string in a ltt_relay buffer.
  83  * @buf : buffer
  84  * @offset : offset within the buffer
  85  * @len : length to write
  86  * @copied: string actually copied
  87  * @terminated: does string end with \0
  88  *
  89  * Fills string with "X" if incomplete.
  90  */
  91 void _ust_buffers_strncpy_fixup(struct ust_buffer *buf, size_t offset,
  92                                 size_t len, size_t copied, int terminated)
  93 {
  94         size_t buf_offset, cpy;
  95
  96         if (copied == len) {
  97                 /*
  98                  * Deal with non-terminated string.
  99                  */
 100                 assert(!terminated);
 101                 offset += copied - 1;
 102                 buf_offset = BUFFER_OFFSET(offset, buf->chan);
 103                 /*
 104                  * Underlying layer should never ask for writes across
 105                  * subbuffers.
 106                  */
 107                 assert(buf_offset
 108                        < buf->chan->subbuf_size*buf->chan->subbuf_cnt);
 109                 ust_buffers_do_memset(buf->buf_data + buf_offset, '\0', 1);
 110                 return;
 111         }
 112
 113         /*
 114          * Deal with incomplete string.
 115          * Overwrite string's \0 with X too.
 116          */
 117         cpy = copied - 1;
 118         assert(terminated);
 119         len -= cpy;
 120         offset += cpy;
 121         buf_offset = BUFFER_OFFSET(offset, buf->chan);
 122
 123         /*
 124          * Underlying layer should never ask for writes across subbuffers.
 125          */
 126         assert(buf_offset
 127                < buf->chan->subbuf_size*buf->chan->subbuf_cnt);
 128
 129         ust_buffers_do_memset(buf->buf_data + buf_offset,
 130                               'X', len);
 131
 132         /*
 133          * Overwrite last 'X' with '\0'.
 134          */
 135         offset += len - 1;
 136         buf_offset = BUFFER_OFFSET(offset, buf->chan);
 137         /*
 138          * Underlying layer should never ask for writes across subbuffers.
 139          */
 140         assert(buf_offset
 141                < buf->chan->subbuf_size*buf->chan->subbuf_cnt);
 142         ust_buffers_do_memset(buf->buf_data + buf_offset, '\0', 1);
 143 }
 144
 145 static void ltt_buffer_begin(struct ust_buffer *buf,
 146                              u64 tsc, unsigned int subbuf_idx)
 147 {
 148         struct ust_channel *channel = buf->chan;
 149         struct ltt_subbuffer_header *header =
 150                 (struct ltt_subbuffer_header *)
 151                         ust_buffers_offset_address(buf,
 152                                 subbuf_idx * buf->chan->subbuf_size);
 153
 154         header->cycle_count_begin = tsc;
 155         header->data_size = 0xFFFFFFFF; /* for recognizing crashed buffers */
 156         header->sb_size = 0xFFFFFFFF; /* for recognizing crashed buffers */
 157         /*
 158          * No memory barrier needed to order data_data/sb_size vs commit count
 159          * update, because commit count update contains a compiler barrier that
 160          * ensures the order of the writes are OK from a program POV. It only
 161          * matters for crash dump recovery which is not executed concurrently,
 162          * so memory write order does not matter.
 163          */
 164         ltt_write_trace_header(channel->trace, header);
 165 }
 166
 167 static int map_buf_data(struct ust_buffer *buf, size_t *size)
 168 {
 169         void *ptr;
 170         int result;
 171
 172         *size = PAGE_ALIGN(*size);
 173
 174         result = buf->shmid = shmget(getpid(), *size, IPC_CREAT | IPC_EXCL | 0700);
 175         if (result < 0 && errno == EINVAL) {
 176                 ERR("shmget() returned EINVAL; maybe /proc/sys/kernel/shmmax should be increased.");
 177                 return -1;
 178         } else if (result < 0) {
 179                 PERROR("shmget");
 180                 return -1;
 181         }
 182
 183         ptr = shmat(buf->shmid, NULL, 0);
 184         if (ptr == (void *) -1) {
 185                 perror("shmat");
 186                 goto destroy_shmem;
 187         }
 188
 189         /* Already mark the shared memory for destruction. This will occur only
 190          * when all users have detached.
 191          */
 192         result = shmctl(buf->shmid, IPC_RMID, NULL);
 193         if(result == -1) {
 194                 perror("shmctl");
 195                 return -1;
 196         }
 197
 198         buf->buf_data = ptr;
 199         buf->buf_size = *size;
 200
 201         return 0;
 202
 203 destroy_shmem:
 204         result = shmctl(buf->shmid, IPC_RMID, NULL);
 205         if(result == -1) {
 206                 perror("shmctl");
 207         }
 208
 209         return -1;
 210 }
 211
 212 static int open_buf(struct ust_channel *chan, int cpu)
 213 {
 214         int result, fds[2];
 215         unsigned int j;
 216         struct ust_trace *trace = chan->trace;
 217         struct ust_buffer *buf = chan->buf[cpu];
 218         unsigned int n_subbufs = chan->subbuf_cnt;
 219
 220
 221         result = map_buf_data(buf, &chan->alloc_size);
 222         if (result < 0)
 223                 return -1;
 224
 225         buf->commit_count =
 226                 zmalloc(sizeof(*buf->commit_count) * n_subbufs);
 227         if (!buf->commit_count)
 228                 goto unmap_buf;
 229
 230         result = pipe(fds);
 231         if (result < 0) {
 232                 PERROR("pipe");
 233                 goto free_commit_count;
 234         }
 235         buf->data_ready_fd_read = fds[0];
 236         buf->data_ready_fd_write = fds[1];
 237
 238         buf->cpu = cpu;
 239         buf->chan = chan;
 240
 241         uatomic_set(&buf->offset, ltt_subbuffer_header_size());
 242         uatomic_set(&buf->consumed, 0);
 243         uatomic_set(&buf->active_readers, 0);
 244         for (j = 0; j < n_subbufs; j++) {
 245                 uatomic_set(&buf->commit_count[j].cc, 0);
 246                 uatomic_set(&buf->commit_count[j].cc_sb, 0);
 247         }
 248
 249         ltt_buffer_begin(buf, trace->start_tsc, 0);
 250
 251         uatomic_add(&buf->commit_count[0].cc, ltt_subbuffer_header_size());
 252
 253         uatomic_set(&buf->events_lost, 0);
 254         uatomic_set(&buf->corrupted_subbuffers, 0);
 255
 256         memset(buf->commit_seq, 0, sizeof(buf->commit_seq[0]) * n_subbufs);
 257
 258         return 0;
 259
 260 free_commit_count:
 261         free(buf->commit_count);
 262
 263 unmap_buf:
 264         if (shmdt(buf->buf_data) < 0) {
 265                 PERROR("shmdt failed");
 266         }
 267
 268         return -1;
 269 }
 270
 271 static void close_buf(struct ust_buffer *buf)
 272 {
 273         int result;
 274
 275         result = shmdt(buf->buf_data);
 276         if (result < 0) {
 277                 PERROR("shmdt");
 278         }
 279
 280         result = close(buf->data_ready_fd_read);
 281         if (result < 0) {
 282                 PERROR("close");
 283         }
 284
 285         result = close(buf->data_ready_fd_write);
 286         if (result < 0 && errno != EBADF) {
 287                 PERROR("close");
 288         }
 289 }
 290
 291
 292 static int open_channel(struct ust_channel *chan, size_t subbuf_size,
 293                         size_t subbuf_cnt)
 294 {
 295         int i;
 296         int result;
 297
 298         if(subbuf_size == 0 || subbuf_cnt == 0)
 299                 return -1;
 300
 301         /* Check that the subbuffer size is larger than a page. */
 302         WARN_ON_ONCE(subbuf_size < PAGE_SIZE);
 303
 304         /*
 305          * Make sure the number of subbuffers and subbuffer size are power of 2.
 306          */
 307         WARN_ON_ONCE(hweight32(subbuf_size) != 1);
 308         WARN_ON(hweight32(subbuf_cnt) != 1);
 309
 310         chan->version = UST_CHANNEL_VERSION;
 311         chan->subbuf_cnt = subbuf_cnt;
 312         chan->subbuf_size = subbuf_size;
 313         chan->subbuf_size_order = get_count_order(subbuf_size);
 314         chan->alloc_size = subbuf_size * subbuf_cnt;
 315
 316         pthread_mutex_lock(&ust_buffers_channels_mutex);
 317         for (i=0; i < chan->n_cpus; i++) {
 318                 result = open_buf(chan, i);
 319                 if (result == -1)
 320                         goto error;
 321         }
 322         cds_list_add(&chan->list, &ust_buffers_channels);
 323         pthread_mutex_unlock(&ust_buffers_channels_mutex);
 324
 325         return 0;
 326
 327         /* Error handling */
 328 error:
 329         for(i--; i >= 0; i--)
 330                 close_buf(chan->buf[i]);
 331
 332         pthread_mutex_unlock(&ust_buffers_channels_mutex);
 333         return -1;
 334 }
 335
 336 static void close_channel(struct ust_channel *chan)
 337 {
 338         int i;
 339         if(!chan)
 340                 return;
 341
 342         pthread_mutex_lock(&ust_buffers_channels_mutex);
 343         /*
 344          * checking for chan->buf[i] being NULL or not is useless in
 345          * practice because we allocate buffers for all possible cpus.
 346          * However, should we decide to change this and only allocate
 347          * for online cpus, this check becomes useful.
 348          */
 349         for (i=0; i<chan->n_cpus; i++) {
 350                 if (chan->buf[i])
 351                         close_buf(chan->buf[i]);
 352         }
 353
 354         cds_list_del(&chan->list);
 355
 356         pthread_mutex_unlock(&ust_buffers_channels_mutex);
 357 }
 358
 359 /*
 360  * offset is assumed to never be 0 here : never deliver a completely empty
 361  * subbuffer. The lost size is between 0 and subbuf_size-1.
 362  */
 363 static notrace void ltt_buffer_end(struct ust_buffer *buf,
 364                 u64 tsc, unsigned int offset, unsigned int subbuf_idx)
 365 {
 366         struct ltt_subbuffer_header *header =
 367                 (struct ltt_subbuffer_header *)
 368                         ust_buffers_offset_address(buf,
 369                                 subbuf_idx * buf->chan->subbuf_size);
 370         u32 data_size = SUBBUF_OFFSET(offset - 1, buf->chan) + 1;
 371
 372         header->sb_size = PAGE_ALIGN(data_size);
 373         header->cycle_count_end = tsc;
 374         header->events_lost = uatomic_read(&buf->events_lost);
 375         header->subbuf_corrupt = uatomic_read(&buf->corrupted_subbuffers);
 376         if(unlikely(header->events_lost > 0)) {
 377                 DBG("Some events (%d) were lost in %s_%d", header->events_lost, buf->chan->channel_name, buf->cpu);
 378         }
 379         /*
 380          * Makes sure data_size write happens after write of the rest of the
 381          * buffer end data, because data_size is used to identify a completely
 382          * written subbuffer in a crash dump.
 383          */
 384         cmm_barrier();
 385         header->data_size = data_size;
 386 }
 387
 388 /*
 389  * This function should not be called from NMI interrupt context
 390  */
 391 static notrace void ltt_buf_unfull(struct ust_buffer *buf,
 392                 unsigned int subbuf_idx,
 393                 long offset)
 394 {
 395 }
 396
 397 /*
 398  * Promote compiler cmm_barrier to a smp_mb().
 399  * For the specific LTTng case, this IPI call should be removed if the
 400  * architecture does not reorder writes.  This should eventually be provided by
 401  * a separate architecture-specific infrastructure.
 402  */
 403 //ust// static void remote_mb(void *info)
 404 //ust// {
 405 //ust//         smp_mb();
 406 //ust// }
 407
 408 int ust_buffers_get_subbuf(struct ust_buffer *buf, long *consumed)
 409 {
 410         struct ust_channel *channel = buf->chan;
 411         long consumed_old, consumed_idx, commit_count, write_offset;
 412 //ust// int retval;
 413
 414         consumed_old = uatomic_read(&buf->consumed);
 415         consumed_idx = SUBBUF_INDEX(consumed_old, buf->chan);
 416         commit_count = uatomic_read(&buf->commit_count[consumed_idx].cc_sb);
 417         /*
 418          * Make sure we read the commit count before reading the buffer
 419          * data and the write offset. Correct consumed offset ordering
 420          * wrt commit count is insured by the use of cmpxchg to update
 421          * the consumed offset.
 422          */
 423
 424         /*
 425          * Local rmb to match the remote wmb to read the commit count before the
 426          * buffer data and the write offset.
 427          */
 428         cmm_smp_rmb();
 429
 430         write_offset = uatomic_read(&buf->offset);
 431         /*
 432          * Check that the subbuffer we are trying to consume has been
 433          * already fully committed.
 434          */
 435         if (((commit_count - buf->chan->subbuf_size)
 436              & channel->commit_count_mask)
 437             - (BUFFER_TRUNC(consumed_old, buf->chan)
 438                >> channel->n_subbufs_order)
 439             != 0) {
 440                 return -EAGAIN;
 441         }
 442         /*
 443          * Check that we are not about to read the same subbuffer in
 444          * which the writer head is.
 445          */
 446         if ((SUBBUF_TRUNC(write_offset, buf->chan)
 447            - SUBBUF_TRUNC(consumed_old, buf->chan))
 448            == 0) {
 449                 return -EAGAIN;
 450         }
 451         *consumed = consumed_old;
 452
 453         return 0;
 454 }
 455
 456 int ust_buffers_put_subbuf(struct ust_buffer *buf, unsigned long uconsumed_old)
 457 {
 458         long consumed_new, consumed_old;
 459
 460         consumed_old = uatomic_read(&buf->consumed);
 461         consumed_old = consumed_old & (~0xFFFFFFFFL);
 462         consumed_old = consumed_old | uconsumed_old;
 463         consumed_new = SUBBUF_ALIGN(consumed_old, buf->chan);
 464
 465         if (uatomic_cmpxchg(&buf->consumed, consumed_old,
 466                                 consumed_new)
 467             != consumed_old) {
 468                 /* We have been pushed by the writer : the last
 469                  * buffer read _is_ corrupted! It can also
 470                  * happen if this is a buffer we never got. */
 471                 return -EIO;
 472         } else {
 473                 /* tell the client that buffer is now unfull */
 474                 int index;
 475                 long data;
 476                 index = SUBBUF_INDEX(consumed_old, buf->chan);
 477                 data = BUFFER_OFFSET(consumed_old, buf->chan);
 478                 ltt_buf_unfull(buf, index, data);
 479         }
 480         return 0;
 481 }
 482
 483 static int map_buf_structs(struct ust_channel *chan)
 484 {
 485         void *ptr;
 486         int result;
 487         size_t size;
 488         int i;
 489
 490         size = PAGE_ALIGN(1);
 491
 492         for(i=0; i<chan->n_cpus; i++) {
 493
 494                 result = chan->buf_struct_shmids[i] = shmget(getpid(), size, IPC_CREAT | IPC_EXCL | 0700);
 495                 if(result == -1) {
 496                         PERROR("shmget");
 497                         goto destroy_previous;
 498                 }
 499
 500                 ptr = shmat(chan->buf_struct_shmids[i], NULL, 0);
 501                 if(ptr == (void *) -1) {
 502                         perror("shmat");
 503                         goto destroy_shm;
 504                 }
 505
 506                 /* Already mark the shared memory for destruction. This will occur only
 507                  * when all users have detached.
 508                  */
 509                 result = shmctl(chan->buf_struct_shmids[i], IPC_RMID, NULL);
 510                 if(result == -1) {
 511                         perror("shmctl");
 512                         goto destroy_previous;
 513                 }
 514
 515                 chan->buf[i] = ptr;
 516         }
 517
 518         return 0;
 519
 520         /* Jumping inside this loop occurs from within the other loop above with i as
 521          * counter, so it unallocates the structures for the cpu = current_i down to
 522          * zero. */
 523         for(; i>=0; i--) {
 524                 destroy_shm:
 525                 result = shmctl(chan->buf_struct_shmids[i], IPC_RMID, NULL);
 526                 if(result == -1) {
 527                         perror("shmctl");
 528                 }
 529
 530                 destroy_previous:
 531                 continue;
 532         }
 533
 534         return -1;
 535 }
 536
 537 static int unmap_buf_structs(struct ust_channel *chan)
 538 {
 539         int i;
 540
 541         for (i=0; i < chan->n_cpus; i++) {
 542                 if (shmdt(chan->buf[i]) < 0) {
 543                         PERROR("shmdt");
 544                 }
 545         }
 546         return 0;
 547 }
 548
 549 /*
 550  * Create channel.
 551  */
 552 static int create_channel(const char *trace_name, struct ust_trace *trace,
 553         const char *channel_name, struct ust_channel *chan,
 554         unsigned int subbuf_size, unsigned int n_subbufs, int overwrite)
 555 {
 556         int i, result;
 557
 558         chan->trace = trace;
 559         chan->overwrite = overwrite;
 560         chan->n_subbufs_order = get_count_order(n_subbufs);
 561         chan->commit_count_mask = (~0UL >> chan->n_subbufs_order);
 562         chan->n_cpus = get_n_cpus();
 563
 564         /* These mappings should ideall be per-cpu, if somebody can do that
 565          * from userspace, that would be cool!
 566          */
 567         chan->buf = (void *) zmalloc(chan->n_cpus * sizeof(void *));
 568         if(chan->buf == NULL) {
 569                 goto error;
 570         }
 571         chan->buf_struct_shmids = (int *) zmalloc(chan->n_cpus * sizeof(int));
 572         if(chan->buf_struct_shmids == NULL)
 573                 goto free_buf;
 574
 575         result = map_buf_structs(chan);
 576         if(result != 0) {
 577                 goto free_buf_struct_shmids;
 578         }
 579
 580         result = open_channel(chan, subbuf_size, n_subbufs);
 581         if (result != 0) {
 582                 ERR("Cannot open channel for trace %s", trace_name);
 583                 goto unmap_buf_structs;
 584         }
 585
 586         return 0;
 587
 588 unmap_buf_structs:
 589         for (i=0; i < chan->n_cpus; i++) {
 590                 if (shmdt(chan->buf[i]) < 0) {
 591                         PERROR("shmdt bufstruct");
 592                 }
 593         }
 594
 595 free_buf_struct_shmids:
 596         free(chan->buf_struct_shmids);
 597
 598 free_buf:
 599         free(chan->buf);
 600
 601 error:
 602         return -1;
 603 }
 604
 605
 606 static void remove_channel(struct ust_channel *chan)
 607 {
 608         close_channel(chan);
 609
 610         unmap_buf_structs(chan);
 611
 612         free(chan->buf_struct_shmids);
 613
 614         free(chan->buf);
 615 }
 616
 617 static void ltt_relay_async_wakeup_chan(struct ust_channel *ltt_channel)
 618 {
 619 }
 620
 621 static void ltt_relay_finish_buffer(struct ust_channel *channel, unsigned int cpu)
 622 {
 623         if (channel->buf[cpu]) {
 624                 struct ust_buffer *buf = channel->buf[cpu];
 625                 ltt_force_switch(buf, FORCE_FLUSH);
 626
 627                 /* closing the pipe tells the consumer the buffer is finished */
 628                 close(buf->data_ready_fd_write);
 629         }
 630 }
 631
 632
 633 static void finish_channel(struct ust_channel *channel)
 634 {
 635         unsigned int i;
 636
 637         for (i=0; i<channel->n_cpus; i++) {
 638                 ltt_relay_finish_buffer(channel, i);
 639         }
 640 }
 641
 642
 643 /*
 644  * ltt_reserve_switch_old_subbuf: switch old subbuffer
 645  *
 646  * Concurrency safe because we are the last and only thread to alter this
 647  * sub-buffer. As long as it is not delivered and read, no other thread can
 648  * alter the offset, alter the reserve_count or call the
 649  * client_buffer_end_callback on this sub-buffer.
 650  *
 651  * The only remaining threads could be the ones with pending commits. They will
 652  * have to do the deliver themselves.  Not concurrency safe in overwrite mode.
 653  * We detect corrupted subbuffers with commit and reserve counts. We keep a
 654  * corrupted sub-buffers count and push the readers across these sub-buffers.
 655  *
 656  * Not concurrency safe if a writer is stalled in a subbuffer and another writer
 657  * switches in, finding out it's corrupted.  The result will be than the old
 658  * (uncommited) subbuffer will be declared corrupted, and that the new subbuffer
 659  * will be declared corrupted too because of the commit count adjustment.
 660  *
 661  * Note : offset_old should never be 0 here.
 662  */
 663 static void ltt_reserve_switch_old_subbuf(
 664                 struct ust_channel *chan, struct ust_buffer *buf,
 665                 struct ltt_reserve_switch_offsets *offsets, u64 *tsc)
 666 {
 667         long oldidx = SUBBUF_INDEX(offsets->old - 1, chan);
 668         long commit_count, padding_size;
 669
 670         padding_size = chan->subbuf_size
 671                         - (SUBBUF_OFFSET(offsets->old - 1, chan) + 1);
 672         ltt_buffer_end(buf, *tsc, offsets->old, oldidx);
 673
 674         /*
 675          * Must write slot data before incrementing commit count.
 676          * This compiler barrier is upgraded into a cmm_smp_wmb() by the IPI
 677          * sent by get_subbuf() when it does its cmm_smp_rmb().
 678          */
 679         cmm_smp_wmb();
 680         uatomic_add(&buf->commit_count[oldidx].cc, padding_size);
 681         commit_count = uatomic_read(&buf->commit_count[oldidx].cc);
 682         ltt_check_deliver(chan, buf, offsets->old - 1, commit_count, oldidx);
 683         ltt_write_commit_counter(chan, buf, oldidx,
 684                 offsets->old, commit_count, padding_size);
 685 }
 686
 687 /*
 688  * ltt_reserve_switch_new_subbuf: Populate new subbuffer.
 689  *
 690  * This code can be executed unordered : writers may already have written to the
 691  * sub-buffer before this code gets executed, caution.  The commit makes sure
 692  * that this code is executed before the deliver of this sub-buffer.
 693  */
 694 static void ltt_reserve_switch_new_subbuf(
 695                 struct ust_channel *chan, struct ust_buffer *buf,
 696                 struct ltt_reserve_switch_offsets *offsets, u64 *tsc)
 697 {
 698         long beginidx = SUBBUF_INDEX(offsets->begin, chan);
 699         long commit_count;
 700
 701         ltt_buffer_begin(buf, *tsc, beginidx);
 702
 703         /*
 704          * Must write slot data before incrementing commit count.
 705          * This compiler barrier is upgraded into a cmm_smp_wmb() by the IPI
 706          * sent by get_subbuf() when it does its cmm_smp_rmb().
 707          */
 708         cmm_smp_wmb();
 709         uatomic_add(&buf->commit_count[beginidx].cc, ltt_subbuffer_header_size());
 710         commit_count = uatomic_read(&buf->commit_count[beginidx].cc);
 711         /* Check if the written buffer has to be delivered */
 712         ltt_check_deliver(chan, buf, offsets->begin, commit_count, beginidx);
 713         ltt_write_commit_counter(chan, buf, beginidx,
 714                 offsets->begin, commit_count, ltt_subbuffer_header_size());
 715 }
 716
 717 /*
 718  * ltt_reserve_end_switch_current: finish switching current subbuffer
 719  *
 720  * Concurrency safe because we are the last and only thread to alter this
 721  * sub-buffer. As long as it is not delivered and read, no other thread can
 722  * alter the offset, alter the reserve_count or call the
 723  * client_buffer_end_callback on this sub-buffer.
 724  *
 725  * The only remaining threads could be the ones with pending commits. They will
 726  * have to do the deliver themselves.  Not concurrency safe in overwrite mode.
 727  * We detect corrupted subbuffers with commit and reserve counts. We keep a
 728  * corrupted sub-buffers count and push the readers across these sub-buffers.
 729  *
 730  * Not concurrency safe if a writer is stalled in a subbuffer and another writer
 731  * switches in, finding out it's corrupted.  The result will be than the old
 732  * (uncommited) subbuffer will be declared corrupted, and that the new subbuffer
 733  * will be declared corrupted too because of the commit count adjustment.
 734  */
 735 static void ltt_reserve_end_switch_current(
 736                 struct ust_channel *chan,
 737                 struct ust_buffer *buf,
 738                 struct ltt_reserve_switch_offsets *offsets, u64 *tsc)
 739 {
 740         long endidx = SUBBUF_INDEX(offsets->end - 1, chan);
 741         long commit_count, padding_size;
 742
 743         padding_size = chan->subbuf_size
 744                         - (SUBBUF_OFFSET(offsets->end - 1, chan) + 1);
 745
 746         ltt_buffer_end(buf, *tsc, offsets->end, endidx);
 747
 748         /*
 749          * Must write slot data before incrementing commit count.
 750          * This compiler barrier is upgraded into a cmm_smp_wmb() by the IPI
 751          * sent by get_subbuf() when it does its cmm_smp_rmb().
 752          */
 753         cmm_smp_wmb();
 754         uatomic_add(&buf->commit_count[endidx].cc, padding_size);
 755         commit_count = uatomic_read(&buf->commit_count[endidx].cc);
 756         ltt_check_deliver(chan, buf,
 757                 offsets->end - 1, commit_count, endidx);
 758         ltt_write_commit_counter(chan, buf, endidx,
 759                 offsets->end, commit_count, padding_size);
 760 }
 761
 762 /*
 763  * Returns :
 764  * 0 if ok
 765  * !0 if execution must be aborted.
 766  */
 767 static int ltt_relay_try_switch_slow(
 768                 enum force_switch_mode mode,
 769                 struct ust_channel *chan,
 770                 struct ust_buffer *buf,
 771                 struct ltt_reserve_switch_offsets *offsets,
 772                 u64 *tsc)
 773 {
 774         long subbuf_index;
 775         long reserve_commit_diff;
 776
 777         offsets->begin = uatomic_read(&buf->offset);
 778         offsets->old = offsets->begin;
 779         offsets->begin_switch = 0;
 780         offsets->end_switch_old = 0;
 781
 782         *tsc = trace_clock_read64();
 783
 784         if (SUBBUF_OFFSET(offsets->begin, buf->chan) != 0) {
 785                 offsets->begin = SUBBUF_ALIGN(offsets->begin, buf->chan);
 786                 offsets->end_switch_old = 1;
 787         } else {
 788                 /* we do not have to switch : buffer is empty */
 789                 return -1;
 790         }
 791         if (mode == FORCE_ACTIVE)
 792                 offsets->begin += ltt_subbuffer_header_size();
 793         /*
 794          * Always begin_switch in FORCE_ACTIVE mode.
 795          * Test new buffer integrity
 796          */
 797         subbuf_index = SUBBUF_INDEX(offsets->begin, buf->chan);
 798         reserve_commit_diff =
 799                 (BUFFER_TRUNC(offsets->begin, buf->chan)
 800                  >> chan->n_subbufs_order)
 801                 - (uatomic_read(&buf->commit_count[subbuf_index].cc_sb)
 802                         & chan->commit_count_mask);
 803         if (reserve_commit_diff == 0) {
 804                 /* Next buffer not corrupted. */
 805                 if (mode == FORCE_ACTIVE
 806                     && !chan->overwrite
 807                     && offsets->begin - uatomic_read(&buf->consumed)
 808                        >= chan->alloc_size) {
 809                         /*
 810                          * We do not overwrite non consumed buffers and we are
 811                          * full : ignore switch while tracing is active.
 812                          */
 813                         return -1;
 814                 }
 815         } else {
 816                 /*
 817                  * Next subbuffer corrupted. Force pushing reader even in normal
 818                  * mode
 819                  */
 820         }
 821         offsets->end = offsets->begin;
 822         return 0;
 823 }
 824
 825 /*
 826  * Force a sub-buffer switch for a per-cpu buffer. This operation is
 827  * completely reentrant : can be called while tracing is active with
 828  * absolutely no lock held.
 829  */
 830 void ltt_force_switch_lockless_slow(struct ust_buffer *buf,
 831                 enum force_switch_mode mode)
 832 {
 833         struct ust_channel *chan = buf->chan;
 834         struct ltt_reserve_switch_offsets offsets;
 835         u64 tsc;
 836
 837         offsets.size = 0;
 838
 839         DBG("Switching (forced) %s_%d", chan->channel_name, buf->cpu);
 840         /*
 841          * Perform retryable operations.
 842          */
 843         do {
 844                 if (ltt_relay_try_switch_slow(mode, chan, buf,
 845                                 &offsets, &tsc))
 846                         return;
 847         } while (uatomic_cmpxchg(&buf->offset, offsets.old,
 848                         offsets.end) != offsets.old);
 849
 850         /*
 851          * Atomically update last_tsc. This update races against concurrent
 852          * atomic updates, but the race will always cause supplementary full TSC
 853          * events, never the opposite (missing a full TSC event when it would be
 854          * needed).
 855          */
 856         save_last_tsc(buf, tsc);
 857
 858         /*
 859          * Push the reader if necessary
 860          */
 861         if (mode == FORCE_ACTIVE) {
 862                 ltt_reserve_push_reader(chan, buf, offsets.end - 1);
 863         }
 864
 865         /*
 866          * Switch old subbuffer if needed.
 867          */
 868         if (offsets.end_switch_old) {
 869                 ltt_reserve_switch_old_subbuf(chan, buf, &offsets, &tsc);
 870         }
 871
 872         /*
 873          * Populate new subbuffer.
 874          */
 875         if (mode == FORCE_ACTIVE)
 876                 ltt_reserve_switch_new_subbuf(chan, buf, &offsets, &tsc);
 877 }
 878
 879 /*
 880  * Returns :
 881  * 0 if ok
 882  * !0 if execution must be aborted.
 883  */
 884 static int ltt_relay_try_reserve_slow(struct ust_channel *chan, struct ust_buffer *buf,
 885                 struct ltt_reserve_switch_offsets *offsets, size_t data_size,
 886                 u64 *tsc, unsigned int *rflags, int largest_align)
 887 {
 888         long reserve_commit_diff;
 889
 890         offsets->begin = uatomic_read(&buf->offset);
 891         offsets->old = offsets->begin;
 892         offsets->begin_switch = 0;
 893         offsets->end_switch_current = 0;
 894         offsets->end_switch_old = 0;
 895
 896         *tsc = trace_clock_read64();
 897         if (last_tsc_overflow(buf, *tsc))
 898                 *rflags = LTT_RFLAG_ID_SIZE_TSC;
 899
 900         if (unlikely(SUBBUF_OFFSET(offsets->begin, buf->chan) == 0)) {
 901                 offsets->begin_switch = 1;              /* For offsets->begin */
 902         } else {
 903                 offsets->size = ust_get_header_size(chan,
 904                                         offsets->begin, data_size,
 905                                         &offsets->before_hdr_pad, *rflags);
 906                 offsets->size += ltt_align(offsets->begin + offsets->size,
 907                                            largest_align)
 908                                  + data_size;
 909                 if (unlikely((SUBBUF_OFFSET(offsets->begin, buf->chan) +
 910                              offsets->size) > buf->chan->subbuf_size)) {
 911                         offsets->end_switch_old = 1;    /* For offsets->old */
 912                         offsets->begin_switch = 1;      /* For offsets->begin */
 913                 }
 914         }
 915         if (unlikely(offsets->begin_switch)) {
 916                 long subbuf_index;
 917
 918                 /*
 919                  * We are typically not filling the previous buffer completely.
 920                  */
 921                 if (likely(offsets->end_switch_old))
 922                         offsets->begin = SUBBUF_ALIGN(offsets->begin,
 923                                                       buf->chan);
 924                 offsets->begin = offsets->begin + ltt_subbuffer_header_size();
 925                 /* Test new buffer integrity */
 926                 subbuf_index = SUBBUF_INDEX(offsets->begin, buf->chan);
 927                 reserve_commit_diff =
 928                   (BUFFER_TRUNC(offsets->begin, buf->chan)
 929                    >> chan->n_subbufs_order)
 930                   - (uatomic_read(&buf->commit_count[subbuf_index].cc_sb)
 931                                 & chan->commit_count_mask);
 932                 if (likely(reserve_commit_diff == 0)) {
 933                         /* Next buffer not corrupted. */
 934                         if (unlikely(!chan->overwrite &&
 935                                 (SUBBUF_TRUNC(offsets->begin, buf->chan)
 936                                  - SUBBUF_TRUNC(uatomic_read(
 937                                                         &buf->consumed),
 938                                                 buf->chan))
 939                                 >= chan->alloc_size)) {
 940                                 /*
 941                                  * We do not overwrite non consumed buffers
 942                                  * and we are full : event is lost.
 943                                  */
 944                                 uatomic_inc(&buf->events_lost);
 945                                 return -1;
 946                         } else {
 947                                 /*
 948                                  * next buffer not corrupted, we are either in
 949                                  * overwrite mode or the buffer is not full.
 950                                  * It's safe to write in this new subbuffer.
 951                                  */
 952                         }
 953                 } else {
 954                         /*
 955                          * Next subbuffer corrupted. Drop event in normal and
 956                          * overwrite mode. Caused by either a writer OOPS or
 957                          * too many nested writes over a reserve/commit pair.
 958                          */
 959                         uatomic_inc(&buf->events_lost);
 960                         return -1;
 961                 }
 962                 offsets->size = ust_get_header_size(chan,
 963                                         offsets->begin, data_size,
 964                                         &offsets->before_hdr_pad, *rflags);
 965                 offsets->size += ltt_align(offsets->begin + offsets->size,
 966                                            largest_align)
 967                                  + data_size;
 968                 if (unlikely((SUBBUF_OFFSET(offsets->begin, buf->chan)
 969                              + offsets->size) > buf->chan->subbuf_size)) {
 970                         /*
 971                          * Event too big for subbuffers, report error, don't
 972                          * complete the sub-buffer switch.
 973                          */
 974                         uatomic_inc(&buf->events_lost);
 975                         return -1;
 976                 } else {
 977                         /*
 978                          * We just made a successful buffer switch and the event
 979                          * fits in the new subbuffer. Let's write.
 980                          */
 981                 }
 982         } else {
 983                 /*
 984                  * Event fits in the current buffer and we are not on a switch
 985                  * boundary. It's safe to write.
 986                  */
 987         }
 988         offsets->end = offsets->begin + offsets->size;
 989
 990         if (unlikely((SUBBUF_OFFSET(offsets->end, buf->chan)) == 0)) {
 991                 /*
 992                  * The offset_end will fall at the very beginning of the next
 993                  * subbuffer.
 994                  */
 995                 offsets->end_switch_current = 1;        /* For offsets->begin */
 996         }
 997         return 0;
 998 }
 999
1000 /**
1001  * ltt_relay_reserve_slot_lockless_slow - Atomic slot reservation in a buffer.
1002  * @trace: the trace structure to log to.
1003  * @ltt_channel: channel structure
1004  * @transport_data: data structure specific to ltt relay
1005  * @data_size: size of the variable length data to log.
1006  * @slot_size: pointer to total size of the slot (out)
1007  * @buf_offset : pointer to reserved buffer offset (out)
1008  * @tsc: pointer to the tsc at the slot reservation (out)
1009  * @cpu: cpuid
1010  *
1011  * Return : -ENOSPC if not enough space, else returns 0.
1012  * It will take care of sub-buffer switching.
1013  */
1014 int ltt_reserve_slot_lockless_slow(struct ust_channel *chan,
1015                 struct ust_trace *trace, size_t data_size,
1016                 int largest_align, int cpu,
1017                 struct ust_buffer **ret_buf,
1018                 size_t *slot_size, long *buf_offset,
1019                 u64 *tsc, unsigned int *rflags)
1020 {
1021         struct ust_buffer *buf = *ret_buf = chan->buf[cpu];
1022         struct ltt_reserve_switch_offsets offsets;
1023
1024         offsets.size = 0;
1025
1026         do {
1027                 if (unlikely(ltt_relay_try_reserve_slow(chan, buf, &offsets,
1028                                 data_size, tsc, rflags, largest_align)))
1029                         return -ENOSPC;
1030         } while (unlikely(uatomic_cmpxchg(&buf->offset, offsets.old,
1031                         offsets.end) != offsets.old));
1032
1033         /*
1034          * Atomically update last_tsc. This update races against concurrent
1035          * atomic updates, but the race will always cause supplementary full TSC
1036          * events, never the opposite (missing a full TSC event when it would be
1037          * needed).
1038          */
1039         save_last_tsc(buf, *tsc);
1040
1041         /*
1042          * Push the reader if necessary
1043          */
1044         ltt_reserve_push_reader(chan, buf, offsets.end - 1);
1045
1046         /*
1047          * Switch old subbuffer if needed.
1048          */
1049         if (unlikely(offsets.end_switch_old)) {
1050                 ltt_reserve_switch_old_subbuf(chan, buf, &offsets, tsc);
1051                 DBG("Switching %s_%d", chan->channel_name, cpu);
1052         }
1053
1054         /*
1055          * Populate new subbuffer.
1056          */
1057         if (unlikely(offsets.begin_switch))
1058                 ltt_reserve_switch_new_subbuf(chan, buf, &offsets, tsc);
1059
1060         if (unlikely(offsets.end_switch_current))
1061                 ltt_reserve_end_switch_current(chan, buf, &offsets, tsc);
1062
1063         *slot_size = offsets.size;
1064         *buf_offset = offsets.begin + offsets.before_hdr_pad;
1065         return 0;
1066 }
1067
1068 static struct ltt_transport ust_relay_transport = {
1069         .name = "ustrelay",
1070         .ops = {
1071                 .create_channel = create_channel,
1072                 .finish_channel = finish_channel,
1073                 .remove_channel = remove_channel,
1074                 .wakeup_channel = ltt_relay_async_wakeup_chan,
1075         },
1076 };
1077
1078 static char initialized = 0;
1079
1080 void __attribute__((constructor)) init_ustrelay_transport(void)
1081 {
1082         if(!initialized) {
1083                 ltt_transport_register(&ust_relay_transport);
1084                 initialized = 1;
1085         }
1086 }
1087
1088 static void __attribute__((destructor)) ust_buffers_exit(void)
1089 {
1090         ltt_transport_unregister(&ust_relay_transport);
1091 }
1092
1093 size_t ltt_write_event_header_slow(struct ust_channel *channel,
1094                 struct ust_buffer *buf, long buf_offset,
1095                 u16 eID, u32 event_size,
1096                 u64 tsc, unsigned int rflags)
1097 {
1098         struct ltt_event_header header;
1099         u16 small_size;
1100
1101         switch (rflags) {
1102         case LTT_RFLAG_ID_SIZE_TSC:
1103                 header.id_time = 29 << LTT_TSC_BITS;
1104                 break;
1105         case LTT_RFLAG_ID_SIZE:
1106                 header.id_time = 30 << LTT_TSC_BITS;
1107                 break;
1108         case LTT_RFLAG_ID:
1109                 header.id_time = 31 << LTT_TSC_BITS;
1110                 break;
1111         default:
1112                 WARN_ON_ONCE(1);
1113                 header.id_time = 0;
1114                 break;
1115         }
1116
1117         header.id_time |= (u32)tsc & LTT_TSC_MASK;
1118         ust_buffers_write(buf, buf_offset, &header, sizeof(header));
1119         buf_offset += sizeof(header);
1120
1121         switch (rflags) {
1122         case LTT_RFLAG_ID_SIZE_TSC:
1123                 small_size = (u16)min_t(u32, event_size, LTT_MAX_SMALL_SIZE);
1124                 ust_buffers_write(buf, buf_offset,
1125                         &eID, sizeof(u16));
1126                 buf_offset += sizeof(u16);
1127                 ust_buffers_write(buf, buf_offset,
1128                         &small_size, sizeof(u16));
1129                 buf_offset += sizeof(u16);
1130                 if (small_size == LTT_MAX_SMALL_SIZE) {
1131                         ust_buffers_write(buf, buf_offset,
1132                                 &event_size, sizeof(u32));
1133                         buf_offset += sizeof(u32);
1134                 }
1135                 buf_offset += ltt_align(buf_offset, sizeof(u64));
1136                 ust_buffers_write(buf, buf_offset,
1137                         &tsc, sizeof(u64));
1138                 buf_offset += sizeof(u64);
1139                 break;
1140         case LTT_RFLAG_ID_SIZE:
1141                 small_size = (u16)min_t(u32, event_size, LTT_MAX_SMALL_SIZE);
1142                 ust_buffers_write(buf, buf_offset,
1143                         &eID, sizeof(u16));
1144                 buf_offset += sizeof(u16);
1145                 ust_buffers_write(buf, buf_offset,
1146                         &small_size, sizeof(u16));
1147                 buf_offset += sizeof(u16);
1148                 if (small_size == LTT_MAX_SMALL_SIZE) {
1149                         ust_buffers_write(buf, buf_offset,
1150                                 &event_size, sizeof(u32));
1151                         buf_offset += sizeof(u32);
1152                 }
1153                 break;
1154         case LTT_RFLAG_ID:
1155                 ust_buffers_write(buf, buf_offset,
1156                         &eID, sizeof(u16));
1157                 buf_offset += sizeof(u16);
1158                 break;
1159         }
1160
1161         return buf_offset;
1162 }