X-Git-Url: https://git.liburcu.org/?p=ust.git;a=blobdiff_plain;f=libust%2Fbuffers.h;h=850b9355017299ac3bc38c265f2ac86ce68aafe3;hp=4fa6262af1715da97ebd69afecfa4084d61566dd;hb=HEAD;hpb=6a843332a8ac7d1125e0961f148cc494e0397923 diff --git a/libust/buffers.h b/libust/buffers.h index 4fa6262..850b935 100644 --- a/libust/buffers.h +++ b/libust/buffers.h @@ -26,14 +26,12 @@ #include #include +#include -#include "usterr.h" +#include "usterr_signal_safe.h" #include "channels.h" #include "tracerconst.h" #include "tracercore.h" -#include "header-inline.h" - -/***** FIXME: SHOULD BE REMOVED ***** */ /* * BUFFER_TRUNC zeroes the subbuffer offset and the subbuffer number parts of @@ -57,6 +55,20 @@ /**************************************/ +/* + * TODO: using "long" type for struct ust_buffer (control structure + * shared between traced apps and the consumer) is a very bad idea when + * we get to systems with mixed 32/64-bit processes. + * + * But on 64-bit system, we want the full power of 64-bit counters, + * which wraps less often. Therefore, it's not as easy as "use 32-bit + * types everywhere". + * + * One way to deal with this is to: + * 1) Design 64-bit consumer so it can detect 32-bit and 64-bit apps. + * 2) The 32-bit consumer only supports 32-bit apps. + */ + struct commit_counters { long cc; /* ATOMIC */ long cc_sb; /* ATOMIC - Incremented _once_ at sb switch */ @@ -82,6 +94,11 @@ struct ust_buffer { int data_ready_fd_write; /* the reading end of the pipe */ int data_ready_fd_read; + /* + * List of buffers with an open pipe, used for fork and forced subbuffer + * switch. + */ + struct cds_list_head open_buffers_list; unsigned int finalized; //ust// struct timer_list switch_timer; /* timer for periodical switch */ @@ -89,20 +106,20 @@ struct ust_buffer { struct ust_channel *chan; - struct kref kref; + struct urcu_ref urcu_ref; void *buf_data; size_t buf_size; int shmid; unsigned int cpu; /* commit count per subbuffer; must be at end of struct */ - long commit_seq[0] ____cacheline_aligned; /* ATOMIC */ + long commit_seq[0]; /* ATOMIC */ } ____cacheline_aligned; /* - * A switch is done during tracing or as a final flush after tracing (so it - * won't write in the new sub-buffer). - * FIXME: make this message clearer + * A switch is either done during tracing (FORCE_ACTIVE) or as a final + * flush after tracing (with FORCE_FLUSH). FORCE_FLUSH ensures we won't + * write in the new sub-buffer). */ enum force_switch_mode { FORCE_ACTIVE, FORCE_FLUSH }; @@ -116,6 +133,40 @@ extern int ltt_reserve_slot_lockless_slow(struct ust_channel *chan, extern void ltt_force_switch_lockless_slow(struct ust_buffer *buf, enum force_switch_mode mode); +#ifndef HAVE_EFFICIENT_UNALIGNED_ACCESS + +/* + * Calculate the offset needed to align the type. + * size_of_type must be non-zero. + */ +static inline unsigned int ltt_align(size_t align_drift, size_t size_of_type) +{ + size_t alignment = min(sizeof(void *), size_of_type); + return (alignment - align_drift) & (alignment - 1); +} +/* Default arch alignment */ +#define LTT_ALIGN + +static inline int ltt_get_alignment(void) +{ + return sizeof(void *); +} + +#else /* HAVE_EFFICIENT_UNALIGNED_ACCESS */ + +static inline unsigned int ltt_align(size_t align_drift, + size_t size_of_type) +{ + return 0; +} + +#define LTT_ALIGN __attribute__((packed)) + +static inline int ltt_get_alignment(void) +{ + return 0; +} +#endif /* HAVE_EFFICIENT_UNALIGNED_ACCESS */ static __inline__ void ust_buffers_do_copy(void *dest, const void *src, size_t len) { @@ -188,6 +239,63 @@ static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf, } #endif +/* + * ust_get_header_size + * + * Calculate alignment offset to 32-bits. This is the alignment offset of the + * event header. + * + * Important note : + * The event header must be 32-bits. The total offset calculated here : + * + * Alignment of header struct on 32 bits (min arch size, header size) + * + sizeof(header struct) (32-bits) + * + (opt) u16 (ext. event id) + * + (opt) u16 (event_size) (if event_size == 0xFFFFUL, has ext. event size) + * + (opt) u32 (ext. event size) + * + (opt) u64 full TSC (aligned on min(64-bits, arch size)) + * + * The payload must itself determine its own alignment from the biggest type it + * contains. + * */ +static __inline__ unsigned char ust_get_header_size( + struct ust_channel *channel, + size_t offset, + size_t data_size, + size_t *before_hdr_pad, + unsigned int rflags) +{ + size_t orig_offset = offset; + size_t padding; + + padding = ltt_align(offset, sizeof(struct ltt_event_header)); + offset += padding; + offset += sizeof(struct ltt_event_header); + + if(unlikely(rflags)) { + switch (rflags) { + case LTT_RFLAG_ID_SIZE_TSC: + offset += sizeof(u16) + sizeof(u16); + if (data_size >= 0xFFFFU) + offset += sizeof(u32); + offset += ltt_align(offset, sizeof(u64)); + offset += sizeof(u64); + break; + case LTT_RFLAG_ID_SIZE: + offset += sizeof(u16) + sizeof(u16); + if (data_size >= 0xFFFFU) + offset += sizeof(u32); + break; + case LTT_RFLAG_ID: + offset += sizeof(u16); + break; + } + } + + *before_hdr_pad = padding; + return offset - orig_offset; +} + static __inline__ void ltt_reserve_push_reader( struct ust_channel *rchan, struct ust_buffer *buf, @@ -271,7 +379,7 @@ static __inline__ int ltt_poll_deliver(struct ust_channel *chan, struct ust_buff consumed_idx = SUBBUF_INDEX(consumed_old, buf->chan); commit_count = uatomic_read(&buf->commit_count[consumed_idx].cc_sb); /* - * No memory barrier here, since we are only interested + * No memory cmm_barrier here, since we are only interested * in a statistically correct polling result. The next poll will * get the data is we are racing. The mb() that ensures correct * memory order is in get_subbuf. @@ -319,12 +427,6 @@ static __inline__ int ltt_relay_try_reserve( *tsc = trace_clock_read64(); -//ust// #ifdef CONFIG_LTT_VMCORE -//ust// prefetch(&buf->commit_count[SUBBUF_INDEX(*o_begin, rchan)]); -//ust// prefetch(&buf->commit_seq[SUBBUF_INDEX(*o_begin, rchan)]); -//ust// #else -//ust// prefetchw(&buf->commit_count[SUBBUF_INDEX(*o_begin, rchan)]); -//ust// #endif if (last_tsc_overflow(buf, *tsc)) *rflags = LTT_RFLAG_ID_SIZE_TSC; @@ -369,8 +471,7 @@ static __inline__ int ltt_reserve_slot(struct ust_channel *chan, /* * Perform retryable operations. */ - /* FIXME: make this really per cpu? */ - if (unlikely(LOAD_SHARED(ltt_nesting) > 4)) { + if (unlikely(CMM_LOAD_SHARED(ltt_nesting) > 4)) { DBG("Dropping event because nesting is too deep."); uatomic_inc(&buf->events_lost); return -EPERM; @@ -398,11 +499,6 @@ static __inline__ int ltt_reserve_slot(struct ust_channel *chan, */ ltt_reserve_push_reader(chan, buf, o_end - 1); - /* - * Clear noref flag for this subbuffer. - */ -//ust// ltt_clear_noref_flag(chan, buf, SUBBUF_INDEX(o_end - 1, chan)); - *buf_offset = o_begin + before_hdr_pad; return 0; slow_path: @@ -429,7 +525,6 @@ static __inline__ void ltt_force_switch(struct ust_buffer *buf, * commit count reaches back the reserve offset (module subbuffer size). It is * useful for crash dump. */ -//ust// #ifdef CONFIG_LTT_VMCORE static __inline__ void ltt_write_commit_counter(struct ust_channel *chan, struct ust_buffer *buf, long idx, long buf_offset, long commit_count, size_t data_size) @@ -455,12 +550,6 @@ static __inline__ void ltt_write_commit_counter(struct ust_channel *chan, DBG("commit_seq for channel %s_%d, subbuf %ld is now %ld", buf->chan->channel_name, buf->cpu, idx, commit_count); } -//ust// #else -//ust// static __inline__ void ltt_write_commit_counter(struct ust_buffer *buf, -//ust// long idx, long buf_offset, long commit_count, size_t data_size) -//ust// { -//ust// } -//ust// #endif /* * Atomic unordered slot commit. Increments the commit count in the @@ -483,7 +572,7 @@ static __inline__ void ltt_commit_slot( long endidx = SUBBUF_INDEX(offset_end - 1, chan); long commit_count; - smp_wmb(); + cmm_smp_wmb(); uatomic_add(&buf->commit_count[endidx].cc, slot_size); /* @@ -513,22 +602,92 @@ static __inline__ void ltt_commit_slot( ltt_write_commit_counter(chan, buf, endidx, buf_offset, commit_count, data_size); } -void _ust_buffers_write(struct ust_buffer *buf, size_t offset, - const void *src, size_t len, ssize_t cpy); +void _ust_buffers_strncpy_fixup(struct ust_buffer *buf, size_t offset, + size_t len, size_t copied, int terminated); static __inline__ int ust_buffers_write(struct ust_buffer *buf, size_t offset, const void *src, size_t len) { - size_t cpy; size_t buf_offset = BUFFER_OFFSET(offset, buf->chan); assert(buf_offset < buf->chan->subbuf_size*buf->chan->subbuf_cnt); + assert(buf_offset + len + <= buf->chan->subbuf_size*buf->chan->subbuf_cnt); - cpy = min_t(size_t, len, buf->buf_size - buf_offset); - ust_buffers_do_copy(buf->buf_data + buf_offset, src, cpy); + ust_buffers_do_copy(buf->buf_data + buf_offset, src, len); - if (unlikely(len != cpy)) - _ust_buffers_write(buf, buf_offset, src, len, cpy); + return len; +} + +/* + * ust_buffers_do_memset - write character into dest. + * @dest: destination + * @src: source character + * @len: length to write + */ +static __inline__ +void ust_buffers_do_memset(void *dest, char src, size_t len) +{ + /* + * What we really want here is an __inline__ memset, but we + * don't have constants, so gcc generally uses a function call. + */ + for (; len > 0; len--) + *(u8 *)dest++ = src; +} + +/* + * ust_buffers_do_strncpy - copy a string up to a certain number of bytes + * @dest: destination + * @src: source + * @len: max. length to copy + * @terminated: output string ends with \0 (output) + * + * returns the number of bytes copied. Does not finalize with \0 if len is + * reached. + */ +static __inline__ +size_t ust_buffers_do_strncpy(void *dest, const void *src, size_t len, + int *terminated) +{ + size_t orig_len = len; + + *terminated = 0; + /* + * What we really want here is an __inline__ strncpy, but we + * don't have constants, so gcc generally uses a function call. + */ + for (; len > 0; len--) { + *(u8 *)dest = CMM_LOAD_SHARED(*(const u8 *)src); + /* Check with dest, because src may be modified concurrently */ + if (*(const u8 *)dest == '\0') { + len--; + *terminated = 1; + break; + } + dest++; + src++; + } + return orig_len - len; +} + +static __inline__ +int ust_buffers_strncpy(struct ust_buffer *buf, size_t offset, const void *src, + size_t len) +{ + size_t buf_offset = BUFFER_OFFSET(offset, buf->chan); + ssize_t copied; + int terminated; + + assert(buf_offset < buf->chan->subbuf_size*buf->chan->subbuf_cnt); + assert(buf_offset + len + <= buf->chan->subbuf_size*buf->chan->subbuf_cnt); + + copied = ust_buffers_do_strncpy(buf->buf_data + buf_offset, + src, len, &terminated); + if (unlikely(copied < len || !terminated)) + _ust_buffers_strncpy_fixup(buf, offset, len, copied, + terminated); return len; }