From c1be8fb947a1d56a0f0b2fd82eca6f23c467b0ee Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 1 Oct 2013 20:06:37 -0400 Subject: [PATCH] Fix: urcu-bp segfault in glibc pthread_kill() MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This fixes an issue that appears after this recent urcu-bp fix is applied: Fix: urcu-bp: Bulletproof RCU arena resize bug Prior to this fix, on Linux at least, the behavior was to allocate (and leak) one memory map region per reader thread. It worked, except for the unfortunate leak. The fact that it worked, even though not the way we had intended it to, is why testing did not raise any red flag. That state of affairs has prevailed for a long time, but it was side-tracking some issues. After fixing the underlying bug that was causing the memory map leak, another issue appears. The garbage collection scheme reclaiming the thread tracking structures in urcu-bp fails in stress tests due to a bug in glibc (tested against glibc 2.13 and 2.17). Under this workload, on a 2-core/hyperthreaded i7: ./test_urcu_bp 40 4 10 we can easily trigger a segmentation fault in the pthread_kill() code. Program terminated with signal 11, Segmentation fault. Backtrace: #0 __pthread_kill (threadid=140723681437440, signo=0) at ../nptl/sysdeps/unix/sysv/linux/pthread_kill.c:42 42 ../nptl/sysdeps/unix/sysv/linux/pthread_kill.c: No such file or directory. 
(gdb) bt full #0 __pthread_kill (threadid=140723681437440, signo=0) at ../nptl/sysdeps/unix/sysv/linux/pthread_kill.c:42 __x = pd = 0x7ffcc90b2700 tid = val = #1 0x0000000000403009 in rcu_gc_registry () at ../../urcu-bp.c:437 tid = 140723681437440 ret = 0 chunk = 0x7ffcca0b8000 rcu_reader_reg = 0x7ffcca0b8120 __PRETTY_FUNCTION__ = "rcu_gc_registry" #2 0x0000000000402b9c in synchronize_rcu_bp () at ../../urcu-bp.c:230 cur_snap_readers = {next = 0x7ffcb4888cc0, prev = 0x7ffcb4888cc0} qsreaders = {next = 0x7ffcb4888cd0, prev = 0x7ffcb4888cd0} newmask = {__val = {18446744067267100671, 18446744073709551615 }} oldmask = {__val = {0, 140723337334144, 0, 0, 0, 140723690351643, 0, 140723127058464, 4, 0, 140723698253920, 140723693868864, 4096, 140723690370432, 140723698253920, 140723059951840}} ret = 0 __PRETTY_FUNCTION__ = "synchronize_rcu_bp" #3 0x0000000000401803 in thr_writer (_count=0x76b2f0) at test_urcu_bp.c:223 count = 0x76b2f0 new = 0x7ffca80008c0 old = 0x7ffca40008c0 #4 0x00007ffcc9c83f8e in start_thread (arg=0x7ffcb4889700) at pthread_create.c:311 __res = pd = 0x7ffcb4889700 now = unwind_buf = {cancel_jmp_buf = {{jmp_buf = {140723337336576, 6546223316613858487, 0, 140723698253920, 140723693868864, 4096, -6547756131873848137, -6547872135220034377}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}} not_first_call = 0 pagesize_m1 = sp = freesize = __PRETTY_FUNCTION__ = "start_thread" #5 0x00007ffcc99ade1d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:113 It appears that the memory backing the thread information can be relinquished by NPTL concurrently with execution of pthread_kill() targeting an already joined thread and cause this segfault. We were using pthread_kill(tid, 0) to discover if the target thread was alive or not, as documented in pthread_kill(3): If sig is 0, then no signal is sent, but error checking is still per‐ formed; this can be used to check for the existence of a thread ID. 
but it appears that the glibc implementation is racy. Instead of using the racy pthread_kill implementation, implement cleanup using a pthread_key destroy notifier for a dummy key. This notifier is called for each thread exit and destroy. Signed-off-by: Mathieu Desnoyers --- urcu-bp.c | 166 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 134 insertions(+), 32 deletions(-) diff --git a/urcu-bp.c b/urcu-bp.c index 190a569..387a52b 100644 --- a/urcu-bp.c +++ b/urcu-bp.c @@ -91,10 +91,18 @@ void *mremap_wrapper(void *old_address, size_t old_size, */ #define RCU_QS_ACTIVE_ATTEMPTS 100 +static +void __attribute__((constructor)) rcu_bp_init(void); +static void __attribute__((destructor)) rcu_bp_exit(void); static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER; +static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER; +static int initialized; + +static pthread_key_t urcu_bp_key; + #ifdef DEBUG_YIELD unsigned int rcu_yield_active; DEFINE_URCU_TLS(unsigned int, rcu_rand_yield); @@ -112,7 +120,7 @@ static CDS_LIST_HEAD(registry); struct registry_chunk { size_t data_len; /* data length */ - size_t used; /* data used */ + size_t used; /* amount of data used */ struct cds_list_head node; /* chunk_list node */ char data[]; }; @@ -128,8 +136,6 @@ static struct registry_arena registry_arena = { /* Saved fork signal mask, protected by rcu_gp_lock */ static sigset_t saved_fork_signal_mask; -static void rcu_gc_registry(void); - static void mutex_lock(pthread_mutex_t *mutex) { int ret; @@ -226,9 +232,6 @@ void synchronize_rcu(void) /* Write new ptr before changing the qparity */ cmm_smp_mb(); - /* Remove old registry elements */ - rcu_gc_registry(); - /* * Wait for readers to observe original parity or be quiescent. 
*/ @@ -402,10 +405,14 @@ static void add_thread(void) { struct rcu_reader *rcu_reader_reg; + int ret; rcu_reader_reg = arena_alloc(®istry_arena); if (!rcu_reader_reg) abort(); + ret = pthread_setspecific(urcu_bp_key, rcu_reader_reg); + if (ret) + abort(); /* Add to registry */ rcu_reader_reg->tid = pthread_self(); @@ -418,33 +425,42 @@ void add_thread(void) URCU_TLS(rcu_reader) = rcu_reader_reg; } -/* Called with signals off and mutex locked */ -static void rcu_gc_registry(void) +/* Called with mutex locked */ +static +void cleanup_thread(struct registry_chunk *chunk, + struct rcu_reader *rcu_reader_reg) +{ + rcu_reader_reg->ctr = 0; + cds_list_del(&rcu_reader_reg->node); + rcu_reader_reg->tid = 0; + rcu_reader_reg->alloc = 0; + chunk->used -= sizeof(struct rcu_reader); +} + +static +struct registry_chunk *find_chunk(struct rcu_reader *rcu_reader_reg) { struct registry_chunk *chunk; - struct rcu_reader *rcu_reader_reg; cds_list_for_each_entry(chunk, ®istry_arena.chunk_list, node) { - for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0]; - rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len]; - rcu_reader_reg++) { - pthread_t tid; - int ret; + if (rcu_reader_reg < (struct rcu_reader *) &chunk->data[0]) + continue; + if (rcu_reader_reg >= (struct rcu_reader *) &chunk->data[chunk->data_len]) + continue; + return chunk; + } + return NULL; +} - if (!rcu_reader_reg->alloc) - continue; - tid = rcu_reader_reg->tid; - ret = pthread_kill(tid, 0); - assert(ret != EINVAL); - if (ret == ESRCH) { - cds_list_del(&rcu_reader_reg->node); - rcu_reader_reg->ctr = 0; - rcu_reader_reg->alloc = 0; - chunk->used -= sizeof(struct rcu_reader); - } +/* Called with signals off and mutex locked */ +static +void remove_thread(void) +{ + struct rcu_reader *rcu_reader_reg; - } - } + rcu_reader_reg = URCU_TLS(rcu_reader); + cleanup_thread(find_chunk(rcu_reader_reg), rcu_reader_reg); + URCU_TLS(rcu_reader) = NULL; } /* Disable signals, take mutex, add to registry */ @@ 
-454,32 +470,95 @@ void rcu_bp_register(void) int ret; ret = sigfillset(&newmask); - assert(!ret); + if (ret) + abort(); ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask); - assert(!ret); + if (ret) + abort(); /* * Check if a signal concurrently registered our thread since - * the check in rcu_read_lock(). */ + * the check in rcu_read_lock(). + */ if (URCU_TLS(rcu_reader)) goto end; + /* + * Take care of early registration before urcu_bp constructor. + */ + rcu_bp_init(); + mutex_lock(&rcu_gp_lock); add_thread(); mutex_unlock(&rcu_gp_lock); end: ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL); - assert(!ret); + if (ret) + abort(); +} + +/* Disable signals, take mutex, remove from registry */ +static +void rcu_bp_unregister(void) +{ + sigset_t newmask, oldmask; + int ret; + + ret = sigfillset(&newmask); + if (ret) + abort(); + ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask); + if (ret) + abort(); + + mutex_lock(&rcu_gp_lock); + remove_thread(); + mutex_unlock(&rcu_gp_lock); + ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL); + if (ret) + abort(); +} + +/* + * Remove thread from the registry when it exits, and flag it as + * destroyed so garbage collection can take care of it. 
+ */ +static +void urcu_bp_thread_exit_notifier(void *rcu_key) +{ + assert(rcu_key == URCU_TLS(rcu_reader)); + rcu_bp_unregister(); +} + +static +void rcu_bp_init(void) +{ + mutex_lock(&init_lock); + if (!initialized) { + int ret; + + ret = pthread_key_create(&urcu_bp_key, + urcu_bp_thread_exit_notifier); + if (ret) + abort(); + initialized = 1; + } + mutex_unlock(&init_lock); } +static void rcu_bp_exit(void) { struct registry_chunk *chunk, *tmp; + int ret; cds_list_for_each_entry_safe(chunk, tmp, ®istry_arena.chunk_list, node) { munmap(chunk, chunk->data_len + sizeof(struct registry_chunk)); } + ret = pthread_key_delete(urcu_bp_key); + if (ret) + abort(); } /* @@ -511,12 +590,35 @@ void rcu_bp_after_fork_parent(void) assert(!ret); } +/* + * Prune all entries from registry except our own thread. Fits the Linux + * fork behavior. Called with rcu_gp_lock held. + */ +static +void urcu_bp_prune_registry(void) +{ + struct registry_chunk *chunk; + struct rcu_reader *rcu_reader_reg; + + cds_list_for_each_entry(chunk, ®istry_arena.chunk_list, node) { + for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0]; + rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len]; + rcu_reader_reg++) { + if (!rcu_reader_reg->alloc) + continue; + if (rcu_reader_reg->tid == pthread_self()) + continue; + cleanup_thread(chunk, rcu_reader_reg); + } + } +} + void rcu_bp_after_fork_child(void) { sigset_t oldmask; int ret; - rcu_gc_registry(); + urcu_bp_prune_registry(); oldmask = saved_fork_signal_mask; mutex_unlock(&rcu_gp_lock); ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL); -- 2.34.1