X-Git-Url: https://git.liburcu.org/?p=urcu.git;a=blobdiff_plain;f=rculfhash.c;h=197a00f14984d09d8d3e08068a8f7bc89318b636;hp=f95ef5dc083aa4e22618512708beb37cee22e1e3;hb=bb7b2f262ba9a888271c31f48d8c4331d33f2376;hpb=5f51139190536f948f9571fdf3c97cf0198356d0 diff --git a/rculfhash.c b/rculfhash.c index f95ef5d..197a00f 100644 --- a/rculfhash.c +++ b/rculfhash.c @@ -167,12 +167,7 @@ #define CHAIN_LEN_RESIZE_THRESHOLD 3 /* - * Define the minimum table size. Protects against hash table resize overload - * when too many entries are added quickly before the resize can complete. - * This is especially the case if the table could be shrinked to a size of 1. - * TODO: we might want to make the add/remove operations help the resize to - * add or remove dummy nodes when a resize is ongoing to ensure upper-bound on - * chain length. + * Define the minimum table size. */ #define MIN_TABLE_SIZE 128 @@ -199,6 +194,9 @@ #define DUMMY_FLAG (1UL << 1) #define FLAGS_MASK ((1UL << 2) - 1) +/* Value of the end pointer. Should not interact with flags. */ +#define END_VALUE 0x4 + struct ht_items_count { unsigned long add, remove; } __attribute__((aligned(CAA_CACHE_LINE_SIZE))); @@ -246,6 +244,12 @@ struct rcu_resize_work { struct cds_lfht *ht; }; +static +struct cds_lfht_node *_cds_lfht_add(struct cds_lfht *ht, + unsigned long size, + struct cds_lfht_node *node, + int unique, int dummy); + /* * Algorithm to reverse bits in a word by lookup table, extended to * 64-bit words. 
@@ -651,7 +655,19 @@ struct cds_lfht_node *flag_dummy(struct cds_lfht_node *node) { return (struct cds_lfht_node *) (((unsigned long) node) | DUMMY_FLAG); } - + +static +struct cds_lfht_node *get_end(void) +{ + return (struct cds_lfht_node *) END_VALUE; +} + +static +int is_end(struct cds_lfht_node *node) +{ + return clear_flag(node) == (struct cds_lfht_node *) END_VALUE; +} + static unsigned long _uatomic_max(unsigned long *ptr, unsigned long v) { @@ -678,7 +694,7 @@ void cds_lfht_free_level(struct rcu_head *head) * Remove all logically deleted nodes from a bucket up to a certain node key. */ static -void _cds_lfht_gc_bucket(struct cds_lfht_node *dummy, struct cds_lfht_node *node) +int _cds_lfht_gc_bucket(struct cds_lfht_node *dummy, struct cds_lfht_node *node) { struct cds_lfht_node *iter_prev, *iter, *next, *new_next; @@ -690,6 +706,15 @@ void _cds_lfht_gc_bucket(struct cds_lfht_node *dummy, struct cds_lfht_node *node iter_prev = dummy; /* We can always skip the dummy node initially */ iter = rcu_dereference(iter_prev->p.next); + if (unlikely(iter == NULL)) { + /* + * We are executing concurrently with a hash table + * expand, so we see a dummy node with NULL next value. + * Help expand by linking this node into the list and + * retry. 
+ */ + return 1; + } assert(iter_prev->p.reverse_hash <= node->p.reverse_hash); /* * We should never be called with dummy (start of chain) @@ -699,10 +724,11 @@ void _cds_lfht_gc_bucket(struct cds_lfht_node *dummy, struct cds_lfht_node *node */ assert(dummy != node); for (;;) { - if (unlikely(!clear_flag(iter))) - return; + assert(iter != NULL); + if (unlikely(is_end(iter))) + return 0; if (likely(clear_flag(iter)->p.reverse_hash > node->p.reverse_hash)) - return; + return 0; next = rcu_dereference(clear_flag(iter)->p.next); if (likely(is_removed(next))) break; @@ -714,8 +740,10 @@ void _cds_lfht_gc_bucket(struct cds_lfht_node *dummy, struct cds_lfht_node *node new_next = flag_dummy(clear_flag(next)); else new_next = clear_flag(next); + assert(new_next != NULL); (void) uatomic_cmpxchg(&iter_prev->p.next, iter, new_next); } + return 0; } static @@ -728,12 +756,13 @@ struct cds_lfht_node *_cds_lfht_add(struct cds_lfht *ht, *dummy_node; struct _cds_lfht_node *lookup; unsigned long hash, index, order; + int force_dummy = 0; assert(!is_dummy(node)); assert(!is_removed(node)); if (!size) { assert(dummy); - node->p.next = flag_dummy(NULL); + node->p.next = flag_dummy(get_end()); return node; /* Initial first add (head) */ } hash = bit_reverse_ulong(node->p.reverse_hash); @@ -750,12 +779,28 @@ struct cds_lfht_node *_cds_lfht_add(struct cds_lfht *ht, iter_prev = (struct cds_lfht_node *) lookup; /* We can always skip the dummy node initially */ iter = rcu_dereference(iter_prev->p.next); + if (unlikely(iter == NULL)) { + /* + * We are executing concurrently with a hash table + * expand, so we see a dummy node with NULL next value. + * Help expand by linking this node into the list and + * retry. 
+ */ + (void) _cds_lfht_add(ht, size >> 1, iter_prev, 0, 1); + continue; /* retry */ + } assert(iter_prev->p.reverse_hash <= node->p.reverse_hash); for (;;) { - /* TODO: check if removed */ - if (unlikely(!clear_flag(iter))) + assert(iter != NULL); + /* + * When adding a dummy node, we allow concurrent + * add/removal to help. If we find the dummy node in + * place, skip its insertion. + */ + if (unlikely(dummy && clear_flag(iter) == node)) + return node; + if (unlikely(is_end(iter))) goto insert; - /* TODO: check if removed */ if (likely(clear_flag(iter)->p.reverse_hash > node->p.reverse_hash)) goto insert; next = rcu_dereference(clear_flag(iter)->p.next); @@ -779,14 +824,31 @@ struct cds_lfht_node *_cds_lfht_add(struct cds_lfht *ht, assert(!is_removed(iter_prev)); assert(!is_removed(iter)); assert(iter_prev != node); - if (!dummy) + if (!dummy) { node->p.next = clear_flag(iter); - else - node->p.next = flag_dummy(clear_flag(iter)); + } else { + /* + * Dummy node insertion is performed concurrently (help + * scheme). We try to link its next node, and if this + * succeeds, it means we are the ones linking this dummy + * node into the table. force_dummy is set as soon as + * this cmpxchg succeeds within this function. 
+ */ + if (!force_dummy) { + if (uatomic_cmpxchg(&node->p.next, NULL, + flag_dummy(clear_flag(iter))) != NULL) { + return NULL; + } + force_dummy = 1; + } else { + node->p.next = flag_dummy(clear_flag(iter)); + } + } if (is_dummy(iter)) new_node = flag_dummy(node); else new_node = node; + assert(new_node != NULL); if (uatomic_cmpxchg(&iter_prev->p.next, iter, new_node) != iter) continue; /* retry */ @@ -798,6 +860,7 @@ struct cds_lfht_node *_cds_lfht_add(struct cds_lfht *ht, new_next = flag_dummy(clear_flag(next)); else new_next = clear_flag(next); + assert(new_next != NULL); (void) uatomic_cmpxchg(&iter_prev->p.next, iter, new_next); /* retry */ } @@ -807,7 +870,11 @@ gc_end: order = get_count_order_ulong(index + 1); lookup = &ht->t.tbl[order]->nodes[index & (!order ? 0 : ((1UL << (order - 1)) - 1))]; dummy_node = (struct cds_lfht_node *) lookup; - _cds_lfht_gc_bucket(dummy_node, node); + if (_cds_lfht_gc_bucket(dummy_node, node)) { + /* Help expand */ + (void) _cds_lfht_add(ht, size >> 1, dummy_node, 0, 1); + goto gc_end; /* retry */ + } return node; } @@ -833,6 +900,7 @@ int _cds_lfht_remove(struct cds_lfht *ht, unsigned long size, assert(is_dummy(next)); else assert(!is_dummy(next)); + assert(next != NULL); old = uatomic_cmpxchg(&node->p.next, next, flag_removed(next)); } while (old != next); @@ -845,13 +913,18 @@ int _cds_lfht_remove(struct cds_lfht *ht, unsigned long size, * the node, and remove it (along with any other logically removed node) * if found. */ +gc_retry: hash = bit_reverse_ulong(node->p.reverse_hash); assert(size > 0); index = hash & (size - 1); order = get_count_order_ulong(index + 1); lookup = &ht->t.tbl[order]->nodes[index & (!order ? 
0 : ((1UL << (order - 1)) - 1))]; dummy = (struct cds_lfht_node *) lookup; - _cds_lfht_gc_bucket(dummy, node); + if (_cds_lfht_gc_bucket(dummy, node)) { + /* Help expand */ + (void) _cds_lfht_add(ht, size >> 1, dummy, 0, 1); + goto gc_retry; /* retry */ + } end: /* * Only the flagging action indicated that we (and no other) @@ -883,6 +956,11 @@ void init_table_hash(struct cds_lfht *ht, unsigned long i, } } +/* + * Holding RCU read lock to protect _cds_lfht_add against memory + * reclaim that could be performed by other call_rcu worker threads (ABA + * problem). + */ static void init_table_link(struct cds_lfht *ht, unsigned long i, unsigned long len) { @@ -905,11 +983,6 @@ void init_table_link(struct cds_lfht *ht, unsigned long i, unsigned long len) ht->cds_lfht_rcu_thread_offline(); } -/* - * Holding RCU read lock to protect _cds_lfht_add against memory - * reclaim that could be performed by other call_rcu worker threads (ABA - * problem). - */ static void init_table(struct cds_lfht *ht, unsigned long first_order, unsigned long len_order) @@ -924,30 +997,58 @@ void init_table(struct cds_lfht *ht, len = !i ? 1 : 1UL << (i - 1); dbg_printf("init order %lu len: %lu\n", i, len); + + /* Stop expand if the resize target changes under us */ + if (CMM_LOAD_SHARED(ht->t.resize_target) < (!i ? 1 : (1UL << i))) + break; + ht->t.tbl[i] = calloc(1, sizeof(struct rcu_level) + (len * sizeof(struct _cds_lfht_node))); /* Set all dummy nodes reverse hash values for a level */ init_table_hash(ht, i, len); + /* + * Update table size. At this point, concurrent add/remove see + * dummy nodes with correctly initialized reverse hash value, + * but with NULL next pointers. If they do, they can help us + * link the dummy nodes into the list and retry. + */ + cmm_smp_wmb(); /* populate data before RCU size */ + CMM_STORE_SHARED(ht->t.size, !i ? 1 : (1UL << i)); + /* * Link all dummy nodes into the table. Concurrent * add/remove are helping us. 
*/ init_table_link(ht, i, len); - /* - * Update table size (after init for now, because no - * concurrent updater help (TODO)). - */ - cmm_smp_wmb(); /* populate data before RCU size */ - CMM_STORE_SHARED(ht->t.size, !i ? 1 : (1UL << i)); dbg_printf("init new size: %lu\n", !i ? 1 : (1UL << i)); if (CMM_LOAD_SHARED(ht->in_progress_destroy)) break; } } +/* + * Holding RCU read lock to protect _cds_lfht_remove against memory + * reclaim that could be performed by other call_rcu worker threads (ABA + * problem). + * For a single level, we logically remove and garbage collect each node. + * + * As a design choice, we perform logical removal and garbage collection on a + * node-per-node basis to simplify this algorithm. We also assume keeping good + * cache locality of the operation would outweigh any performance gain + * that could be achieved by batching garbage collection for multiple levels. + * However, this would have to be justified by benchmarks. + * + * Concurrent removal and add operations are helping us perform garbage + * collection of logically removed nodes. We guarantee that all logically + * removed nodes have been garbage-collected (unlinked) before call_rcu is + * invoked to free a whole level of dummy nodes (after a grace period). + * + * Logical removal and garbage collection can therefore be done in batch or on a + * node-per-node basis, as long as the guarantee above holds. + */ static void remove_table(struct cds_lfht *ht, unsigned long i, unsigned long len) { @@ -972,11 +1073,6 @@ void remove_table(struct cds_lfht *ht, unsigned long i, unsigned long len) ht->cds_lfht_rcu_thread_offline(); } -/* - * Holding RCU read lock to protect _cds_lfht_remove against memory - * reclaim that could be performed by other call_rcu worker threads (ABA - * problem). 
- */ static void fini_table(struct cds_lfht *ht, unsigned long first_order, unsigned long len_order) @@ -987,13 +1083,27 @@ void fini_table(struct cds_lfht *ht, first_order, first_order + len_order); end_order = first_order + len_order; assert(first_order > 0); - assert(ht->t.size == (1UL << (first_order - 1))); for (i = end_order - 1; i >= first_order; i--) { unsigned long len; len = !i ? 1 : 1UL << (i - 1); dbg_printf("fini order %lu len: %lu\n", i, len); + /* Stop shrink if the resize target changes under us */ + if (CMM_LOAD_SHARED(ht->t.resize_target) > (1UL << (i - 1))) + break; + + cmm_smp_wmb(); /* populate data before RCU size */ + CMM_STORE_SHARED(ht->t.size, 1UL << (i - 1)); + + /* + * We need to wait for all add operations to reach Q.S. (and + * thus use the new table for lookups) before we can start + * releasing the old dummy nodes. Otherwise their lookup will + * return a logically removed node as insert position. + */ + ht->cds_lfht_synchronize_rcu(); + /* * Set "removed" flag in dummy nodes about to be removed. * Unlink all now-logically-removed dummy node pointers. 
@@ -1046,6 +1156,7 @@ struct cds_lfht *cds_lfht_new(cds_lfht_hash_fct hash_fct, ht->flags = flags; ht->cds_lfht_rcu_thread_offline(); pthread_mutex_lock(&ht->resize_mutex); + ht->t.resize_target = 1UL << (order - 1); init_table(ht, 0, order); pthread_mutex_unlock(&ht->resize_mutex); ht->cds_lfht_rcu_thread_online(); @@ -1054,23 +1165,39 @@ struct cds_lfht *cds_lfht_new(cds_lfht_hash_fct hash_fct, struct cds_lfht_node *cds_lfht_lookup(struct cds_lfht *ht, void *key, size_t key_len) { - struct cds_lfht_node *node, *next; + struct cds_lfht_node *node, *next, *dummy_node; struct _cds_lfht_node *lookup; unsigned long hash, reverse_hash, index, order, size; hash = ht->hash_fct(key, key_len, ht->hash_seed); reverse_hash = bit_reverse_ulong(hash); +restart: size = rcu_dereference(ht->t.size); index = hash & (size - 1); order = get_count_order_ulong(index + 1); lookup = &ht->t.tbl[order]->nodes[index & (!order ? 0 : ((1UL << (order - 1))) - 1)]; dbg_printf("lookup hash %lu index %lu order %lu aridx %lu\n", hash, index, order, index & (!order ? 0 : ((1UL << (order - 1)) - 1))); - node = (struct cds_lfht_node *) lookup; + dummy_node = (struct cds_lfht_node *) lookup; + /* We can always skip the dummy node initially */ + node = rcu_dereference(dummy_node->p.next); + if (unlikely(node == NULL)) { + /* + * We are executing concurrently with a hash table + * expand, so we see a dummy node with NULL next value. + * Help expand by linking this node into the list and + * retry. 
+ */ + (void) _cds_lfht_add(ht, size >> 1, dummy_node, 0, 1); + goto restart; /* retry */ + } + node = clear_flag(node); for (;;) { - if (unlikely(!node)) + if (unlikely(is_end(node))) { + node = NULL; break; + } if (unlikely(node->p.reverse_hash > reverse_hash)) { node = NULL; break; @@ -1102,8 +1229,10 @@ struct cds_lfht_node *cds_lfht_next(struct cds_lfht *ht, node = clear_flag(next); for (;;) { - if (unlikely(!node)) + if (unlikely(is_end(node))) { + node = NULL; break; + } if (unlikely(node->p.reverse_hash > reverse_hash)) { node = NULL; break; @@ -1175,7 +1304,7 @@ int cds_lfht_delete_dummy(struct cds_lfht *ht) if (!is_dummy(node)) return -EPERM; assert(!is_removed(node)); - } while (clear_flag(node)); + } while (!is_end(node)); /* * size accessed without rcu_dereference because hash table is * being destroyed. @@ -1241,7 +1370,7 @@ void cds_lfht_count_nodes(struct cds_lfht *ht, else (nr_dummy)++; node = clear_flag(next); - } while (node); + } while (!is_end(node)); dbg_printf("number of dummy nodes: %lu\n", nr_dummy); } @@ -1274,17 +1403,6 @@ void _do_cds_lfht_shrink(struct cds_lfht *ht, old_size, old_order, new_size, new_order); assert(new_size < old_size); - cmm_smp_wmb(); /* populate data before RCU size */ - CMM_STORE_SHARED(ht->t.size, new_size); - - /* - * We need to wait for all add operations to reach Q.S. (and - * thus use the new table for lookups) before we can start - * releasing the old dummy nodes. Otherwise their lookup will - * return a logically removed node as insert position. - */ - ht->cds_lfht_synchronize_rcu(); - /* Remove and unlink all dummy nodes to remove. */ fini_table(ht, new_order, old_order - new_order); } @@ -1310,7 +1428,7 @@ void _do_cds_lfht_resize(struct cds_lfht *ht) ht->t.resize_initiated = 0; /* write resize_initiated before read resize_target */ cmm_smp_mb(); - } while (new_size != CMM_LOAD_SHARED(ht->t.resize_target)); + } while (ht->t.size != CMM_LOAD_SHARED(ht->t.resize_target)); } static