X-Git-Url: https://git.liburcu.org/?p=urcu.git;a=blobdiff_plain;f=rculfhash.c;h=074a6f4ea8c4c40f7b2f7374d3ecdc8387e34bff;hp=824a940391461a1a3b7f1db0a847be38e84dd741;hb=860d07e812c3001e9d6ce261c8b861dabd3203b3;hpb=17f31d1b07ffb59db8bad3ce2ae8f824ed820de0 diff --git a/rculfhash.c b/rculfhash.c index 824a940..074a6f4 100644 --- a/rculfhash.c +++ b/rculfhash.c @@ -169,7 +169,6 @@ /* * Define the minimum table size. */ -//#define MIN_TABLE_SIZE 128 #define MIN_TABLE_SIZE 1 #if (CAA_BITS_PER_LONG == 32) @@ -178,6 +177,12 @@ #define MAX_TABLE_ORDER 64 #endif +/* + * Minimum number of dummy nodes to touch per thread to parallelize grow/shrink. + */ +#define MIN_PARTITION_PER_THREAD_ORDER 12 +#define MIN_PARTITION_PER_THREAD (1UL << MIN_PARTITION_PER_THREAD_ORDER) + #ifndef min #define min(a, b) ((a) < (b) ? (a) : (b)) #endif @@ -196,10 +201,10 @@ #define FLAGS_MASK ((1UL << 2) - 1) /* Value of the end pointer. Should not interact with flags. */ -#define END_VALUE 0x4 +#define END_VALUE NULL struct ht_items_count { - unsigned long add, remove; + unsigned long add, del; } __attribute__((aligned(CAA_CACHE_LINE_SIZE))); struct rcu_level { @@ -236,6 +241,9 @@ struct cds_lfht { void (*cds_lfht_rcu_read_unlock)(void); void (*cds_lfht_rcu_thread_offline)(void); void (*cds_lfht_rcu_thread_online)(void); + void (*cds_lfht_rcu_register_thread)(void); + void (*cds_lfht_rcu_unregister_thread)(void); + pthread_attr_t *resize_attr; /* Resize threads attributes */ unsigned long count; /* global approximate item count */ struct ht_items_count *percpu_count; /* per-cpu item count */ }; @@ -245,6 +253,14 @@ struct rcu_resize_work { struct cds_lfht *ht; }; +struct partition_resize_work { + struct rcu_head head; + struct cds_lfht *ht; + unsigned long i, start, len; + void (*fct)(struct cds_lfht *ht, unsigned long i, + unsigned long start, unsigned long len); +}; + static struct cds_lfht_node *_cds_lfht_add(struct cds_lfht *ht, unsigned long size, @@ -549,7 +565,7 @@ void ht_count_add(struct cds_lfht *ht, unsigned long size) } static -void ht_count_remove(struct cds_lfht *ht, unsigned long size) +void ht_count_del(struct cds_lfht *ht, unsigned long size) { unsigned long percpu_count; int cpu; @@ -559,18 +575,18 @@ void ht_count_remove(struct cds_lfht *ht, unsigned long size) cpu = ht_get_cpu(); if (unlikely(cpu < 0)) return; - percpu_count = uatomic_add_return(&ht->percpu_count[cpu].remove, -1); + percpu_count = uatomic_add_return(&ht->percpu_count[cpu].del, -1); if (unlikely(!(percpu_count & ((1UL << COUNT_COMMIT_ORDER) - 1)))) { unsigned long count; - dbg_printf("remove percpu %lu\n", percpu_count); + dbg_printf("del percpu %lu\n", percpu_count); count = uatomic_add_return(&ht->count, -(1UL << COUNT_COMMIT_ORDER)); /* If power of 2 */ if (!(count & (count - 1))) { if ((count >> CHAIN_LEN_RESIZE_THRESHOLD) >= size) return; - dbg_printf("remove set global %lu\n", count); + dbg_printf("del set global %lu\n", count); cds_lfht_resize_lazy_count(ht, size, count >> (CHAIN_LEN_TARGET - 1)); } @@ -598,7 +614,7 @@ void ht_count_add(struct cds_lfht *ht, unsigned long size) } static -void ht_count_remove(struct cds_lfht *ht, unsigned long size) +void ht_count_del(struct cds_lfht *ht, unsigned long size) { } @@ -695,7 +711,7 @@ void cds_lfht_free_level(struct rcu_head *head) * Remove all logically deleted nodes from a bucket up to a certain node key. */ static -int _cds_lfht_gc_bucket(struct cds_lfht_node *dummy, struct cds_lfht_node *node) +void _cds_lfht_gc_bucket(struct cds_lfht_node *dummy, struct cds_lfht_node *node) { struct cds_lfht_node *iter_prev, *iter, *next, *new_next; @@ -707,15 +723,6 @@ int _cds_lfht_gc_bucket(struct cds_lfht_node *dummy, struct cds_lfht_node *node) iter_prev = dummy; /* We can always skip the dummy node initially */ iter = rcu_dereference(iter_prev->p.next); - if (unlikely(iter == NULL)) { - /* - * We are executing concurrently with a hash table - * expand, so we see a dummy node with NULL next value. - * Help expand by linking this node into the list and - * retry. - */ - return 1; - } assert(iter_prev->p.reverse_hash <= node->p.reverse_hash); /* * We should never be called with dummy (start of chain) @@ -725,11 +732,10 @@ int _cds_lfht_gc_bucket(struct cds_lfht_node *dummy, struct cds_lfht_node *node) */ assert(dummy != node); for (;;) { - assert(iter != NULL); if (unlikely(is_end(iter))) - return 0; + return; if (likely(clear_flag(iter)->p.reverse_hash > node->p.reverse_hash)) - return 0; + return; next = rcu_dereference(clear_flag(iter)->p.next); if (likely(is_removed(next))) break; @@ -741,10 +747,9 @@ int _cds_lfht_gc_bucket(struct cds_lfht_node *dummy, struct cds_lfht_node *node) new_next = flag_dummy(clear_flag(next)); else new_next = clear_flag(next); - assert(new_next != NULL); (void) uatomic_cmpxchg(&iter_prev->p.next, iter, new_next); } - return 0; + return; } static @@ -757,7 +762,6 @@ struct cds_lfht_node *_cds_lfht_add(struct cds_lfht *ht, *dummy_node; struct _cds_lfht_node *lookup; unsigned long hash, index, order; - int force_dummy = 0; assert(!is_dummy(node)); assert(!is_removed(node)); @@ -780,26 +784,8 @@ struct cds_lfht_node *_cds_lfht_add(struct cds_lfht *ht, iter_prev = (struct cds_lfht_node *) lookup; /* We can always skip the dummy node initially */ iter = rcu_dereference(iter_prev->p.next); - if (unlikely(iter == NULL)) { - /* - * We are executing concurrently with a hash table - * expand, so we see a dummy node with NULL next value. - * Help expand by linking this node into the list and - * retry. - */ - (void) _cds_lfht_add(ht, size >> 1, iter_prev, 0, 1); - continue; /* retry */ - } assert(iter_prev->p.reverse_hash <= node->p.reverse_hash); for (;;) { - assert(iter != NULL); - /* - * When adding a dummy node, we allow concurrent - * add/removal to help. If we find the dummy node in - * place, skip its insertion. - */ - if (unlikely(dummy && clear_flag(iter) == node)) - return node; if (unlikely(is_end(iter))) goto insert; if (likely(clear_flag(iter)->p.reverse_hash > node->p.reverse_hash)) @@ -825,31 +811,14 @@ struct cds_lfht_node *_cds_lfht_add(struct cds_lfht *ht, assert(!is_removed(iter_prev)); assert(!is_removed(iter)); assert(iter_prev != node); - if (!dummy) { + if (!dummy) node->p.next = clear_flag(iter); - } else { - /* - * Dummy node insertion is performed concurrently (help - * scheme). We try to link its next node, and if this - * succeeds, it _means_ it's us who link this dummy node - * into the table. force_dummy is set as soon as we - * succeed this cmpxchg within this function. - */ - if (!force_dummy) { - if (uatomic_cmpxchg(&node->p.next, NULL, - flag_dummy(clear_flag(iter))) != NULL) { - return NULL; - } - force_dummy = 1; - } else { - node->p.next = flag_dummy(clear_flag(iter)); - } - } + else + node->p.next = flag_dummy(clear_flag(iter)); if (is_dummy(iter)) new_node = flag_dummy(node); else new_node = node; - assert(new_node != NULL); if (uatomic_cmpxchg(&iter_prev->p.next, iter, new_node) != iter) continue; /* retry */ @@ -861,7 +830,6 @@ struct cds_lfht_node *_cds_lfht_add(struct cds_lfht *ht, new_next = flag_dummy(clear_flag(next)); else new_next = clear_flag(next); - assert(new_next != NULL); (void) uatomic_cmpxchg(&iter_prev->p.next, iter, new_next); /* retry */ } @@ -871,16 +839,12 @@ gc_end: order = get_count_order_ulong(index + 1); lookup = &ht->t.tbl[order]->nodes[index & (!order ? 0 : ((1UL << (order - 1)) - 1))]; dummy_node = (struct cds_lfht_node *) lookup; - if (_cds_lfht_gc_bucket(dummy_node, node)) { - /* Help expand */ - (void) _cds_lfht_add(ht, size >> 1, dummy_node, 0, 1); - goto gc_end; /* retry */ - } + _cds_lfht_gc_bucket(dummy_node, node); return node; } static -int _cds_lfht_remove(struct cds_lfht *ht, unsigned long size, +int _cds_lfht_del(struct cds_lfht *ht, unsigned long size, struct cds_lfht_node *node, int dummy_removal) { @@ -901,7 +865,6 @@ int _cds_lfht_remove(struct cds_lfht *ht, unsigned long size, assert(is_dummy(next)); else assert(!is_dummy(next)); - assert(next != NULL); old = uatomic_cmpxchg(&node->p.next, next, flag_removed(next)); } while (old != next); @@ -914,18 +877,13 @@ int _cds_lfht_remove(struct cds_lfht *ht, unsigned long size, * the node, and remove it (along with any other logically removed node) * if found. */ -gc_retry: hash = bit_reverse_ulong(node->p.reverse_hash); assert(size > 0); index = hash & (size - 1); order = get_count_order_ulong(index + 1); lookup = &ht->t.tbl[order]->nodes[index & (!order ? 0 : ((1UL << (order - 1)) - 1))]; dummy = (struct cds_lfht_node *) lookup; - if (_cds_lfht_gc_bucket(dummy, node)) { - /* Help expand */ - (void) _cds_lfht_add(ht, size >> 1, dummy, 0, 1); - goto gc_retry; /* retry */ - } + _cds_lfht_gc_bucket(dummy, node); end: /* * Only the flagging action indicated that we (and no other) @@ -939,49 +897,103 @@ end: } static -void init_table_hash(struct cds_lfht *ht, unsigned long i, - unsigned long len) +void *partition_resize_thread(void *arg) { - unsigned long j; + struct partition_resize_work *work = arg; - for (j = 0; j < len; j++) { - struct cds_lfht_node *new_node = - (struct cds_lfht_node *) &ht->t.tbl[i]->nodes[j]; + work->ht->cds_lfht_rcu_register_thread(); + work->fct(work->ht, work->i, work->start, work->len); + work->ht->cds_lfht_rcu_unregister_thread(); + return NULL; +} - dbg_printf("init hash entry: i %lu j %lu hash %lu\n", - i, j, !i ? 0 : (1UL << (i - 1)) + j); - new_node->p.reverse_hash = - bit_reverse_ulong(!i ? 0 : (1UL << (i - 1)) + j); - if (CMM_LOAD_SHARED(ht->in_progress_destroy)) - break; +static +void partition_resize_helper(struct cds_lfht *ht, unsigned long i, + unsigned long len, + void (*fct)(struct cds_lfht *ht, unsigned long i, + unsigned long start, unsigned long len)) +{ + unsigned long partition_len; + struct partition_resize_work *work; + int thread, ret; + unsigned long nr_threads; + pthread_t *thread_id; + + /* + * Note: nr_cpus_mask + 1 is always power of 2. + * We spawn just the number of threads we need to satisfy the minimum + * partition size, up to the number of CPUs in the system. + */ + nr_threads = min(nr_cpus_mask + 1, + len >> MIN_PARTITION_PER_THREAD_ORDER); + partition_len = len >> get_count_order_ulong(nr_threads); + work = calloc(nr_threads, sizeof(*work)); + thread_id = calloc(nr_threads, sizeof(*thread_id)); + assert(work); + for (thread = 0; thread < nr_threads; thread++) { + work[thread].ht = ht; + work[thread].i = i; + work[thread].len = partition_len; + work[thread].start = thread * partition_len; + work[thread].fct = fct; + ret = pthread_create(&thread_id[thread], ht->resize_attr, + partition_resize_thread, &work[thread]); + assert(!ret); } + for (thread = 0; thread < nr_threads; thread++) { + ret = pthread_join(thread_id[thread], NULL); + assert(!ret); + } + free(work); + free(thread_id); } /* * Holding RCU read lock to protect _cds_lfht_add against memory * reclaim that could be performed by other call_rcu worker threads (ABA * problem). + * + * When we reach a certain length, we can split this population phase over + * many worker threads, based on the number of CPUs available in the system. + * This should therefore take care of not having the expand lagging behind too + * many concurrent insertion threads by using the scheduler's ability to + * schedule dummy node population fairly with insertions. */ static -void init_table_link(struct cds_lfht *ht, unsigned long i, unsigned long len) +void init_table_populate_partition(struct cds_lfht *ht, unsigned long i, + unsigned long start, unsigned long len) { unsigned long j; - ht->cds_lfht_rcu_thread_online(); ht->cds_lfht_rcu_read_lock(); - for (j = 0; j < len; j++) { + for (j = start; j < start + len; j++) { struct cds_lfht_node *new_node = (struct cds_lfht_node *) &ht->t.tbl[i]->nodes[j]; - dbg_printf("init link: i %lu j %lu hash %lu\n", + dbg_printf("init populate: i %lu j %lu hash %lu\n", i, j, !i ? 0 : (1UL << (i - 1)) + j); + new_node->p.reverse_hash = + bit_reverse_ulong(!i ? 0 : (1UL << (i - 1)) + j); (void) _cds_lfht_add(ht, !i ? 0 : (1UL << (i - 1)), new_node, 0, 1); if (CMM_LOAD_SHARED(ht->in_progress_destroy)) break; } ht->cds_lfht_rcu_read_unlock(); - ht->cds_lfht_rcu_thread_offline(); +} + +static +void init_table_populate(struct cds_lfht *ht, unsigned long i, + unsigned long len) +{ + assert(nr_cpus_mask != -1); + if (nr_cpus_mask < 0 || len < 2 * MIN_PARTITION_PER_THREAD) { + ht->cds_lfht_rcu_thread_online(); + init_table_populate_partition(ht, i, 0, len); + ht->cds_lfht_rcu_thread_offline(); + return; + } + partition_resize_helper(ht, i, len, init_table_populate_partition); } static @@ -1005,24 +1017,19 @@ void init_table(struct cds_lfht *ht, ht->t.tbl[i] = calloc(1, sizeof(struct rcu_level) + (len * sizeof(struct _cds_lfht_node))); - - /* Set all dummy nodes reverse hash values for a level */ - init_table_hash(ht, i, len); + assert(ht->t.tbl[i]); /* - * Update table size. At this point, concurrent add/remove see - * dummy nodes with correctly initialized reverse hash value, - * but with NULL next pointers. If they do, they can help us - * link the dummy nodes into the list and retry. + * Set all dummy nodes reverse hash values for a level and + * link all dummy nodes into the table. */ - cmm_smp_wmb(); /* populate data before RCU size */ - CMM_STORE_SHARED(ht->t.size, !i ? 1 : (1UL << i)); + init_table_populate(ht, i, len); /* - * Link all dummy nodes into the table. Concurrent - * add/remove are helping us. + * Update table size. */ - init_table_link(ht, i, len); + cmm_smp_wmb(); /* populate data before RCU size */ + CMM_STORE_SHARED(ht->t.size, !i ? 1 : (1UL << i)); dbg_printf("init new size: %lu\n", !i ? 1 : (1UL << i)); if (CMM_LOAD_SHARED(ht->in_progress_destroy)) @@ -1049,15 +1056,20 @@ void init_table(struct cds_lfht *ht, * * Logical removal and garbage collection can therefore be done in batch or on a * node-per-node basis, as long as the guarantee above holds. + * + * When we reach a certain length, we can split this removal over many worker + * threads, based on the number of CPUs available in the system. This should + * take care of not letting resize process lag behind too many concurrent + * updater threads actively inserting into the hash table. */ static -void remove_table(struct cds_lfht *ht, unsigned long i, unsigned long len) +void remove_table_partition(struct cds_lfht *ht, unsigned long i, + unsigned long start, unsigned long len) { unsigned long j; - ht->cds_lfht_rcu_thread_online(); ht->cds_lfht_rcu_read_lock(); - for (j = 0; j < len; j++) { + for (j = start; j < start + len; j++) { struct cds_lfht_node *fini_node = (struct cds_lfht_node *) &ht->t.tbl[i]->nodes[j]; @@ -1065,13 +1077,26 @@ void remove_table(struct cds_lfht *ht, unsigned long i, unsigned long len) i, j, !i ? 0 : (1UL << (i - 1)) + j); fini_node->p.reverse_hash = bit_reverse_ulong(!i ? 0 : (1UL << (i - 1)) + j); - (void) _cds_lfht_remove(ht, !i ? 0 : (1UL << (i - 1)), + (void) _cds_lfht_del(ht, !i ? 0 : (1UL << (i - 1)), fini_node, 1); if (CMM_LOAD_SHARED(ht->in_progress_destroy)) break; } ht->cds_lfht_rcu_read_unlock(); - ht->cds_lfht_rcu_thread_offline(); +} + +static +void remove_table(struct cds_lfht *ht, unsigned long i, unsigned long len) +{ + + assert(nr_cpus_mask != -1); + if (nr_cpus_mask < 0 || len < 2 * MIN_PARTITION_PER_THREAD) { + ht->cds_lfht_rcu_thread_online(); + remove_table_partition(ht, i, 0, len); + ht->cds_lfht_rcu_thread_offline(); + return; + } + partition_resize_helper(ht, i, len, remove_table_partition); } static @@ -1121,7 +1146,7 @@ void fini_table(struct cds_lfht *ht, } } -struct cds_lfht *cds_lfht_new(cds_lfht_hash_fct hash_fct, +struct cds_lfht *_cds_lfht_new(cds_lfht_hash_fct hash_fct, cds_lfht_compare_fct compare_fct, unsigned long hash_seed, unsigned long init_size, @@ -1132,7 +1157,10 @@ struct cds_lfht *cds_lfht_new(cds_lfht_hash_fct hash_fct, void (*cds_lfht_rcu_read_lock)(void), void (*cds_lfht_rcu_read_unlock)(void), void (*cds_lfht_rcu_thread_offline)(void), - void (*cds_lfht_rcu_thread_online)(void)) + void (*cds_lfht_rcu_thread_online)(void), + void (*cds_lfht_rcu_register_thread)(void), + void (*cds_lfht_rcu_unregister_thread)(void), + pthread_attr_t *attr) { struct cds_lfht *ht; unsigned long order; @@ -1141,6 +1169,7 @@ struct cds_lfht *cds_lfht_new(cds_lfht_hash_fct hash_fct, if (init_size && (init_size & (init_size - 1))) return NULL; ht = calloc(1, sizeof(struct cds_lfht)); + assert(ht); ht->hash_fct = hash_fct; ht->compare_fct = compare_fct; ht->hash_seed = hash_seed; @@ -1150,6 +1179,9 @@ struct cds_lfht *cds_lfht_new(cds_lfht_hash_fct hash_fct, ht->cds_lfht_rcu_read_unlock = cds_lfht_rcu_read_unlock; ht->cds_lfht_rcu_thread_offline = cds_lfht_rcu_thread_offline; ht->cds_lfht_rcu_thread_online = cds_lfht_rcu_thread_online; + ht->cds_lfht_rcu_register_thread = cds_lfht_rcu_register_thread; + ht->cds_lfht_rcu_unregister_thread = cds_lfht_rcu_unregister_thread; + ht->resize_attr = attr; ht->percpu_count = alloc_per_cpu_items_count(); /* this mutex should not nest in read-side C.S. */ pthread_mutex_init(&ht->resize_mutex, NULL); @@ -1173,7 +1205,6 @@ struct cds_lfht_node *cds_lfht_lookup(struct cds_lfht *ht, void *key, size_t key hash = ht->hash_fct(key, key_len, ht->hash_seed); reverse_hash = bit_reverse_ulong(hash); -restart: size = rcu_dereference(ht->t.size); index = hash & (size - 1); order = get_count_order_ulong(index + 1); @@ -1183,16 +1214,6 @@ restart: dummy_node = (struct cds_lfht_node *) lookup; /* We can always skip the dummy node initially */ node = rcu_dereference(dummy_node->p.next); - if (unlikely(node == NULL)) { - /* - * We are executing concurrently with a hash table - * expand, so we see a dummy node with NULL next value. - * Help expand by linking this node into the list and - * retry. - */ - (void) _cds_lfht_add(ht, size >> 1, dummy_node, 0, 1); - goto restart; /* retry */ - } node = clear_flag(node); for (;;) { if (unlikely(is_end(node))) { @@ -1278,15 +1299,15 @@ struct cds_lfht_node *cds_lfht_add_unique(struct cds_lfht *ht, return ret; } -int cds_lfht_remove(struct cds_lfht *ht, struct cds_lfht_node *node) +int cds_lfht_del(struct cds_lfht *ht, struct cds_lfht_node *node) { unsigned long size; int ret; size = rcu_dereference(ht->t.size); - ret = _cds_lfht_remove(ht, size, node, 0); + ret = _cds_lfht_del(ht, size, node, 0); if (!ret) - ht_count_remove(ht, size); + ht_count_del(ht, size); return ret; } @@ -1331,7 +1352,7 @@ int cds_lfht_delete_dummy(struct cds_lfht *ht) * Should only be called when no more concurrent readers nor writers can * possibly access the table. */ -int cds_lfht_destroy(struct cds_lfht *ht) +int cds_lfht_destroy(struct cds_lfht *ht, pthread_attr_t **attr) { int ret; @@ -1343,6 +1364,8 @@ int cds_lfht_destroy(struct cds_lfht *ht) if (ret) return ret; free_per_cpu_items_count(ht->percpu_count); + if (attr) + *attr = ht->resize_attr; poison_free(ht); return ret; }