* Userspace RCU library - Lock-Free Resizable RCU Hash Table
*
* Copyright 2010-2011 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * Copyright 2011 - Lai Jiangshan <laijs@cn.fujitsu.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
/*
* Define the minimum table size.
*/
-#define MIN_TABLE_SIZE 1
+#define MIN_TABLE_ORDER 0
+#define MIN_TABLE_SIZE (1UL << MIN_TABLE_ORDER)
#if (CAA_BITS_PER_LONG == 32)
#define MAX_TABLE_ORDER 32
unsigned long add, del;
} __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
-/*
- * rcu_level: Contains the per order-index-level bucket node table. The
- * size of each bucket node table is half the number of hashes contained
- * in this order (except for order 0). The minimum allocation size
- * parameter allows combining the bucket node arrays of the lowermost
- * levels to improve cache locality for small index orders.
- */
-struct rcu_level {
- /* Note: manually update allocation length when adding a field */
- struct cds_lfht_node nodes[0];
-};
-
/*
* rcu_table: Contains the size and desired new size if a resize
* operation is in progress, as well as the statically-sized array of
- * rcu_level pointers.
+ * bucket table pointers.
*/
struct rcu_table {
unsigned long size; /* always a power of 2, shared (RCU) */
unsigned long resize_target;
int resize_initiated;
- struct rcu_level *tbl[MAX_TABLE_ORDER];
+
+ /*
+ * Contains the per order-index-level bucket node tables. The size
+ * of each bucket node table is half the number of hashes contained
+ * in this order (except for order 0). The minimum allocation size
+ * parameter (min_nr_alloc_buckets) allows combining the bucket node
+ * arrays of the lowermost levels to improve cache locality for
+ * small index orders.
+ */
+ struct cds_lfht_node *tbl[MAX_TABLE_ORDER];
};
/*
*/
struct cds_lfht {
struct rcu_table t;
- unsigned long min_alloc_order;
- unsigned long min_alloc_size;
+ unsigned long min_alloc_buckets_order;
+ unsigned long min_nr_alloc_buckets;
+ unsigned long max_nr_buckets;
int flags;
/*
* We need to put the work threads offline (QSBR) when taking this
static
void _cds_lfht_add(struct cds_lfht *ht,
cds_lfht_match_fct match,
- void *key,
+ const void *key,
unsigned long size,
struct cds_lfht_node *node,
struct cds_lfht_iter *unique_ret,
}
static
-struct cds_lfht_node *lookup_bucket(struct cds_lfht *ht, unsigned long size,
- unsigned long hash)
+void cds_lfht_alloc_bucket_table(struct cds_lfht *ht, unsigned long order)
+{
+ if (order == 0) {
+ ht->t.tbl[0] = calloc(ht->min_nr_alloc_buckets,
+ sizeof(struct cds_lfht_node));
+ assert(ht->t.tbl[0]);
+ } else if (order > ht->min_alloc_buckets_order) {
+ ht->t.tbl[order] = calloc(1UL << (order -1),
+ sizeof(struct cds_lfht_node));
+ assert(ht->t.tbl[order]);
+ }
+ /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */
+}
+
+/*
+ * cds_lfht_free_bucket_table() should be called in decreasing order
+ * of its "order" argument. A call with order 0 frees the first bucket
+ * table, which means the whole lfht is being destroyed.
+ */
+static
+void cds_lfht_free_bucket_table(struct cds_lfht *ht, unsigned long order)
{
- unsigned long index, order;
+ if (order == 0)
+ poison_free(ht->t.tbl[0]);
+ else if (order > ht->min_alloc_buckets_order)
+ poison_free(ht->t.tbl[order]);
+ /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */
+}
- assert(size > 0);
- index = hash & (size - 1);
+static inline
+struct cds_lfht_node *bucket_at(struct cds_lfht *ht, unsigned long index)
+{
+ unsigned long order;
- if (index < ht->min_alloc_size) {
- dbg_printf("lookup hash %lu index %lu order 0 aridx 0\n",
- hash, index);
- return &ht->t.tbl[0]->nodes[index];
+ if ((__builtin_constant_p(index) && index == 0)
+ || index < ht->min_nr_alloc_buckets) {
+ dbg_printf("bucket index %lu order 0 aridx 0\n", index);
+ return &ht->t.tbl[0][index];
}
/*
* equivalent to get_count_order_ulong(index + 1), but optimizes
* get_count_order_ulong.
*/
order = fls_ulong(index);
- dbg_printf("lookup hash %lu index %lu order %lu aridx %lu\n",
- hash, index, order, index & ((1UL << (order - 1)) - 1));
- return &ht->t.tbl[order]->nodes[index & ((1UL << (order - 1)) - 1)];
+ dbg_printf("bucket index %lu order %lu aridx %lu\n",
+ index, order, index & ((1UL << (order - 1)) - 1));
+ return &ht->t.tbl[order][index & ((1UL << (order - 1)) - 1)];
+}
+
+static inline
+struct cds_lfht_node *lookup_bucket(struct cds_lfht *ht, unsigned long size,
+ unsigned long hash)
+{
+ assert(size > 0);
+ return bucket_at(ht, hash & (size - 1));
}
/*
static
void _cds_lfht_add(struct cds_lfht *ht,
cds_lfht_match_fct match,
- void *key,
+ const void *key,
unsigned long size,
struct cds_lfht_node *node,
struct cds_lfht_iter *unique_ret,
void init_table_populate_partition(struct cds_lfht *ht, unsigned long i,
unsigned long start, unsigned long len)
{
- unsigned long j;
+ unsigned long j, size = 1UL << (i - 1);
- assert(i > ht->min_alloc_order);
+ assert(i > MIN_TABLE_ORDER);
ht->cds_lfht_rcu_read_lock();
- for (j = start; j < start + len; j++) {
- struct cds_lfht_node *new_node = &ht->t.tbl[i]->nodes[j];
-
- dbg_printf("init populate: i %lu j %lu hash %lu\n",
- i, j, (1UL << (i - 1)) + j);
- new_node->reverse_hash =
- bit_reverse_ulong((1UL << (i - 1)) + j);
- _cds_lfht_add(ht, NULL, NULL, 1UL << (i - 1),
- new_node, NULL, 1);
+ for (j = size + start; j < size + start + len; j++) {
+ struct cds_lfht_node *new_node = bucket_at(ht, j);
+
+ assert(j >= size && j < (size << 1));
+ dbg_printf("init populate: order %lu index %lu hash %lu\n",
+ i, j, j);
+ new_node->reverse_hash = bit_reverse_ulong(j);
+ _cds_lfht_add(ht, NULL, NULL, size, new_node, NULL, 1);
}
ht->cds_lfht_rcu_read_unlock();
}
dbg_printf("init table: first_order %lu last_order %lu\n",
first_order, last_order);
- assert(first_order > ht->min_alloc_order);
+ assert(first_order > MIN_TABLE_ORDER);
for (i = first_order; i <= last_order; i++) {
unsigned long len;
if (CMM_LOAD_SHARED(ht->t.resize_target) < (1UL << i))
break;
- ht->t.tbl[i] = calloc(1, len * sizeof(struct cds_lfht_node));
- assert(ht->t.tbl[i]);
+ cds_lfht_alloc_bucket_table(ht, i);
/*
* Set all bucket nodes reverse hash values for a level and
void remove_table_partition(struct cds_lfht *ht, unsigned long i,
unsigned long start, unsigned long len)
{
- unsigned long j;
+ unsigned long j, size = 1UL << (i - 1);
- assert(i > ht->min_alloc_order);
+ assert(i > MIN_TABLE_ORDER);
ht->cds_lfht_rcu_read_lock();
- for (j = start; j < start + len; j++) {
- struct cds_lfht_node *fini_node = &ht->t.tbl[i]->nodes[j];
-
- dbg_printf("remove entry: i %lu j %lu hash %lu\n",
- i, j, (1UL << (i - 1)) + j);
- fini_node->reverse_hash =
- bit_reverse_ulong((1UL << (i - 1)) + j);
- (void) _cds_lfht_del(ht, 1UL << (i - 1), fini_node, 1);
+ for (j = size + start; j < size + start + len; j++) {
+ struct cds_lfht_node *fini_node = bucket_at(ht, j);
+
+ assert(j >= size && j < (size << 1));
+ dbg_printf("remove entry: order %lu index %lu hash %lu\n",
+ i, j, j);
+ fini_node->reverse_hash = bit_reverse_ulong(j);
+ (void) _cds_lfht_del(ht, size, fini_node, 1);
}
ht->cds_lfht_rcu_read_unlock();
}
unsigned long first_order, unsigned long last_order)
{
long i;
- void *free_by_rcu = NULL;
+ unsigned long free_by_rcu_order = 0;
dbg_printf("fini table: first_order %lu last_order %lu\n",
first_order, last_order);
- assert(first_order > ht->min_alloc_order);
+ assert(first_order > MIN_TABLE_ORDER);
for (i = last_order; i >= first_order; i--) {
unsigned long len;
* return a logically removed node as insert position.
*/
ht->cds_lfht_synchronize_rcu();
- if (free_by_rcu)
- free(free_by_rcu);
+ if (free_by_rcu_order)
+ cds_lfht_free_bucket_table(ht, free_by_rcu_order);
/*
* Set "removed" flag in bucket nodes about to be removed.
*/
remove_table(ht, i, len);
- free_by_rcu = ht->t.tbl[i];
+ free_by_rcu_order = i;
dbg_printf("fini new size: %lu\n", 1UL << i);
if (CMM_LOAD_SHARED(ht->in_progress_destroy))
break;
}
- if (free_by_rcu) {
+ if (free_by_rcu_order) {
ht->cds_lfht_synchronize_rcu();
- free(free_by_rcu);
+ cds_lfht_free_bucket_table(ht, free_by_rcu_order);
}
}
void cds_lfht_create_bucket(struct cds_lfht *ht, unsigned long size)
{
struct cds_lfht_node *prev, *node;
- unsigned long order, len, i, j;
+ unsigned long order, len, i;
- ht->t.tbl[0] = calloc(1, ht->min_alloc_size * sizeof(struct cds_lfht_node));
- assert(ht->t.tbl[0]);
+ cds_lfht_alloc_bucket_table(ht, 0);
- dbg_printf("create bucket: order %lu index %lu hash %lu\n", 0, 0, 0);
- ht->t.tbl[0]->nodes[0].next = flag_bucket(get_end());
- ht->t.tbl[0]->nodes[0].reverse_hash = 0;
+ dbg_printf("create bucket: order 0 index 0 hash 0\n");
+ node = bucket_at(ht, 0);
+ node->next = flag_bucket(get_end());
+ node->reverse_hash = 0;
for (order = 1; order < get_count_order_ulong(size) + 1; order++) {
len = 1UL << (order - 1);
- if (order <= ht->min_alloc_order) {
- ht->t.tbl[order] = (struct rcu_level *) (ht->t.tbl[0]->nodes + len);
- } else {
- ht->t.tbl[order] = calloc(1, len * sizeof(struct cds_lfht_node));
- assert(ht->t.tbl[order]);
- }
+ cds_lfht_alloc_bucket_table(ht, order);
- i = 0;
- prev = ht->t.tbl[i]->nodes;
- for (j = 0; j < len; j++) {
- if (j & (j - 1)) { /* Between power of 2 */
- prev++;
- } else if (j) { /* At each power of 2 */
- i++;
- prev = ht->t.tbl[i]->nodes;
- }
+ for (i = 0; i < len; i++) {
+ /*
+ * Initialize the bucket node with hash=(len+i) (which is
+ * also the bucket with index=(len+i)) and insert it into
+ * the hash table. This node has to be inserted after the
+ * bucket with index=(len+i)&(len-1)=i. Because no
+ * non-bucket node, and no bucket node with a larger
+ * index/hash, has been inserted yet, the new bucket node
+ * is linked directly after the bucket node with index=i.
+ */
+ prev = bucket_at(ht, i);
+ node = bucket_at(ht, len + i);
- node = &ht->t.tbl[order]->nodes[j];
dbg_printf("create bucket: order %lu index %lu hash %lu\n",
- order, j, j + len);
+ order, len + i, len + i);
+ node->reverse_hash = bit_reverse_ulong(len + i);
+
+ /* insert after prev */
+ assert(is_bucket(prev->next));
node->next = prev->next;
- assert(is_bucket(node->next));
- node->reverse_hash = bit_reverse_ulong(j + len);
prev->next = flag_bucket(node);
}
}
}
struct cds_lfht *_cds_lfht_new(unsigned long init_size,
- unsigned long min_alloc_size,
+ unsigned long min_nr_alloc_buckets,
+ unsigned long max_nr_buckets,
int flags,
void (*cds_lfht_call_rcu)(struct rcu_head *head,
void (*func)(struct rcu_head *head)),
struct cds_lfht *ht;
unsigned long order;
- /* min_alloc_size must be power of two */
- if (!min_alloc_size || (min_alloc_size & (min_alloc_size - 1)))
+ /* min_nr_alloc_buckets must be power of two */
+ if (!min_nr_alloc_buckets || (min_nr_alloc_buckets & (min_nr_alloc_buckets - 1)))
return NULL;
+
/* init_size must be power of two */
if (!init_size || (init_size & (init_size - 1)))
return NULL;
- min_alloc_size = max(min_alloc_size, MIN_TABLE_SIZE);
- init_size = max(init_size, min_alloc_size);
+
+ if (!max_nr_buckets)
+ max_nr_buckets = 1UL << (MAX_TABLE_ORDER - 1);
+
+ /* max_nr_buckets must be power of two */
+ if (!max_nr_buckets || (max_nr_buckets & (max_nr_buckets - 1)))
+ return NULL;
+
+ min_nr_alloc_buckets = max(min_nr_alloc_buckets, MIN_TABLE_SIZE);
+ init_size = max(init_size, MIN_TABLE_SIZE);
+ max_nr_buckets = max(max_nr_buckets, min_nr_alloc_buckets);
+ init_size = min(init_size, max_nr_buckets);
ht = calloc(1, sizeof(struct cds_lfht));
assert(ht);
ht->flags = flags;
pthread_mutex_init(&ht->resize_mutex, NULL);
order = get_count_order_ulong(init_size);
ht->t.resize_target = 1UL << order;
- ht->min_alloc_size = min_alloc_size;
- ht->min_alloc_order = get_count_order_ulong(min_alloc_size);
+ ht->min_nr_alloc_buckets = min_nr_alloc_buckets;
+ ht->min_alloc_buckets_order = get_count_order_ulong(min_nr_alloc_buckets);
+ ht->max_nr_buckets = max_nr_buckets;
cds_lfht_create_bucket(ht, 1UL << order);
ht->t.size = 1UL << order;
return ht;
}
void cds_lfht_lookup(struct cds_lfht *ht, unsigned long hash,
- cds_lfht_match_fct match, void *key,
+ cds_lfht_match_fct match, const void *key,
struct cds_lfht_iter *iter)
{
struct cds_lfht_node *node, *next, *bucket;
}
void cds_lfht_next_duplicate(struct cds_lfht *ht, cds_lfht_match_fct match,
- void *key, struct cds_lfht_iter *iter)
+ const void *key, struct cds_lfht_iter *iter)
{
struct cds_lfht_node *node, *next;
unsigned long reverse_hash;
void cds_lfht_first(struct cds_lfht *ht, struct cds_lfht_iter *iter)
{
- struct cds_lfht_node *lookup;
-
/*
* Get next after first bucket node. The first bucket node is the
* first node of the linked list.
*/
- lookup = &ht->t.tbl[0]->nodes[0];
- iter->next = lookup->next;
+ iter->next = bucket_at(ht, 0)->next;
cds_lfht_next(ht, iter);
}
struct cds_lfht_node *cds_lfht_add_unique(struct cds_lfht *ht,
unsigned long hash,
cds_lfht_match_fct match,
- void *key,
+ const void *key,
struct cds_lfht_node *node)
{
unsigned long size;
struct cds_lfht_node *cds_lfht_add_replace(struct cds_lfht *ht,
unsigned long hash,
cds_lfht_match_fct match,
- void *key,
+ const void *key,
struct cds_lfht_node *node)
{
unsigned long size;
unsigned long order, i, size;
/* Check that the table is empty */
- node = &ht->t.tbl[0]->nodes[0];
+ node = bucket_at(ht, 0);
do {
node = clear_flag(node)->next;
if (!is_bucket(node))
*/
size = ht->t.size;
/* Internal sanity check: all nodes left should be bucket */
- for (order = 0; order < get_count_order_ulong(size) + 1; order++) {
- unsigned long len;
+ for (i = 0; i < size; i++) {
+ node = bucket_at(ht, i);
+ dbg_printf("delete bucket: index %lu expected hash %lu hash %lu\n",
+ i, i, bit_reverse_ulong(node->reverse_hash));
+ assert(is_bucket(node->next));
+ }
- len = !order ? 1 : 1UL << (order - 1);
- for (i = 0; i < len; i++) {
- dbg_printf("delete order %lu i %lu hash %lu\n",
- order, i,
- bit_reverse_ulong(ht->t.tbl[order]->nodes[i].reverse_hash));
- assert(is_bucket(ht->t.tbl[order]->nodes[i].next));
- }
+ for (order = get_count_order_ulong(size); (long)order >= 0; order--)
+ cds_lfht_free_bucket_table(ht, order);
- if (order == ht->min_alloc_order)
- poison_free(ht->t.tbl[0]);
- else if (order > ht->min_alloc_order)
- poison_free(ht->t.tbl[order]);
- /* Nothing to delete for order < ht->min_alloc_order */
- }
return 0;
}
*removed = 0;
/* Count non-bucket nodes in the table */
- node = &ht->t.tbl[0]->nodes[0];
+ node = bucket_at(ht, 0);
do {
next = rcu_dereference(node->next);
if (is_removed(next)) {
{
unsigned long old_order, new_order;
- new_size = max(new_size, ht->min_alloc_size);
+ new_size = max(new_size, MIN_TABLE_SIZE);
old_order = get_count_order_ulong(old_size);
new_order = get_count_order_ulong(new_size);
dbg_printf("resize from %lu (order %lu) to %lu (order %lu) buckets\n",
void resize_target_update_count(struct cds_lfht *ht,
unsigned long count)
{
- count = max(count, ht->min_alloc_size);
+ count = max(count, MIN_TABLE_SIZE);
+ count = min(count, ht->max_nr_buckets);
uatomic_set(&ht->t.resize_target, count);
}
{
unsigned long target_size = size << growth;
+ target_size = min(target_size, ht->max_nr_buckets);
if (resize_target_grow(ht, target_size) >= target_size)
return;
{
if (!(ht->flags & CDS_LFHT_AUTO_RESIZE))
return;
- count = max(count, ht->min_alloc_size);
+ count = max(count, MIN_TABLE_SIZE);
+ count = min(count, ht->max_nr_buckets);
if (count == size)
return; /* Already the right size, no resize needed */
if (count > size) { /* lazy grow */