* order bits reverse
* 0 0 000 000
* |
- * 1 | 1 001 100 <-
- * | | |
- * 2 | | 2 010 010 |
- * | | | 3 011 110 <- |
- * | | | | | |
+ * 1 | 1 001 100 <- <-
+ * | | | |
+ * 2 | | 2 010 010 | |
+ * | | | 3 011 110 | <- |
+ * | | | | | | |
* 3 -> | | | 4 100 001 | |
* -> | | 5 101 101 |
* -> | 6 110 011
#define dbg_printf(fmt, args...)
#endif
+/* For testing: poison freed memory to help catch use-after-free. */
+#define POISON_FREE
+
/*
* Per-CPU split-counters lazily update the global counter every 1024
* additions/removals. They automatically keep track of whether a resize
* is required.
#define CHAIN_LEN_TARGET 1
#define CHAIN_LEN_RESIZE_THRESHOLD 3
+/*
+ * Define the minimum table size. This protects against hash table resize
+ * overload when too many entries are added quickly before the resize can
+ * complete. It matters especially when the table could otherwise be shrunk
+ * to a size of 1.
+ * TODO: we might want to make the add/remove operations help an ongoing
+ * resize to add or remove dummy nodes, to guarantee an upper bound on
+ * chain length.
+ */
+#define MIN_TABLE_SIZE 128
+
#ifndef max
#define max(a, b) ((a) > (b) ? (a) : (b))
#endif
cds_lfht_hash_fct hash_fct;
cds_lfht_compare_fct compare_fct;
unsigned long hash_seed;
+ int flags; /* creation flags (e.g. CDS_LFHT_AUTO_RESIZE) */
pthread_mutex_t resize_mutex; /* resize mutex: add/del mutex */
unsigned int in_progress_resize, in_progress_destroy;
void (*cds_lfht_call_rcu)(struct rcu_head *head,
void (*func)(struct rcu_head *head));
void (*cds_lfht_synchronize_rcu)(void);
+ void (*cds_lfht_rcu_read_lock)(void);
+ void (*cds_lfht_rcu_read_unlock)(void);
unsigned long count; /* global approximate item count */
struct ht_items_count *percpu_count; /* per-cpu item count */
};
return order;
}
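+/*
+ * poison_free() overwrites the freed object with a 0x42 pattern before
+ * returning it to the allocator, to help catch use-after-free. Note that
+ * only sizeof(*ptr) bytes are poisoned, so objects allocated with a
+ * trailing variable-sized array (e.g. the rcu_level node arrays) are only
+ * partially poisoned.
+ */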
+#ifdef POISON_FREE
+#define poison_free(ptr) \
+ do { \
+ memset(ptr, 0x42, sizeof(*(ptr))); \
+ free(ptr); \
+ } while (0)
+#else
+#define poison_free(ptr) free(ptr)
+#endif
+
static
void cds_lfht_resize_lazy(struct cds_lfht *ht, struct rcu_table *t, int growth);
static
void free_per_cpu_items_count(struct ht_items_count *count)
{
- free(count);
+ poison_free(count);
}
static
{
unsigned long count;
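+ /* Only consider resizing when automatic resize is enabled for this table. */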
+ if (!(ht->flags & CDS_LFHT_AUTO_RESIZE))
+ return;
count = uatomic_read(&ht->count);
/*
* Use bucket-local length for small table expand and for
{
struct rcu_table *t =
caa_container_of(head, struct rcu_table, head);
- free(t);
+ poison_free(t);
}
static
{
struct rcu_level *l =
caa_container_of(head, struct rcu_level, head);
- free(l);
+ poison_free(l);
}
/*
void _cds_lfht_gc_bucket(struct cds_lfht_node *dummy, struct cds_lfht_node *node)
{
struct cds_lfht_node *iter_prev, *iter, *next, *new_next;
-
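+ /*
+  * Debugging aid: ring buffer recording the iterator positions visited
+  * while garbage-collecting this bucket (0x1/0x2 mark loop entry and the
+  * cmpxchg step), presumably kept so the recent traversal history can be
+  * inspected from a core dump.
+  */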
+ struct cds_lfht_node *iter_trace[64];
+ unsigned long trace_idx = 0;
+
+ memset(iter_trace, 0, sizeof(iter_trace));
+ assert(!is_dummy(dummy));
+ assert(!is_removed(dummy));
+ assert(!is_dummy(node));
+ assert(!is_removed(node));
for (;;) {
+ iter_trace[trace_idx++ & (64 - 1)] = (void *) 0x1;
iter_prev = dummy;
/* We can always skip the dummy node initially */
iter = rcu_dereference(iter_prev->p.next);
+ iter_trace[trace_idx++ & (64 - 1)] = iter;
assert(iter_prev->p.reverse_hash <= node->p.reverse_hash);
+ /*
+ * We should never be called with dummy (start of chain)
+ * and logically removed node (end of path compression
+ * marker) being the same node. This would be a
+ * bug in the algorithm implementation.
+ */
+ assert(dummy != node);
for (;;) {
if (unlikely(!clear_flag(iter)))
return;
break;
iter_prev = clear_flag(iter);
iter = next;
+ iter_trace[trace_idx++ & (64 - 1)] = iter;
}
assert(!is_removed(iter));
if (is_dummy(iter))
else
new_next = clear_flag(next);
(void) uatomic_cmpxchg(&iter_prev->p.next, iter, new_next);
+ iter_trace[trace_idx++ & (64 - 1)] = (void *) 0x2;
}
}
struct _cds_lfht_node *lookup;
unsigned long hash, index, order;
+ assert(!is_dummy(node));
+ assert(!is_removed(node));
if (!t->size) {
assert(dummy);
node->p.next = flag_dummy(NULL);
iter = rcu_dereference(iter_prev->p.next);
assert(iter_prev->p.reverse_hash <= node->p.reverse_hash);
for (;;) {
+ /* TODO: check if removed */
if (unlikely(!clear_flag(iter)))
goto insert;
+ /* TODO: check if removed */
if (likely(clear_flag(iter)->p.reverse_hash > node->p.reverse_hash))
goto insert;
next = rcu_dereference(clear_flag(iter)->p.next);
insert:
assert(node != clear_flag(iter));
assert(!is_removed(iter_prev));
+ assert(!is_removed(iter));
assert(iter_prev != node);
if (!dummy)
node->p.next = clear_flag(iter);
unsigned long hash, index, order;
/* logically delete the node */
+ assert(!is_dummy(node));
+ assert(!is_removed(node));
old = rcu_dereference(node->p.next);
do {
next = old;
/* We performed the (logical) deletion. */
flagged = 1;
- if (dummy_removal)
- node = clear_flag(node);
-
/*
* Ensure that the node is not visible to readers anymore: lookup for
* the node, and remove it (along with any other logically removed node)
return -ENOENT;
}
+/*
+ * Holding RCU read lock to protect _cds_lfht_add against memory
+ * reclaim that could be performed by other call_rcu worker threads (ABA
+ * problem).
+ */
static
void init_table(struct cds_lfht *ht, struct rcu_table *t,
unsigned long first_order, unsigned long len_order)
dbg_printf("init order %lu len: %lu\n", i, len);
t->tbl[i] = calloc(1, sizeof(struct rcu_level)
+ (len * sizeof(struct _cds_lfht_node)));
+ ht->cds_lfht_rcu_read_lock();
for (j = 0; j < len; j++) {
struct cds_lfht_node *new_node =
(struct cds_lfht_node *) &t->tbl[i]->nodes[j];
if (CMM_LOAD_SHARED(ht->in_progress_destroy))
break;
}
+ ht->cds_lfht_rcu_read_unlock();
/* Update table size */
t->size = !i ? 1 : (1UL << i);
dbg_printf("init new size: %lu\n", t->size);
t->resize_initiated = 0;
}
+/*
+ * Holding RCU read lock to protect _cds_lfht_remove against memory
+ * reclaim that could be performed by other call_rcu worker threads (ABA
+ * problem).
+ */
static
void fini_table(struct cds_lfht *ht, struct rcu_table *t,
unsigned long first_order, unsigned long len_order)
len = !i ? 1 : 1UL << (i - 1);
dbg_printf("fini order %lu len: %lu\n", i, len);
- /* Update table size */
+ /*
+ * Update table size. Need to shrink this table prior to
+ * removal so gc lookups use non-logically-removed dummy
+ * nodes.
+ */
t->size = 1UL << (i - 1);
/* Unlink */
+ ht->cds_lfht_rcu_read_lock();
for (j = 0; j < len; j++) {
- struct cds_lfht_node *new_node =
+ struct cds_lfht_node *fini_node =
(struct cds_lfht_node *) &t->tbl[i]->nodes[j];
dbg_printf("fini entry: i %lu j %lu hash %lu\n",
i, j, !i ? 0 : (1UL << (i - 1)) + j);
- new_node->p.reverse_hash =
+ fini_node->p.reverse_hash =
bit_reverse_ulong(!i ? 0 : (1UL << (i - 1)) + j);
- (void) _cds_lfht_remove(ht, t, new_node, 1);
+ (void) _cds_lfht_remove(ht, t, fini_node, 1);
if (CMM_LOAD_SHARED(ht->in_progress_destroy))
break;
}
+ ht->cds_lfht_rcu_read_unlock();
ht->cds_lfht_call_rcu(&t->tbl[i]->head, cds_lfht_free_level);
dbg_printf("fini new size: %lu\n", t->size);
if (CMM_LOAD_SHARED(ht->in_progress_destroy))
cds_lfht_compare_fct compare_fct,
unsigned long hash_seed,
unsigned long init_size,
+ int flags,
void (*cds_lfht_call_rcu)(struct rcu_head *head,
void (*func)(struct rcu_head *head)),
- void (*cds_lfht_synchronize_rcu)(void))
+ void (*cds_lfht_synchronize_rcu)(void),
+ void (*cds_lfht_rcu_read_lock)(void),
+ void (*cds_lfht_rcu_read_unlock)(void))
{
struct cds_lfht *ht;
unsigned long order;
ht->hash_seed = hash_seed;
ht->cds_lfht_call_rcu = cds_lfht_call_rcu;
ht->cds_lfht_synchronize_rcu = cds_lfht_synchronize_rcu;
+ ht->cds_lfht_rcu_read_lock = cds_lfht_rcu_read_lock;
+ ht->cds_lfht_rcu_read_unlock = cds_lfht_rcu_read_unlock;
ht->in_progress_resize = 0;
ht->percpu_count = alloc_per_cpu_items_count();
/* this mutex should not nest in read-side C.S. */
pthread_mutex_init(&ht->resize_mutex, NULL);
- order = get_count_order_ulong(max(init_size, 1)) + 1;
+ order = get_count_order_ulong(max(init_size, MIN_TABLE_SIZE)) + 1;
ht->t = calloc(1, sizeof(struct cds_lfht)
+ (order * sizeof(struct rcu_level *)));
ht->t->size = 0;
+ ht->flags = flags;
pthread_mutex_lock(&ht->resize_mutex);
init_table(ht, ht->t, 0, order);
pthread_mutex_unlock(&ht->resize_mutex);
bit_reverse_ulong(t->tbl[order]->nodes[i].reverse_hash));
assert(is_dummy(t->tbl[order]->nodes[i].next));
}
- free(t->tbl[order]);
+ poison_free(t->tbl[order]);
}
return 0;
}
ret = cds_lfht_delete_dummy(ht);
if (ret)
return ret;
- free(ht->t);
+ poison_free(ht->t);
free_per_cpu_items_count(ht->percpu_count);
- free(ht);
+ poison_free(ht);
return ret;
}
unsigned long old_order, new_order;
struct rcu_table *new_t;
- new_size = max(new_size, 1);
+ new_size = max(new_size, MIN_TABLE_SIZE);
old_order = get_count_order_ulong(old_size) + 1;
new_order = get_count_order_ulong(new_size) + 1;
printf("resize from %lu (order %lu) to %lu (order %lu) buckets\n",
memcpy(&new_t->tbl, &old_t->tbl,
new_order * sizeof(struct rcu_level *));
new_t->size = !new_order ? 1 : (1UL << (new_order - 1));
+ assert(new_t->size == new_size);
new_t->resize_target = new_t->size;
new_t->resize_initiated = 0;
rcu_assign_pointer(ht->t, new_t);
/*
- * We need to wait for all reader threads to reach Q.S. (and
+ * We need to wait for all add operations to reach Q.S. (and
* thus use the new table for lookups) before we can start
- * releasing the old dummy nodes.
+ * releasing the old dummy nodes. Otherwise their lookup will
+ * return a logically removed node as insert position.
*/
ht->cds_lfht_synchronize_rcu();
}
static
-unsigned long resize_target_update_count(struct rcu_table *t,
- unsigned long count)
+void resize_target_update_count(struct rcu_table *t,
+ unsigned long count)
{
- count = max(count, 1);
- return uatomic_set(&t->resize_target, count);
+ count = max(count, MIN_TABLE_SIZE);
+ uatomic_set(&t->resize_target, count);
}
void cds_lfht_resize(struct cds_lfht *ht, unsigned long new_size)
{
struct rcu_table *t = rcu_dereference(ht->t);
- unsigned long target_size;
- target_size = resize_target_update_count(t, new_size);
+ resize_target_update_count(t, new_size);
CMM_STORE_SHARED(t->resize_initiated, 1);
pthread_mutex_lock(&ht->resize_mutex);
_do_cds_lfht_resize(ht);
pthread_mutex_lock(&ht->resize_mutex);
_do_cds_lfht_resize(ht);
pthread_mutex_unlock(&ht->resize_mutex);
- free(work);
+ poison_free(work);
cmm_smp_mb(); /* finish resize before decrement */
uatomic_dec(&ht->in_progress_resize);
}
unsigned long count)
{
struct rcu_resize_work *work;
- unsigned long target_size;
- target_size = resize_target_update_count(t, count);
+ if (!(ht->flags & CDS_LFHT_AUTO_RESIZE))
+ return;
+ resize_target_update_count(t, count);
if (!CMM_LOAD_SHARED(t->resize_initiated)) {
uatomic_inc(&ht->in_progress_resize);
cmm_smp_mb(); /* increment resize count before calling it */