- t->tbl[i] = calloc(len, sizeof(struct _cds_lfht_node));
- for (j = 0; j < len; j++) {
- dbg_printf("init entry: i %lu j %lu hash %lu\n",
- i, j, !i ? 0 : (1UL << (i - 1)) + j);
- struct cds_lfht_node *new_node =
- (struct cds_lfht_node *) &t->tbl[i][j];
- new_node->p.reverse_hash =
- bit_reverse_ulong(!i ? 0 : (1UL << (i - 1)) + j);
- (void) _cds_lfht_add(ht, t, new_node, 0, 1);
- if (CMM_LOAD_SHARED(ht->in_progress_destroy))
- break;
- }
- /* Update table size */
- t->size = !i ? 1 : (1UL << i);
- dbg_printf("init new size: %lu\n", t->size);
+
+ /* Stop expand if the resize target changes under us */
+ if (CMM_LOAD_SHARED(ht->resize_target) < (1UL << i))
+ break;
+
+ cds_lfht_alloc_bucket_table(ht, i);
+
+ /*
+ * Set all bucket nodes reverse hash values for a level and
+ * link all bucket nodes into the table.
+ */
+ init_table_populate(ht, i, len);
+
+ /*
+ * Update table size.
+ */
+ cmm_smp_wmb(); /* populate data before RCU size */
+ CMM_STORE_SHARED(ht->size, 1UL << i);
+
+ dbg_printf("init new size: %lu\n", 1UL << i);
+ if (CMM_LOAD_SHARED(ht->in_progress_destroy))
+ break;
+ }
+}
+
+/*
+ * Holding RCU read lock to protect _cds_lfht_remove against memory
+ * reclaim that could be performed by other call_rcu worker threads (ABA
+ * problem).
+ * For a single level, we logically remove and garbage collect each node.
+ *
+ * As a design choice, we perform logical removal and garbage collection on a
+ * node-per-node basis to simplify this algorithm. We also assume keeping good
+ * cache locality of the operation would overweight possible performance gain
+ * that could be achieved by batching garbage collection for multiple levels.
+ * However, this would have to be justified by benchmarks.
+ *
+ * Concurrent removal and add operations are helping us perform garbage
+ * collection of logically removed nodes. We guarantee that all logically
+ * removed nodes have been garbage-collected (unlinked) before call_rcu is
+ * invoked to free a hole level of bucket nodes (after a grace period).
+ *
+ * Logical removal and garbage collection can therefore be done in batch or on a
+ * node-per-node basis, as long as the guarantee above holds.
+ *
+ * When we reach a certain length, we can split this removal over many worker
+ * threads, based on the number of CPUs available in the system. This should
+ * take care of not letting resize process lag behind too many concurrent
+ * updater threads actively inserting into the hash table.
+ */
+static
+void remove_table_partition(struct cds_lfht *ht, unsigned long i,
+ unsigned long start, unsigned long len)
+{
+ unsigned long j, size = 1UL << (i - 1);
+
+ assert(i > MIN_TABLE_ORDER);
+ ht->flavor->read_lock();
+ for (j = size + start; j < size + start + len; j++) {
+ struct cds_lfht_node *fini_node = bucket_at(ht, j);
+
+ assert(j >= size && j < (size << 1));
+ dbg_printf("remove entry: order %lu index %lu hash %lu\n",
+ i, j, j);
+ fini_node->reverse_hash = bit_reverse_ulong(j);
+ (void) _cds_lfht_del(ht, size, fini_node, 1);
+ }
+ ht->flavor->read_unlock();
+}
+
+static
+void remove_table(struct cds_lfht *ht, unsigned long i, unsigned long len)
+{
+
+ assert(nr_cpus_mask != -1);
+ if (nr_cpus_mask < 0 || len < 2 * MIN_PARTITION_PER_THREAD) {
+ ht->flavor->thread_online();
+ remove_table_partition(ht, i, 0, len);
+ ht->flavor->thread_offline();
+ return;
+ }
+ partition_resize_helper(ht, i, len, remove_table_partition);
+}
+
+static
+void fini_table(struct cds_lfht *ht,
+ unsigned long first_order, unsigned long last_order)
+{
+ long i;
+ unsigned long free_by_rcu_order = 0;
+
+ dbg_printf("fini table: first_order %lu last_order %lu\n",
+ first_order, last_order);
+ assert(first_order > MIN_TABLE_ORDER);
+ for (i = last_order; i >= first_order; i--) {
+ unsigned long len;
+
+ len = 1UL << (i - 1);
+ dbg_printf("fini order %lu len: %lu\n", i, len);
+
+ /* Stop shrink if the resize target changes under us */
+ if (CMM_LOAD_SHARED(ht->resize_target) > (1UL << (i - 1)))
+ break;
+
+ cmm_smp_wmb(); /* populate data before RCU size */
+ CMM_STORE_SHARED(ht->size, 1UL << (i - 1));
+
+ /*
+ * We need to wait for all add operations to reach Q.S. (and
+ * thus use the new table for lookups) before we can start
+ * releasing the old bucket nodes. Otherwise their lookup will
+ * return a logically removed node as insert position.
+ */
+ ht->flavor->update_synchronize_rcu();
+ if (free_by_rcu_order)
+ cds_lfht_free_bucket_table(ht, free_by_rcu_order);
+
+ /*
+ * Set "removed" flag in bucket nodes about to be removed.
+ * Unlink all now-logically-removed bucket node pointers.
+ * Concurrent add/remove operation are helping us doing
+ * the gc.
+ */
+ remove_table(ht, i, len);
+
+ free_by_rcu_order = i;
+
+ dbg_printf("fini new size: %lu\n", 1UL << i);