 * TODO: keys are currently assumed to be <= sizeof(void *). The key target is never freed.
#include <urcu-defer.h>
#include <urcu/arch.h>
#include <urcu/uatomic.h>
#include <urcu/jhash.h>
#include <urcu/compiler.h>
#include <urcu/rculfhash.h>
/*
 * Maximum number of hash table buckets: 256M on 64-bit.
 * Should take about 512MB max if we assume 1 node per 4 buckets.
 */
#define MAX_HT_BUCKETS ((256 << 10) / sizeof(void *))

#define NODE_STOLEN (1 << 0)
struct rcu_ht_node {
	struct rcu_ht_node *next;
	void *key;
	void *data;
	unsigned long flags;
};

struct rcu_table {
	unsigned long size;
	struct rcu_ht_node *tbl[0];
};

struct rcu_ht {
	struct rcu_table *t;		/* shared */
	ht_hash_fct hash_fct;
	void (*free_fct)(void *data);	/* fct to free data */
	uint32_t keylen;
	uint32_t hashseed;
	pthread_mutex_t resize_mutex;	/* resize mutex: add/del mutex */
	int resize_ongoing;		/* fast-path resize check */
};
struct rcu_ht *ht_new(ht_hash_fct hash_fct, void (*free_fct)(void *data),
		      unsigned long init_size, uint32_t keylen,
		      uint32_t hashseed)
	ht = calloc(1, sizeof(struct rcu_ht));
	ht->hash_fct = hash_fct;
	ht->free_fct = free_fct;
	ht->hashseed = hashseed;
	/* this mutex should not nest in read-side C.S. */
	pthread_mutex_init(&ht->resize_mutex, NULL);
	ht->resize_ongoing = 0;	/* shared */
	ht->t = calloc(1, sizeof(struct rcu_table)
		       + (init_size * sizeof(struct rcu_ht_node *)));
	ht->t->size = init_size;
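
/*
 * Illustrative sketch, not part of the original file: creating a table.  It
 * assumes the final, elided ht_new() parameter is the hash seed (the
 * constructor stores it into ht->hashseed); the seed and initial size below
 * are arbitrary, and free() is only a suitable free_fct if the stored data
 * pointers come from malloc().
 */
static struct rcu_ht *example_create(void)
{
	return ht_new(ht_jhash, free, 256 /* init_size */,
		      sizeof(void *) /* keylen */, 0xdeadbeef /* hashseed */);
}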
void *ht_lookup(struct rcu_ht *ht, void *key)
	struct rcu_ht_node *node;

	t = rcu_dereference(ht->t);
	smp_read_barrier_depends();	/* read t before size and table */
	hash = ht->hash_fct(key, ht->keylen, ht->hashseed) % t->size;
	smp_read_barrier_depends();	/* read size before links */
	node = rcu_dereference(t->tbl[hash]);
		if (node->key == key) {
		node = rcu_dereference(node->next);
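
/*
 * Illustrative sketch, not part of the original file: calling ht_lookup().
 * Lookups walk RCU-protected pointers (rcu_dereference above), so the caller
 * must be inside an RCU read-side critical section.  The not-found return
 * convention of ht_lookup() is not visible in this excerpt; this sketch
 * assumes the -ENOENT sentinel used by ht_steal() below.  The helper name is
 * hypothetical.
 */
static void *example_lookup(struct rcu_ht *ht, void *key)
{
	void *data;

	rcu_read_lock();
	data = ht_lookup(ht, key);
	rcu_read_unlock();
	if (data == (void *)(unsigned long)-ENOENT)
		return NULL;	/* key not present */
	return data;
}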
/*
 * Will retry until either:
 * - The key is already there (-EEXIST).
 * - We successfully add the key at the head of a table bucket.
 */
int ht_add(struct rcu_ht *ht, void *key, void *data)
	struct rcu_ht_node *node, *old_head, *new_head;

	new_head = calloc(1, sizeof(struct rcu_ht_node));
	new_head->data = data;
	/*
	 * Here comes the fun and tricky part: add at the head with a cmpxchg.
	 * Hold a read lock between the moment the first element is read and
	 * the node traversal (to find duplicates); this ensures the head
	 * pointer has not been reclaimed by the time the cmpxchg is done.
	 * Always adding at the head means we have to retry if a new item has
	 * been added concurrently, so we never add duplicates.  (A standalone
	 * sketch of this retry loop follows the function.)
	 */
	if (unlikely(LOAD_SHARED(ht->resize_ongoing))) {
		/* Wait for resize to complete before continuing. */
		ret = pthread_mutex_lock(&ht->resize_mutex);
		ret = pthread_mutex_unlock(&ht->resize_mutex);

	t = rcu_dereference(ht->t);
	/* no read barrier needed, because no concurrency with resize */
	hash = ht->hash_fct(key, ht->keylen, ht->hashseed) % t->size;

	old_head = node = rcu_dereference(t->tbl[hash]);
		if (node->key == key) {
		node = rcu_dereference(node->next);
	new_head->next = old_head;
	if (rcu_cmpxchg_pointer(&t->tbl[hash], old_head, new_head) != old_head)
	/* restart loop, release and re-take the read lock to be kind to GP */
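
/*
 * Illustrative sketch, not part of the original file: the push-at-head retry
 * loop described in the comment above, in isolation.  It uses only the APIs
 * already used by ht_add() (rcu_read_lock/unlock, rcu_dereference,
 * rcu_cmpxchg_pointer); the resize handshake, allocation and error handling
 * of the real function are deliberately omitted, and the helper name is
 * hypothetical.
 */
static int example_push_at_head(struct rcu_ht_node **bucket,
				struct rcu_ht_node *new_head)
{
	struct rcu_ht_node *node, *old_head;

	for (;;) {
		rcu_read_lock();
		old_head = node = rcu_dereference(*bucket);
		/* Scan the bucket for a duplicate key. */
		for (; node; node = rcu_dereference(node->next)) {
			if (node->key == new_head->key) {
				rcu_read_unlock();
				return -EEXIST;
			}
		}
		/* Link in front of the old head, then publish with cmpxchg. */
		new_head->next = old_head;
		if (rcu_cmpxchg_pointer(bucket, old_head, new_head)
		    == old_head) {
			rcu_read_unlock();
			return 0;
		}
		/* Head changed under us: drop the read lock and retry. */
		rcu_read_unlock();
	}
}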
/*
 * Restart until we successfully remove the entry, or until no entry is left
 * ((void *)(unsigned long)-ENOENT).
 * Deal with concurrent stealers by doing an extra verification pass: check
 * that no element in the list still points to the stolen element.  This can
 * happen if two concurrent steals of consecutive objects are executed: a
 * pointer to the object being stolen may have been saved by the concurrent
 * stealer of the previous object.
 * Also, given that in this precise scenario another stealer may also want to
 * delete the doubly-referenced object, use a "stolen" flag to let only one
 * stealer delete the object.
 */
void *ht_steal(struct rcu_ht *ht, void *key)
	struct rcu_ht_node **prev, *node, *del_node = NULL;
	if (unlikely(LOAD_SHARED(ht->resize_ongoing))) {
		/* Wait for resize to complete before continuing. */
		ret = pthread_mutex_lock(&ht->resize_mutex);
		ret = pthread_mutex_unlock(&ht->resize_mutex);

	t = rcu_dereference(ht->t);
	/* no read barrier needed, because no concurrency with resize */
	hash = ht->hash_fct(key, ht->keylen, ht->hashseed) % t->size;

	prev = &t->tbl[hash];
	node = rcu_dereference(*prev);
		if (node->key == key) {
		node = rcu_dereference(*prev);
	/*
	 * Did another concurrent thread steal it?  If so, let it deal with
	 * this.  Assume NODE_STOLEN is the only flag; if this changes, read
	 * the flags before the cmpxchg.
	 */
	if (cmpxchg(&node->flags, 0, NODE_STOLEN) != 0)

	/* Found it!  The pointer to the object is in "prev". */
	if (rcu_cmpxchg_pointer(prev, node, node->next) == node)

	/*
	 * From that point on, we own the node.  Note that there can still be
	 * concurrent RCU readers using it.  We can free it outside of the
	 * read lock, after a GP.
	 */
	data = del_node->data;
	call_rcu(free, del_node);

	data = (void *)(unsigned long)-ENOENT;

	/* restart loop, release and re-take the read lock to be kind to GP */
int ht_delete(struct rcu_ht *ht, void *key)
	data = ht_steal(ht, key);
	if (data && data != (void *)(unsigned long)-ENOENT) {
		call_rcu(ht->free_fct, data);
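
/*
 * Illustrative sketch, not part of the original file: how a caller can tell
 * ht_steal() and ht_delete() apart.  ht_steal() hands ownership of the data
 * pointer back to the caller (or returns the -ENOENT sentinel when the key
 * is absent), while ht_delete() additionally queues the data for
 * ht->free_fct after a grace period.  The helper name is hypothetical.
 */
static void *example_steal(struct rcu_ht *ht, void *key)
{
	void *data;

	data = ht_steal(ht, key);
	if (data == (void *)(unsigned long)-ENOENT)
		return NULL;	/* key was not in the table */
	/*
	 * We now own "data"; pre-existing RCU readers may still reference
	 * the node that held it until a grace period elapses.
	 */
	return data;
}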
/* Delete all old elements.  Allow concurrent writer accesses. */
int ht_delete_all(struct rcu_ht *ht)
	struct rcu_ht_node **prev, *node, *inext;

	/*
	 * Mutual exclusion with resize operations, but let add/steal execute
	 * concurrently.  This is OK because we only operate on the bucket
	 * heads.
	 */
	ret = pthread_mutex_lock(&ht->resize_mutex);

	t = rcu_dereference(ht->t);
	/* no read barrier needed, because no concurrency with resize */
	for (i = 0; i < t->size; i++) {
		/* Cut the head.  After that, we own the first element. */
		node = rcu_xchg_pointer(prev, NULL);
		/*
		 * We manage a list shared with concurrent writers and
		 * readers.  Note that a concurrent add may or may not be
		 * deleted by us, depending on whether it arrives before or
		 * after the head is cut.  "node" points to our first node.
		 * Remove the first elements.
		 */
		inext = rcu_xchg_pointer(prev, NULL);
		/*
		 * "node" is the first element of the list we have cut.  We
		 * therefore own it; no concurrent writer may delete it.
		 * There can only be concurrent lookups.  A concurrent add
		 * can only be done on a bucket head, but we've already cut
		 * it.  inext is also owned by us, because we have exchanged
		 * it for "NULL".  It will therefore be safe to use it after
		 * a GP.
		 */
		call_rcu(ht->free_fct, node->data);
		call_rcu(free, node);

	ret = pthread_mutex_unlock(&ht->resize_mutex);
/*
 * Should only be called when no concurrent readers or writers can possibly
 * access the table anymore.
 */
int ht_destroy(struct rcu_ht *ht)
	ret = ht_delete_all(ht);
static void ht_resize_grow(struct rcu_ht *ht)
	unsigned long i, new_size, old_size;
	struct rcu_table *new_t, *old_t;
	struct rcu_ht_node *node, *new_node, *tmp;

	old_size = old_t->size;

	if (old_size == MAX_HT_BUCKETS)

	new_size = old_size << 1;
	new_t = calloc(1, sizeof(struct rcu_table)
		       + (new_size * sizeof(struct rcu_ht_node *)));
	new_t->size = new_size;

	for (i = 0; i < old_size; i++) {
		/*
		 * Re-hash each entry and insert it in the new table.
		 * It's important that a reader looking for a key _will_ find
		 * it if it's in the table.
		 * Copy each node (just the node, not ->data).
		 */
		node = old_t->tbl[i];
		hash = ht->hash_fct(node->key, ht->keylen, ht->hashseed)
			% new_size;
		new_node = malloc(sizeof(struct rcu_ht_node));
		new_node->key = node->key;
		new_node->data = node->data;
		new_node->flags = node->flags;
		new_node->next = new_t->tbl[hash];	/* link to first */
		new_t->tbl[hash] = new_node;		/* add to head */

	/* Changing table and size atomically wrt lookups */
	rcu_assign_pointer(ht->t, new_t);

	/* Ensure all concurrent lookups use new size and table */
	for (i = 0; i < old_size; i++) {
		node = old_t->tbl[i];
static void ht_resize_shrink(struct rcu_ht *ht)
	unsigned long i, new_size;
	struct rcu_table *new_t, *old_t;
	struct rcu_ht_node **prev, *node;

	if (old_t->size == 1)

	new_size = old_t->size >> 1;

	for (i = 0; i < new_size; i++) {
		/*
		 * Link the end of bucket i to the first entry of bucket
		 * i + new_size: an entry that hashed to i + new_size modulo
		 * the old (doubled) size hashes to i modulo the new size.
		 */
		prev = &old_t->tbl[i];
		*prev = old_t->tbl[i + new_size];

	smp_wmb();	/* write links before changing size */
	STORE_SHARED(old_t->size, new_size);

	/* Ensure all concurrent lookups use new size */

	new_t = realloc(old_t, sizeof(struct rcu_table)
			+ (new_size * sizeof(struct rcu_ht_node *)));
	/* shrinking: pointers should not move */
	assert(new_t == old_t);
/*
 * growth: > 0: multiply size by 2, < 0: divide size by 2.
 */
void ht_resize(struct rcu_ht *ht, int growth)
	ret = pthread_mutex_lock(&ht->resize_mutex);

	STORE_SHARED(ht->resize_ongoing, 1);

	/* All add/remove are waiting on the mutex. */
		ht_resize_shrink(ht);

	STORE_SHARED(ht->resize_ongoing, 0);
	ret = pthread_mutex_unlock(&ht->resize_mutex);
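
/*
 * Illustrative sketch, not part of the original file: requesting an explicit
 * resize.  A positive growth value grows the table (size * 2), a negative
 * value shrinks it (size / 2); the load-factor thresholds and helper name
 * below are arbitrary, and whether the magnitude of "growth" is applied
 * repeatedly is not visible in this excerpt, so only +1/-1 are used.
 */
static void example_adapt_size(struct rcu_ht *ht, unsigned long nr_entries,
			       unsigned long nr_buckets)
{
	if (nr_entries > nr_buckets * 4)
		ht_resize(ht, 1);	/* double the bucket count */
	else if (nr_entries * 4 < nr_buckets)
		ht_resize(ht, -1);	/* halve the bucket count */
}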
/*
 * Expects keys no larger than a pointer to be encoded in the pointer itself.
 */
uint32_t ht_jhash(void *key, uint32_t length, uint32_t initval)
	if (length <= sizeof(void *))

	ret = jhash(vkey, length, initval);
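
/*
 * Illustrative sketch, not part of the original file: with small keys
 * encoded in the pointer itself (see the comment above and the TODO at the
 * top of the file), a caller indexing by unsigned long passes the value
 * directly as the key, and ht_jhash() hashes the pointer bits.  The helper
 * name is hypothetical.
 */
static int example_add_ulong(struct rcu_ht *ht, unsigned long key, void *data)
{
	return ht_add(ht, (void *)key, data);
}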