rculfhash: add missing clear flag in gc
[urcu.git] / rculfhash.c
/*
 * rculfhash.c
 *
 * Userspace RCU library - Lock-Free Expandable RCU Hash Table
 *
 * Copyright 2010-2011 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22
23 #define _LGPL_SOURCE
24 #include <stdlib.h>
25 #include <errno.h>
26 #include <assert.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <string.h>
30
31 #include <urcu.h>
32 #include <urcu-call-rcu.h>
33 #include <urcu/arch.h>
34 #include <urcu/uatomic.h>
35 #include <urcu/jhash.h>
36 #include <urcu/compiler.h>
37 #include <urcu/rculfhash.h>
38 #include <stdio.h>
39 #include <pthread.h>
40
#define DEBUG		/* Test */

#ifdef DEBUG
#define dbg_printf(args...)	printf(args)
#else
#define dbg_printf(args...)
#endif

#define CHAIN_LEN_TARGET		1
#define CHAIN_LEN_RESIZE_THRESHOLD	2

#ifndef max
#define max(a, b)	((a) > (b) ? (a) : (b))
#endif

/*
 * The removed flag needs to be updated atomically with the pointer.
 * The dummy flag does not require to be updated atomically with the
 * pointer, but it is added as a pointer low bit flag to save space.
 */
#define REMOVED_FLAG		(1UL << 0)
#define DUMMY_FLAG		(1UL << 1)
#define FLAGS_MASK		((1UL << 2) - 1)

struct rcu_table {
	unsigned long size;	/* always a power of 2 */
	unsigned long resize_target;
	int resize_initiated;
	struct rcu_head head;
	struct rcu_ht_node *tbl[0];
};

struct rcu_ht {
	struct rcu_table *t;		/* shared */
	ht_hash_fct hash_fct;
	ht_compare_fct compare_fct;
	unsigned long hash_seed;
	pthread_mutex_t resize_mutex;	/* resize mutex: add/del mutex */
	unsigned int in_progress_resize;
	void (*ht_call_rcu)(struct rcu_head *head,
		      void (*func)(struct rcu_head *head));
};

struct rcu_resize_work {
	struct rcu_head head;
	struct rcu_ht *ht;
};

/*
 * Algorithm to reverse bits in a word by lookup table, extended to
 * 64-bit words.
 * Source:
 * http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
 * Originally from Public Domain.
 */

static const uint8_t BitReverseTable256[256] =
{
#define R2(n) (n),   (n) + 2*64,     (n) + 1*64,     (n) + 3*64
#define R4(n) R2(n), R2((n) + 2*16), R2((n) + 1*16), R2((n) + 3*16)
#define R6(n) R4(n), R4((n) + 2*4 ), R4((n) + 1*4 ), R4((n) + 3*4 )
	R6(0), R6(2), R6(1), R6(3)
};
#undef R2
#undef R4
#undef R6

static
uint8_t bit_reverse_u8(uint8_t v)
{
	return BitReverseTable256[v];
}

static __attribute__((unused))
uint32_t bit_reverse_u32(uint32_t v)
{
	return ((uint32_t) bit_reverse_u8(v) << 24) |
		((uint32_t) bit_reverse_u8(v >> 8) << 16) |
		((uint32_t) bit_reverse_u8(v >> 16) << 8) |
		((uint32_t) bit_reverse_u8(v >> 24));
}

static __attribute__((unused))
uint64_t bit_reverse_u64(uint64_t v)
{
	return ((uint64_t) bit_reverse_u8(v) << 56) |
		((uint64_t) bit_reverse_u8(v >> 8) << 48) |
		((uint64_t) bit_reverse_u8(v >> 16) << 40) |
		((uint64_t) bit_reverse_u8(v >> 24) << 32) |
		((uint64_t) bit_reverse_u8(v >> 32) << 24) |
		((uint64_t) bit_reverse_u8(v >> 40) << 16) |
		((uint64_t) bit_reverse_u8(v >> 48) << 8) |
		((uint64_t) bit_reverse_u8(v >> 56));
}

static
unsigned long bit_reverse_ulong(unsigned long v)
{
#if (CAA_BITS_PER_LONG == 32)
	return bit_reverse_u32(v);
#else
	return bit_reverse_u64(v);
#endif
}
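
/*
 * Example (illustrative sketch only): nodes are kept ordered by their
 * bit-reversed hash, so reversing a bucket index moves its set bits to
 * the top of the word.  A few sanity checks on the helper above:
 */
static __attribute__((unused))
void example_bit_reverse(void)
{
	/* 0 reverses to 0, and index 1 reverses to the most significant bit. */
	assert(bit_reverse_ulong(0UL) == 0UL);
	assert(bit_reverse_ulong(1UL) == 1UL << (CAA_BITS_PER_LONG - 1));
	/* Bit reversal is an involution: reversing twice gives the input back. */
	assert(bit_reverse_ulong(bit_reverse_ulong(42UL)) == 42UL);
}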

/*
 * Algorithm to find the log2 of a 32-bit unsigned integer.
 * source: http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogLookup
 * Originally from Public Domain.
 */
static const char LogTable256[256] =
{
#define LT(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n
	-1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
	LT(4), LT(5), LT(5), LT(6), LT(6), LT(6), LT(6),
	LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7)
};

uint32_t log2_u32(uint32_t v)
{
	uint32_t t, tt;

	if ((tt = (v >> 16)))
		return (t = (tt >> 8))
				? 24 + LogTable256[t]
				: 16 + LogTable256[tt];
	else
		return (t = (v >> 8))
				? 8 + LogTable256[t]
				: LogTable256[v];
}
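
/*
 * Example (illustrative sketch only): log2_u32() returns the floor of
 * the base-2 logarithm; check_resize() below uses it to turn an
 * observed chain length into a power-of-two growth order.
 */
static __attribute__((unused))
void example_log2_u32(void)
{
	assert(log2_u32(1) == 0);
	assert(log2_u32(8) == 3);
	assert(log2_u32(9) == 3);	/* floor, not rounding */
}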

static
void ht_resize_lazy(struct rcu_ht *ht, struct rcu_table *t, int growth);

static
void check_resize(struct rcu_ht *ht, struct rcu_table *t,
		  uint32_t chain_len)
{
	if (chain_len >= CHAIN_LEN_RESIZE_THRESHOLD)
		ht_resize_lazy(ht, t,
			log2_u32(chain_len - CHAIN_LEN_TARGET - 1));
}

static
struct rcu_ht_node *clear_flag(struct rcu_ht_node *node)
{
	return (struct rcu_ht_node *) (((unsigned long) node) & ~FLAGS_MASK);
}

static
int is_removed(struct rcu_ht_node *node)
{
	return ((unsigned long) node) & REMOVED_FLAG;
}

static
struct rcu_ht_node *flag_removed(struct rcu_ht_node *node)
{
	return (struct rcu_ht_node *) (((unsigned long) node) | REMOVED_FLAG);
}

static
int is_dummy(struct rcu_ht_node *node)
{
	return ((unsigned long) node) & DUMMY_FLAG;
}

static
struct rcu_ht_node *flag_dummy(struct rcu_ht_node *node)
{
	return (struct rcu_ht_node *) (((unsigned long) node) | DUMMY_FLAG);
}
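
/*
 * Example (illustrative sketch only): because nodes are at least
 * word-aligned, the two low-order bits of a node pointer are always
 * zero and can carry the REMOVED and DUMMY flags.  The helpers above
 * simply mask those bits in and out.  Assuming node carries no flags
 * on entry:
 */
static __attribute__((unused))
void example_flag_roundtrip(struct rcu_ht_node *node)
{
	struct rcu_ht_node *marked;

	marked = flag_removed(node);		/* set the low "removed" bit */
	assert(is_removed(marked));
	assert(clear_flag(marked) == node);	/* recover the original pointer */
}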

static
unsigned long _uatomic_max(unsigned long *ptr, unsigned long v)
{
	unsigned long old1, old2;

	old1 = uatomic_read(ptr);
	do {
		old2 = old1;
		if (old2 >= v)
			return old2;
	} while ((old1 = uatomic_cmpxchg(ptr, old2, v)) != old2);
	return v;
}
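
/*
 * Example (illustrative sketch only): _uatomic_max() only ever raises
 * the stored value, and returns the resulting maximum.
 */
static __attribute__((unused))
void example_uatomic_max(void)
{
	unsigned long target = 8;

	assert(_uatomic_max(&target, 4) == 8);		/* already larger, unchanged */
	assert(_uatomic_max(&target, 32) == 32);	/* raised to the new value */
	assert(target == 32);
}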

/*
 * Remove all logically deleted nodes from a bucket up to a certain node key.
 */
static
void _ht_gc_bucket(struct rcu_ht_node *dummy, struct rcu_ht_node *node)
{
	struct rcu_ht_node *iter_prev, *iter, *next, *new_next;

	for (;;) {
		iter_prev = dummy;
		/* We can always skip the dummy node initially */
		iter = rcu_dereference(iter_prev->p.next);
		assert(iter_prev->p.reverse_hash <= node->p.reverse_hash);
		for (;;) {
			if (unlikely(!clear_flag(iter)))
				return;
			if (clear_flag(iter)->p.reverse_hash > node->p.reverse_hash)
				return;
			next = rcu_dereference(clear_flag(iter)->p.next);
			if (is_removed(next))
				break;
			iter_prev = clear_flag(iter);
			iter = next;
		}
		assert(!is_removed(iter));
		if (is_dummy(iter))
			new_next = flag_dummy(clear_flag(next));
		else
			new_next = clear_flag(next);
		(void) uatomic_cmpxchg(&iter_prev->p.next, iter, new_next);
	}
}

static
struct rcu_ht_node *_ht_add(struct rcu_ht *ht, struct rcu_table *t,
			    struct rcu_ht_node *node, int unique, int dummy)
{
	struct rcu_ht_node *iter_prev, *iter, *next, *new_node, *new_next,
			*dummy_node;
	unsigned long hash;

	if (!t->size) {
		assert(dummy);
		node->p.next = flag_dummy(NULL);
		return node;	/* Initial first add (head) */
	}
	hash = bit_reverse_ulong(node->p.reverse_hash);
	for (;;) {
		uint32_t chain_len = 0;

		/*
		 * iter_prev points to the non-removed node prior to the
		 * insert location.
		 */
		iter_prev = rcu_dereference(t->tbl[hash & (t->size - 1)]);
		/* We can always skip the dummy node initially */
		iter = rcu_dereference(iter_prev->p.next);
		assert(iter_prev->p.reverse_hash <= node->p.reverse_hash);
		for (;;) {
			if (unlikely(!clear_flag(iter)))
				goto insert;
			if (clear_flag(iter)->p.reverse_hash > node->p.reverse_hash)
				goto insert;
			next = rcu_dereference(clear_flag(iter)->p.next);
			if (is_removed(next))
				goto gc_node;
			if (unique
			    && !is_dummy(next)
			    && !ht->compare_fct(node->key, node->key_len,
						clear_flag(iter)->key,
						clear_flag(iter)->key_len))
				return clear_flag(iter);
			/* Only account for identical reverse hash once */
			if (iter_prev->p.reverse_hash != clear_flag(iter)->p.reverse_hash)
				check_resize(ht, t, ++chain_len);
			iter_prev = clear_flag(iter);
			iter = next;
		}
	insert:
		assert(node != clear_flag(iter));
		assert(!is_removed(iter_prev));
		assert(iter_prev != node);
		if (!dummy)
			node->p.next = clear_flag(iter);
		else
			node->p.next = flag_dummy(clear_flag(iter));
		if (is_dummy(iter))
			new_node = flag_dummy(node);
		else
			new_node = node;
		if (uatomic_cmpxchg(&iter_prev->p.next, iter,
				    new_node) != iter)
			continue;	/* retry */
		else
			goto gc_end;
	gc_node:
		assert(!is_removed(iter));
		if (is_dummy(iter))
			new_next = flag_dummy(clear_flag(next));
		else
			new_next = clear_flag(next);
		(void) uatomic_cmpxchg(&iter_prev->p.next, iter, new_next);
		/* retry */
	}
gc_end:
	/* Garbage collect logically removed nodes in the bucket */
	dummy_node = rcu_dereference(t->tbl[hash & (t->size - 1)]);
	_ht_gc_bucket(dummy_node, node);
	return node;
}

static
int _ht_remove(struct rcu_ht *ht, struct rcu_table *t, struct rcu_ht_node *node)
{
	struct rcu_ht_node *dummy, *next, *old;
	int flagged = 0;
	unsigned long hash;

	/* logically delete the node */
	old = rcu_dereference(node->p.next);
	do {
		next = old;
		if (is_removed(next))
			goto end;
		assert(!is_dummy(next));
		old = uatomic_cmpxchg(&node->p.next, next,
				      flag_removed(next));
	} while (old != next);

	/* We performed the (logical) deletion. */
	flagged = 1;

	/*
	 * Ensure that the node is not visible to readers anymore: look
	 * the node up, and remove it (along with any other logically
	 * removed node) if found.
	 */
	hash = bit_reverse_ulong(node->p.reverse_hash);
	dummy = rcu_dereference(t->tbl[hash & (t->size - 1)]);
	_ht_gc_bucket(dummy, node);
end:
	/*
	 * Only the thread that successfully flagged the node (and no
	 * other) actually removed it from the hash table.
	 */
	if (flagged) {
		assert(is_removed(rcu_dereference(node->p.next)));
		return 0;
	} else
		return -ENOENT;
}

static
void init_table(struct rcu_ht *ht, struct rcu_table *t,
		unsigned long first, unsigned long len)
{
	unsigned long i, end;

	end = first + len;
	for (i = first; i < end; i++) {
		/* Update table size when power of two */
		if (i != 0 && !(i & (i - 1)))
			t->size = i;
		t->tbl[i] = calloc(1, sizeof(struct _rcu_ht_node));
		t->tbl[i]->p.reverse_hash = bit_reverse_ulong(i);
		(void) _ht_add(ht, t, t->tbl[i], 0, 1);
	}
	t->resize_target = t->size = end;
	t->resize_initiated = 0;
}

struct rcu_ht *ht_new(ht_hash_fct hash_fct,
		      ht_compare_fct compare_fct,
		      unsigned long hash_seed,
		      unsigned long init_size,
		      void (*ht_call_rcu)(struct rcu_head *head,
				void (*func)(struct rcu_head *head)))
{
	struct rcu_ht *ht;

	ht = calloc(1, sizeof(struct rcu_ht));
	ht->hash_fct = hash_fct;
	ht->compare_fct = compare_fct;
	ht->hash_seed = hash_seed;
	ht->ht_call_rcu = ht_call_rcu;
	ht->in_progress_resize = 0;
	/* this mutex should not nest in read-side C.S. */
	pthread_mutex_init(&ht->resize_mutex, NULL);
	ht->t = calloc(1, sizeof(struct rcu_table)
		       + (max(init_size, 1) * sizeof(struct rcu_ht_node *)));
	ht->t->size = 0;
	pthread_mutex_lock(&ht->resize_mutex);
	init_table(ht, ht->t, 0, max(init_size, 1));
	pthread_mutex_unlock(&ht->resize_mutex);
	return ht;
}

struct rcu_ht_node *ht_lookup(struct rcu_ht *ht, void *key, size_t key_len)
{
	struct rcu_table *t;
	struct rcu_ht_node *node, *next;
	unsigned long hash, reverse_hash;

	hash = ht->hash_fct(key, key_len, ht->hash_seed);
	reverse_hash = bit_reverse_ulong(hash);

	t = rcu_dereference(ht->t);
	node = rcu_dereference(t->tbl[hash & (t->size - 1)]);
	for (;;) {
		if (unlikely(!node))
			break;
		if (unlikely(node->p.reverse_hash > reverse_hash)) {
			node = NULL;
			break;
		}
		next = rcu_dereference(node->p.next);
		if (likely(!is_removed(next))
		    && !is_dummy(next)
		    && likely(!ht->compare_fct(node->key, node->key_len, key, key_len))) {
			break;
		}
		node = clear_flag(next);
	}
	assert(!node || !is_dummy(rcu_dereference(node->p.next)));
	return node;
}

void ht_add(struct rcu_ht *ht, struct rcu_ht_node *node)
{
	struct rcu_table *t;
	unsigned long hash;

	hash = ht->hash_fct(node->key, node->key_len, ht->hash_seed);
	node->p.reverse_hash = bit_reverse_ulong((unsigned long) hash);

	t = rcu_dereference(ht->t);
	(void) _ht_add(ht, t, node, 0, 0);
}

struct rcu_ht_node *ht_add_unique(struct rcu_ht *ht, struct rcu_ht_node *node)
{
	struct rcu_table *t;
	unsigned long hash;

	hash = ht->hash_fct(node->key, node->key_len, ht->hash_seed);
	node->p.reverse_hash = bit_reverse_ulong((unsigned long) hash);

	t = rcu_dereference(ht->t);
	return _ht_add(ht, t, node, 1, 0);
}

int ht_remove(struct rcu_ht *ht, struct rcu_ht_node *node)
{
	struct rcu_table *t;

	t = rcu_dereference(ht->t);
	return _ht_remove(ht, t, node);
}
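
/*
 * Usage sketch (illustration only, under stated assumptions): the
 * caller embeds a struct rcu_ht_node in its own structure, provides
 * hash and compare callbacks, and calls the API from within RCU
 * read-side critical sections (the rcu_dereference() calls above rely
 * on that).  The callback signatures below are inferred from how
 * hash_fct and compare_fct are invoked in this file, and the key /
 * key_len fields are assumed to be settable directly by the caller;
 * the authoritative typedefs and any init helper live in
 * urcu/rculfhash.h.
 */
struct example_entry {
	struct rcu_ht_node node;	/* embedded hash table node */
	int value;
};

static __attribute__((unused))
unsigned long example_hash(void *key, size_t length, unsigned long seed)
{
	const unsigned char *p = key;
	unsigned long h = seed;
	size_t i;

	/* Toy hash for the sketch; real users would pick jhash or similar. */
	for (i = 0; i < length; i++)
		h = h * 31 + p[i];
	return h;
}

static __attribute__((unused))
unsigned long example_compare(void *key1, size_t key1_len,
			      void *key2, size_t key2_len)
{
	/* Return 0 on match, as expected by _ht_add() and ht_lookup(). */
	if (key1_len != key2_len)
		return 1;
	return !!memcmp(key1, key2, key1_len);
}

static __attribute__((unused))
void example_usage(void)
{
	static char key[] = "example-key";
	struct example_entry *entry;
	struct rcu_ht_node *found;
	struct rcu_ht *ht;

	rcu_register_thread();
	ht = ht_new(example_hash, example_compare, 0x42UL, 8, call_rcu);

	entry = calloc(1, sizeof(*entry));
	entry->node.key = key;
	entry->node.key_len = sizeof(key);
	entry->value = 42;

	rcu_read_lock();
	ht_add(ht, &entry->node);
	found = ht_lookup(ht, key, sizeof(key));
	assert(found
	       && caa_container_of(found, struct example_entry, node)->value == 42);
	(void) ht_remove(ht, &entry->node);
	rcu_read_unlock();

	synchronize_rcu();	/* wait for readers before freeing the entry */
	free(entry);
	(void) ht_destroy(ht);	/* no concurrent readers nor writers left */
	rcu_unregister_thread();
}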

static
int ht_delete_dummy(struct rcu_ht *ht)
{
	struct rcu_table *t;
	struct rcu_ht_node *node;
	unsigned long i;

	t = ht->t;
	/* Check that the table is empty */
	node = t->tbl[0];
	do {
		node = clear_flag(node)->p.next;
		if (!is_dummy(node))
			return -EPERM;
		assert(!is_removed(node));
	} while (clear_flag(node));
	/* Internal sanity check: all nodes left should be dummy */
	for (i = 0; i < t->size; i++) {
		assert(is_dummy(t->tbl[i]->p.next));
		free(t->tbl[i]);
	}
	return 0;
}

/*
 * Should only be called when no more concurrent readers nor writers can
 * possibly access the table.
 */
int ht_destroy(struct rcu_ht *ht)
{
	int ret;

	/* Wait for in-flight resize operations to complete */
	while (uatomic_read(&ht->in_progress_resize))
		poll(NULL, 0, 100);	/* wait for 100ms */
	ret = ht_delete_dummy(ht);
	if (ret)
		return ret;
	free(ht->t);
	free(ht);
	return ret;
}

void ht_count_nodes(struct rcu_ht *ht,
		    unsigned long *count,
		    unsigned long *removed)
{
	struct rcu_table *t;
	struct rcu_ht_node *node, *next;

	*count = 0;
	*removed = 0;

	t = rcu_dereference(ht->t);
	/* Walk the whole chain, counting live and logically removed nodes */
	node = rcu_dereference(t->tbl[0]);
	do {
		next = rcu_dereference(node->p.next);
		if (is_removed(next)) {
			assert(!is_dummy(next));
			(*removed)++;
		} else if (!is_dummy(next))
			(*count)++;
		node = clear_flag(next);
	} while (node);
}

static
void ht_free_table_cb(struct rcu_head *head)
{
	struct rcu_table *t =
		caa_container_of(head, struct rcu_table, head);
	free(t);
}

/* called with resize mutex held */
static
void _do_ht_resize(struct rcu_ht *ht)
{
	unsigned long new_size, old_size;
	struct rcu_table *new_t, *old_t;

	old_t = ht->t;
	old_size = old_t->size;

	new_size = CMM_LOAD_SHARED(old_t->resize_target);
	dbg_printf("rculfhash: resize from %lu to %lu buckets\n",
		   old_size, new_size);
	if (old_size == new_size)
		return;
	new_t = malloc(sizeof(struct rcu_table)
			+ (new_size * sizeof(struct rcu_ht_node *)));
	assert(new_size > old_size);
	memcpy(&new_t->tbl, &old_t->tbl,
	       old_size * sizeof(struct rcu_ht_node *));
	init_table(ht, new_t, old_size, new_size - old_size);
	/* Changing table and size atomically wrt lookups */
	rcu_assign_pointer(ht->t, new_t);
	ht->ht_call_rcu(&old_t->head, ht_free_table_cb);
}

static
unsigned long resize_target_update(struct rcu_table *t,
				   int growth_order)
{
	return _uatomic_max(&t->resize_target,
			    t->size << growth_order);
}

void ht_resize(struct rcu_ht *ht, int growth)
{
	struct rcu_table *t = rcu_dereference(ht->t);
	unsigned long target_size;

	target_size = resize_target_update(t, growth);
	if (t->size < target_size) {
		CMM_STORE_SHARED(t->resize_initiated, 1);
		pthread_mutex_lock(&ht->resize_mutex);
		_do_ht_resize(ht);
		pthread_mutex_unlock(&ht->resize_mutex);
	}
}
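
/*
 * Example (illustrative sketch only): a caller expecting a burst of
 * insertions can force growth ahead of time instead of waiting for the
 * chain-length triggered lazy resize.  growth is a power-of-two order:
 * growth == 2 multiplies the bucket count by 4.  As noted in ht_new(),
 * the resize mutex must not nest inside a read-side critical section,
 * so call this outside rcu_read_lock()/rcu_read_unlock().
 */
static __attribute__((unused))
void example_grow(struct rcu_ht *ht)
{
	ht_resize(ht, 2);
}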

static
void do_resize_cb(struct rcu_head *head)
{
	struct rcu_resize_work *work =
		caa_container_of(head, struct rcu_resize_work, head);
	struct rcu_ht *ht = work->ht;

	pthread_mutex_lock(&ht->resize_mutex);
	_do_ht_resize(ht);
	pthread_mutex_unlock(&ht->resize_mutex);
	free(work);
	cmm_smp_mb();	/* finish resize before decrement */
	uatomic_dec(&ht->in_progress_resize);
}

static
void ht_resize_lazy(struct rcu_ht *ht, struct rcu_table *t, int growth)
{
	struct rcu_resize_work *work;
	unsigned long target_size;

	target_size = resize_target_update(t, growth);
	if (!CMM_LOAD_SHARED(t->resize_initiated) && t->size < target_size) {
		uatomic_inc(&ht->in_progress_resize);
		cmm_smp_mb();	/* increment resize count before calling it */
		work = malloc(sizeof(*work));
		work->ht = ht;
		ht->ht_call_rcu(&work->head, do_resize_cb);
		CMM_STORE_SHARED(t->resize_initiated, 1);
	}
}