/*
 * Userspace RCU library - RCU Judy Array Range Support
 *
 * Copyright 2012-2013 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>
#include <errno.h>
#include <assert.h>
#include <pthread.h>

#include <urcu/rcuja.h>
#include <urcu/compiler.h>
#include <urcu/arch.h>
#include <urcu-pointer.h>
#include <urcu/uatomic.h>
#include <urcu/rcuja-range.h>
#include <urcu-flavor.h>

#include "rcuja-internal.h"
/*
 * Discussion about order of lookup/lock vs allocated node deletion.
 *
 * - If node deletion returns before the call to cds_ja_range_lookup(),
 *   the node will not be found by lookup.
 * - If node deletion is called after cds_ja_range_lock() returns a
 *   non-NULL range, the deletion will wait until the lock is released
 *   before it takes place.
 * - If node deletion call/return overlaps with the call to
 *   cds_ja_range_lookup() and return from cds_ja_range_lock(), the node
 *   may or may not be found by each of cds_ja_range_lookup() and
 *   cds_ja_range_lock().
 */
/*
 * Discussion about order of lookup/lock vs allocated node add. Assuming
 * no concurrent delete.
 *
 * - If node add returns before the call to cds_ja_range_lookup(), the
 *   node will be found by lookup.
 * - If node add is called after cds_ja_range_lookup() returns, the node
 *   will not be found by lookup.
 * - If node add call/return overlaps with the call to and return from
 *   cds_ja_range_lookup(), the node may or may not be found.
 * - If node add call/return overlaps with the call to cds_ja_range_lookup()
 *   and return from cds_ja_range_lock(), in the specific case where
 *   cds_ja_range_lookup() _does_ succeed, then cds_ja_range_lock() will
 *   succeed (still assuming no concurrent deletion).
 */
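/*
 * Usage sketch (illustration only, not part of the library code): the
 * guarantees above are typically used by pairing cds_ja_range_lookup()
 * with cds_ja_range_lock(). The rcu_read_lock()/rcu_read_unlock()
 * calls stand for the read-side primitives of whichever RCU flavor the
 * Judy array was created with; "ja" and "key" are assumed to be
 * provided by the caller.
 *
 *	struct cds_ja_range *range;
 *
 *	rcu_read_lock();
 *	range = cds_ja_range_lookup(ja, key);
 *	if (range)
 *		range = cds_ja_range_lock(range);
 *	rcu_read_unlock();
 *	if (range) {
 *		... use the allocated range, removal is postponed ...
 *		cds_ja_range_unlock(range);
 *	}
 */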
/*
 * Discussion: concurrent deletion of contiguous allocated ranges.
 *
 * To ensure that merging of contiguous free ranges is always performed,
 * we need to serialize concurrent removals of contiguous allocated
 * ranges with respect to one another. This is done by locking the
 * ranges immediately preceding and following the range to remove, even
 * if those ranges are allocated. This serializes removal of contiguous
 * ranges. The only cases for which there is no range to lock are when
 * removing an allocated range starting at 0, and/or ending at the end
 * of the key space.
 */
/*
 * Discussion: concurrent lookup vs add.
 *
 * When executed concurrently with node add, the inequality lookup can
 * see no node for the looked-up range, because a range can be shrunk.
 * This can happen if, for instance, we look up key 2 between addition
 * of a "free" range for values [1,2], and removal of the old "free"
 * range for values [0,2]. We would then fail to observe any range for
 * key 2. Given that the lookup is performed during a range transition,
 * we can safely return that there is no allocated range for that key.
 */
/*
 * Discussion: concurrent lookup vs del.
 *
 * There is no special case for lookups performed concurrently with node
 * del, because node del either replaces the node with the exact same
 * start key (see duplicates guarantees), or replaces it with a larger
 * range containing the prior range. Therefore, we are sure that
 * inequality lookups will see the larger range before the old range is
 * deleted, in whichever direction the lookup is performed.
 */
/*
 * Discussion of the "type" state transitions.
 *
 * State transitions of "type" always go from either:
 *
 * CDS_JA_RANGE_FREE -> CDS_JA_RANGE_REMOVED
 * or
 * CDS_JA_RANGE_ALLOCATED -> CDS_JA_RANGE_REMOVED
 *
 * A range type never changes otherwise.
 */
//#define RANGE_DEBUG

#ifdef RANGE_DEBUG
#define dbg_printf(fmt, args...)	\
	fprintf(stderr, "[debug rcuja-range %lu %s()@%s:%u] " fmt,	\
		(unsigned long) gettid(), __func__,	\
		__FILE__, __LINE__, ## args)
#else
#define dbg_printf(fmt, args...)	\
do {						\
	/* do nothing but check printf format */	\
	if (0)					\
		fprintf(stderr, "[debug rcuja-range %lu %s()@%s:%u] " fmt,	\
			(unsigned long) gettid(), __func__,	\
			__FILE__, __LINE__, ## args);	\
} while (0)
#endif
#define CDS_JA_RANGE_KEY_BITS	64
enum cds_ja_range_type {
	CDS_JA_RANGE_ALLOCATED,
	CDS_JA_RANGE_FREE,
	CDS_JA_RANGE_REMOVED,
};
/*
 * Range goes from start (inclusive) to end (inclusive).
 * Range start is used as node key in the Judy array.
 */
struct cds_ja_range {
	uint64_t start, end;
	struct cds_ja_node ja_node;
	pthread_mutex_t lock;
	void *priv;
	enum cds_ja_range_type type;

	/* not required on lookup fast-path */
	struct rcu_head head;
};
struct cds_ja_range *cds_ja_range_lookup(struct cds_ja *ja, uint64_t key)
{
	struct cds_ja_node *node, *last_node;
	struct cds_ja_range *range;

	dbg_printf("key: %" PRIu64 "\n", key);
	node = cds_ja_lookup_below_equal(ja, key, NULL);
	if (!node)
		return NULL;
	/*
	 * Get the last of duplicate chain. Adding a node to Judy array
	 * duplicates inserts them at the end of the chain.
	 */
	cds_ja_for_each_duplicate_rcu(node)
		last_node = node;
	range = caa_container_of(last_node, struct cds_ja_range, ja_node);

	/* Check if range is currently hidden by concurrent add */
	if (range->end < key)
		return NULL;

	/*
	 * If the last node in the duplicates is removed or free, we can
	 * consider that either a removal or add operation is in
	 * progress, or removal is the last completed operation to
	 * update this range. We can therefore consider that this area
	 * is not allocated.
	 */
	if (range->type != CDS_JA_RANGE_ALLOCATED)
		return NULL;

	/*
	 * We found an allocated range. We can return it for use with
	 * RCU read-side protection for existence. However, we have no
	 * mutual exclusion against removal at this point.
	 */
	return range;
}
/*
 * Provide mutual exclusion against removal.
 */
struct cds_ja_range *cds_ja_range_lock(struct cds_ja_range *range)
{
	pthread_mutex_lock(&range->lock);

	if (range->type == CDS_JA_RANGE_REMOVED)
		goto removed;
	return range;

removed:
	pthread_mutex_unlock(&range->lock);
	return NULL;
}
void cds_ja_range_unlock(struct cds_ja_range *range)
{
	pthread_mutex_unlock(&range->lock);
}
static
struct cds_ja_range *range_create(
		uint64_t start,		/* inclusive */
		uint64_t end,		/* inclusive */
		void *priv,
		enum cds_ja_range_type type)
{
	struct cds_ja_range *range;

	range = calloc(sizeof(*range), 1);
	if (!range)
		return NULL;
	range->start = start;
	range->end = end;
	range->priv = priv;
	range->type = type;
	pthread_mutex_init(&range->lock, NULL);
	return range;
}
static
void free_range_cb(struct rcu_head *head)
{
	struct cds_ja_range *range =
		caa_container_of(head, struct cds_ja_range, head);

	free(range);
}
static
void free_range(struct cds_ja_range *range)
{
	free(range);
}
static
void rcu_free_range(struct cds_ja *ja, struct cds_ja_range *range)
{
	cds_lfht_rcu_flavor(ja->ht)->update_call_rcu(&range->head,
			free_range_cb);
}
int cds_ja_range_add(struct cds_ja *ja,
		uint64_t start,		/* inclusive */
		uint64_t end,		/* inclusive */
		void *priv)
{
	struct cds_ja_node *old_node;
	struct cds_ja_range *old_range, *new_range, *ranges[3];
	unsigned int nr_ranges, i;
	int ret;

	if (start > end || end == UINT64_MAX)
		return -EINVAL;

retry:
	dbg_printf("start: %" PRIu64 ", end: %" PRIu64 ", priv %p\n",
		start, end, priv);
	/*
	 * Find if requested range is entirely contained within a single
	 * free range.
	 */
	old_node = cds_ja_lookup_below_equal(ja, start, NULL);
	/* Range hidden by concurrent add */
	if (!old_node)
		goto retry;

	old_range = caa_container_of(old_node,
			struct cds_ja_range, ja_node);

	/* Range hidden by concurrent add */
	if (old_range->end < start)
		goto retry;

	/* We now know that old_range overlaps with our range */
	switch (CMM_LOAD_SHARED(old_range->type)) {
	case CDS_JA_RANGE_ALLOCATED:
		return -EEXIST;
	case CDS_JA_RANGE_FREE:
		break;
	case CDS_JA_RANGE_REMOVED:
		goto retry;
	}
	/* We do not fit entirely within the range */
	if (old_range->end < end)
		return -EEXIST;

	pthread_mutex_lock(&old_range->lock);

	if (old_range->type == CDS_JA_RANGE_REMOVED) {
		pthread_mutex_unlock(&old_range->lock);
		goto retry;
	}
	/* Create replacement ranges: at most 2 free and 1 allocated */
	if (start == old_range->start) {
		if (end == old_range->end) {
			ranges[0] = new_range = range_create(start, end,
				priv, CDS_JA_RANGE_ALLOCATED);
			nr_ranges = 1;
		} else {
			assert(old_range->end > end);
			ranges[0] = new_range = range_create(start, end,
				priv, CDS_JA_RANGE_ALLOCATED);
			ranges[1] = range_create(end + 1, old_range->end,
				NULL, CDS_JA_RANGE_FREE);
			nr_ranges = 2;
		}
	} else {
		if (end == old_range->end) {
			assert(old_range->start < start);
			ranges[0] = range_create(old_range->start, start - 1,
				NULL, CDS_JA_RANGE_FREE);
			ranges[1] = new_range = range_create(start, end,
				priv, CDS_JA_RANGE_ALLOCATED);
			nr_ranges = 2;
		} else {
			assert(old_range->start < start);
			assert(old_range->end > end);
			ranges[0] = range_create(old_range->start, start - 1,
				NULL, CDS_JA_RANGE_FREE);
			ranges[1] = new_range = range_create(start, end,
				priv, CDS_JA_RANGE_ALLOCATED);
			ranges[2] = range_create(end + 1, old_range->end,
				NULL, CDS_JA_RANGE_FREE);
			nr_ranges = 3;
		}
	}
	/* Add replacement ranges to Judy array */
	for (i = 0; i < nr_ranges; i++) {
		dbg_printf("ADD RANGE: %" PRIu64 "-%" PRIu64 " %s.\n",
			ranges[i]->start, ranges[i]->end,
			ranges[i]->type == CDS_JA_RANGE_ALLOCATED ?
				"allocated" : "free");
		pthread_mutex_lock(&ranges[i]->lock);
		ret = cds_ja_add(ja, ranges[i]->start, &ranges[i]->ja_node);
		assert(!ret);
	}
	/*
	 * We add replacement ranges _before_ removing old ranges, so
	 * concurrent traversals will always see one or the other. This
	 * is OK because we temporarily have a duplicate key, and Judy
	 * arrays provide key existence guarantee for lookups performed
	 * concurrently with add followed by del of duplicate keys.
	 */
	dbg_printf("REM RANGE: %" PRIu64 "-%" PRIu64 " %s.\n",
		old_range->start, old_range->end,
		old_range->type == CDS_JA_RANGE_ALLOCATED ?
			"allocated" : "free");
	/* Remove old free range */
	ret = cds_ja_del(ja, old_range->start, &old_range->ja_node);
	assert(!ret);
	old_range->type = CDS_JA_RANGE_REMOVED;
	pthread_mutex_unlock(&old_range->lock);
	for (i = 0; i < nr_ranges; i++)
		pthread_mutex_unlock(&ranges[i]->lock);

	rcu_free_range(ja, old_range);

	dbg_printf("<SUCCEED>\n");

	return 0;
}
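/*
 * Usage sketch (illustration only): reserving a key range. The caller
 * is assumed to hold the read-side lock of the RCU flavor the Judy
 * array was created with; "my_priv" is a hypothetical private pointer
 * later retrievable through the priv field of the range returned by
 * cds_ja_range_lookup().
 *
 *	int ret;
 *
 *	rcu_read_lock();
 *	ret = cds_ja_range_add(ja, 100, 199, my_priv);
 *	rcu_read_unlock();
 *	if (ret)
 *		... range [100, 199] could not be reserved, e.g. because
 *		    it overlaps an already allocated range ...
 */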
int cds_ja_range_del(struct cds_ja *ja, struct cds_ja_range *range)
{
	struct cds_ja_node *prev_node, *next_node;
	struct cds_ja_range *new_range;
	struct cds_ja_range *merge_ranges[3], *lock_ranges[3];
	unsigned int nr_merge, nr_lock, i;
	uint64_t start, end;
	int ret;

retry:
	dbg_printf("start: %" PRIu64 ", end %" PRIu64 ", priv: %p\n",
		range->start, range->end, range->priv);

	nr_merge = 0;
	nr_lock = 0;

	/*
	 * Range has been concurrently updated.
	 */
	if (range->type != CDS_JA_RANGE_ALLOCATED)
		return -ENOENT;
	if (range->start > 0) {
		struct cds_ja_range *prev_range;

		prev_node = cds_ja_lookup_below_equal(ja, range->start - 1,
				NULL);
		if (!prev_node)
			goto retry;

		prev_range = caa_container_of(prev_node,
				struct cds_ja_range, ja_node);
		/* Prev range temporarily hidden due to concurrent add. */
		if (prev_range->end != range->start - 1)
			goto retry;

		lock_ranges[nr_lock++] = prev_range;
		if (prev_range->type != CDS_JA_RANGE_ALLOCATED)
			merge_ranges[nr_merge++] = prev_range;
	}

	lock_ranges[nr_lock++] = range;
	merge_ranges[nr_merge++] = range;
	if (range->end < UINT64_MAX - 1) {
		struct cds_ja_range *next_range;

		next_node = cds_ja_lookup_below_equal(ja, range->end + 1,
				NULL);
		/* Next range temporarily hidden due to concurrent add. */
		if (!next_node)
			goto retry;

		next_range = caa_container_of(next_node,
				struct cds_ja_range, ja_node);
		if (next_range->start != range->end + 1)
			goto retry;

		lock_ranges[nr_lock++] = next_range;
		if (next_range->type != CDS_JA_RANGE_ALLOCATED)
			merge_ranges[nr_merge++] = next_range;
	}
	/* Acquire locks in increasing key order for range merge */
	for (i = 0; i < nr_lock; i++)
		pthread_mutex_lock(&lock_ranges[i]->lock);
	if (range->type != CDS_JA_RANGE_ALLOCATED) {
		ret = -ENOENT;
		goto unlock_error;
	}
	/* Ensure they are valid */
	for (i = 0; i < nr_lock; i++) {
		if (lock_ranges[i]->type == CDS_JA_RANGE_REMOVED)
			goto unlock_retry;
	}
	/* Create new free range */
	start = merge_ranges[0]->start;
	end = merge_ranges[nr_merge - 1]->end;
	new_range = range_create(start, end, NULL, CDS_JA_RANGE_FREE);
	pthread_mutex_lock(&new_range->lock);

	dbg_printf("ADD RANGE: %" PRIu64 "-%" PRIu64 " %s.\n",
		new_range->start, new_range->end,
		new_range->type == CDS_JA_RANGE_ALLOCATED ?
			"allocated" : "free");

	ret = cds_ja_add(ja, start, &new_range->ja_node);
	assert(!ret);
	/* Remove old ranges */
	for (i = 0; i < nr_merge; i++) {
		dbg_printf("REM RANGE: %" PRIu64 "-%" PRIu64 " %s.\n",
			merge_ranges[i]->start, merge_ranges[i]->end,
			merge_ranges[i]->type == CDS_JA_RANGE_ALLOCATED ?
				"allocated" : "free");
		ret = cds_ja_del(ja, merge_ranges[i]->start,
				&merge_ranges[i]->ja_node);
		assert(!ret);
		merge_ranges[i]->type = CDS_JA_RANGE_REMOVED;
	}
	for (i = 0; i < nr_lock; i++)
		pthread_mutex_unlock(&lock_ranges[i]->lock);
	pthread_mutex_unlock(&new_range->lock);
	/* Free old merged ranges */
	for (i = 0; i < nr_merge; i++)
		rcu_free_range(ja, merge_ranges[i]);

	dbg_printf("<SUCCEED>\n");

	return 0;
unlock_retry:
	for (i = 0; i < nr_lock; i++)
		pthread_mutex_unlock(&lock_ranges[i]->lock);
	goto retry;

unlock_error:
	for (i = 0; i < nr_lock; i++)
		pthread_mutex_unlock(&lock_ranges[i]->lock);
	return ret;
}
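/*
 * Usage sketch (illustration only): releasing a previously allocated
 * range. The range pointer is typically obtained from
 * cds_ja_range_lookup() under the caller's RCU read-side lock.
 *
 *	struct cds_ja_range *range;
 *	int ret = -ENOENT;
 *
 *	rcu_read_lock();
 *	range = cds_ja_range_lookup(ja, 100);
 *	if (range)
 *		ret = cds_ja_range_del(ja, range);
 *	rcu_read_unlock();
 */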
struct cds_ja *_cds_ja_range_new(const struct rcu_flavor_struct *flavor)
{
	struct cds_ja_range *range;
	struct cds_ja *ja;
	int ret;

	ja = _cds_ja_new(CDS_JA_RANGE_KEY_BITS, flavor);
	if (!ja)
		return NULL;
	range = range_create(0, UINT64_MAX - 1, NULL, CDS_JA_RANGE_FREE);
	if (!range)
		goto free_ja;
	cds_lfht_rcu_flavor(ja->ht)->read_lock();
	ret = cds_ja_add(ja, 0, &range->ja_node);
	cds_lfht_rcu_flavor(ja->ht)->read_unlock();
	if (ret)
		goto free_range;
	return ja;

free_range:
	free_range(range);
free_ja:
	ret = cds_ja_destroy(ja);
	assert(!ret);
	return NULL;
}
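/*
 * Usage sketch (illustration only): creating a range-tracking Judy
 * array. The flavor argument selects the RCU flavor whose read-side
 * lock callers must hold around range operations; "&rcu_flavor" is
 * assumed to be the flavor structure exposed by the urcu flavor the
 * caller is linked against (see urcu-flavor.h).
 *
 *	struct cds_ja *ja;
 *
 *	ja = _cds_ja_range_new(&rcu_flavor);
 *	if (!ja)
 *		... handle allocation error ...
 */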
int cds_ja_range_validate(struct cds_ja *ja)
{
	uint64_t iter_key, start, end, last_end = UINT64_MAX;
	struct cds_ja_node *ja_node, *last_node;
	int ret = 0;

	cds_lfht_rcu_flavor(ja->ht)->read_lock();
	cds_ja_for_each_key_rcu(ja, iter_key, ja_node) {
		struct cds_ja_range *range;
		struct cds_ja_node *first_node;

		first_node = ja_node;
		cds_ja_for_each_duplicate_rcu(ja_node)
			last_node = ja_node;
		if (last_node != first_node) {
			struct cds_ja_range *first_range = caa_container_of(first_node,
				struct cds_ja_range, ja_node);
			struct cds_ja_range *last_range = caa_container_of(last_node,
				struct cds_ja_range, ja_node);

			fprintf(stderr, "found duplicate node: first %" PRIu64 "-%" PRIu64 " last %" PRIu64 "-%" PRIu64 "\n",
				first_range->start, first_range->end,
				last_range->start, last_range->end);
			ret = -EINVAL;
		}
		range = caa_container_of(last_node,
			struct cds_ja_range, ja_node);
		start = range->start;
		end = range->end;
		if (last_end != UINT64_MAX) {
			if (start != last_end + 1) {
				fprintf(stderr, "ja range discrepancy: last end: %" PRIu64 ", start: %" PRIu64 "\n",
					last_end, start);
				ret = -EINVAL;
			}
		}
		last_end = end;
	}
	if (last_end != UINT64_MAX - 1) {
		fprintf(stderr, "ja range error: end of last range is: %" PRIu64 "\n",
			last_end);
		ret = -EINVAL;
	}
	cds_lfht_rcu_flavor(ja->ht)->read_unlock();
	return ret;
}
int cds_ja_range_destroy(struct cds_ja *ja,
		void (*free_priv)(void *ptr))
{
	uint64_t key;
	struct cds_ja_node *ja_node;
	int ret;

	cds_lfht_rcu_flavor(ja->ht)->read_lock();
	cds_ja_for_each_key_rcu(ja, key, ja_node) {
		struct cds_ja_node *tmp_node;

		cds_ja_for_each_duplicate_safe_rcu(ja_node, tmp_node) {
			struct cds_ja_range *range;

			range = caa_container_of(ja_node,
				struct cds_ja_range, ja_node);
			ret = cds_ja_del(ja, key, &range->ja_node);
			if (ret)
				goto error;
			if (free_priv)
				free_priv(range->priv);
			/* Alone using Judy array, OK to free now */
			free_range(range);
		}
	}
	cds_lfht_rcu_flavor(ja->ht)->read_unlock();
	return cds_ja_destroy(ja);

error:
	cds_lfht_rcu_flavor(ja->ht)->read_unlock();
	return ret;
}
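/*
 * Usage sketch (illustration only): tearing down a range-tracking Judy
 * array once no reader or updater uses it anymore. "free_priv_cb" is a
 * hypothetical caller-supplied callback releasing whatever was stored
 * in the priv pointers of allocated ranges; cds_ja_range_validate()
 * can be used beforehand as a consistency check in test code.
 *
 *	int ret;
 *
 *	ret = cds_ja_range_validate(ja);
 *	if (ret)
 *		... ranges do not form a contiguous coverage of the key
 *		    space; report the inconsistency ...
 *	ret = cds_ja_range_destroy(ja, free_priv_cb);
 */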