X-Git-Url: http://git.liburcu.org/?a=blobdiff_plain;f=rcuja%2Frcuja.c;h=1103116d2591a58c36df36d9363cd667e0a3c4d9;hb=47d2eab3f8339799a6ddbeaf0cc505baf57cfc44;hp=e5145ea36a3b22dbec54cd4a45eeefe7334e343a;hpb=48cbe00177cd135553ee34d2f443c351b53b6d25;p=userspace-rcu.git diff --git a/rcuja/rcuja.c b/rcuja/rcuja.c index e5145ea..1103116 100644 --- a/rcuja/rcuja.c +++ b/rcuja/rcuja.c @@ -34,7 +34,6 @@ #include #include "rcuja-internal.h" -#include "bitfield.h" #ifndef abs #define abs_int(a) ((int) (a) > 0 ? (int) (a) : -((int) (a))) @@ -128,7 +127,7 @@ const struct cds_ja_type ja_types[] = { * Upon node removal below min_child, if child pool is filled * beyond capacity, we roll back to pigeon. */ - { .type_class = RCU_JA_PIGEON, .min_child = 89, .max_child = ja_type_7_max_child, .order = 10, }, + { .type_class = RCU_JA_PIGEON, .min_child = 83, .max_child = ja_type_7_max_child, .order = 10, }, { .type_class = RCU_JA_NULL, .min_child = 0, .max_child = ja_type_8_max_child, }, }; @@ -176,7 +175,7 @@ const struct cds_ja_type ja_types[] = { * Upon node removal below min_child, if child pool is filled * beyond capacity, we roll back to pigeon. */ - { .type_class = RCU_JA_PIGEON, .min_child = 101, .max_child = ja_type_7_max_child, .order = 11, }, + { .type_class = RCU_JA_PIGEON, .min_child = 95, .max_child = ja_type_7_max_child, .order = 11, }, { .type_class = RCU_JA_NULL, .min_child = 0, .max_child = ja_type_8_max_child, }, }; @@ -244,11 +243,6 @@ enum ja_recompact { JA_RECOMPACT_DEL, }; -static -unsigned long node_fallback_count_distribution[JA_ENTRY_PER_NODE]; -static -unsigned long nr_nodes_allocated, nr_nodes_freed; - static struct cds_ja_inode *_ja_node_mask_ptr(struct cds_ja_inode_flag *node) { @@ -291,7 +285,9 @@ struct cds_ja_inode *ja_node_ptr(struct cds_ja_inode_flag *node) } } -struct cds_ja_inode *alloc_cds_ja_node(const struct cds_ja_type *ja_type) +static +struct cds_ja_inode *alloc_cds_ja_node(struct cds_ja *ja, + const struct cds_ja_type *ja_type) { size_t len = 1U << ja_type->order; void *p; @@ -302,15 +298,15 @@ struct cds_ja_inode *alloc_cds_ja_node(const struct cds_ja_type *ja_type) return NULL; } memset(p, 0, len); - uatomic_inc(&nr_nodes_allocated); + uatomic_inc(&ja->nr_nodes_allocated); return p; } -void free_cds_ja_node(struct cds_ja_inode *node) +void free_cds_ja_node(struct cds_ja *ja, struct cds_ja_inode *node) { free(node); if (node) - uatomic_inc(&nr_nodes_freed); + uatomic_inc(&ja->nr_nodes_freed); } #define __JA_ALIGN_MASK(v, mask) (((v) + (mask)) & ~(mask)) @@ -373,6 +369,43 @@ struct cds_ja_inode_flag *ja_linear_node_get_nth(const struct cds_ja_type *type, return ptr; } +static +struct cds_ja_inode_flag *ja_linear_node_get_left(const struct cds_ja_type *type, + struct cds_ja_inode *node, + unsigned int n) +{ + uint8_t nr_child; + uint8_t *values; + struct cds_ja_inode_flag **pointers; + struct cds_ja_inode_flag *ptr; + unsigned int i, match_idx; + int match_v = -1; + + assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL); + + nr_child = ja_linear_node_get_nr_child(type, node); + cmm_smp_rmb(); /* read nr_child before values and pointers */ + assert(nr_child <= type->max_linear_child); + assert(type->type_class != RCU_JA_LINEAR || nr_child >= type->min_child); + + values = &node->u.data[1]; + for (i = 0; i < nr_child; i++) { + unsigned int v; + + v = CMM_LOAD_SHARED(values[i]); + if (v < n && (int) v > match_v) { + match_v = v; + match_idx = i; + } + } + if (match_v < 0) { + return NULL; + } + pointers = (struct cds_ja_inode_flag **) align_ptr_size(&values[type->max_linear_child]); + ptr = rcu_dereference(pointers[match_idx]); + return ptr; +} + static void ja_linear_node_get_ith_pos(const struct cds_ja_type *type, struct cds_ja_inode *node, @@ -445,6 +478,42 @@ struct cds_ja_inode *ja_pool_node_get_ith_pool(const struct cds_ja_type *type, &node->u.data[(unsigned int) i << type->pool_size_order]; } +static +struct cds_ja_inode_flag *ja_pool_node_get_left(const struct cds_ja_type *type, + struct cds_ja_inode *node, + unsigned int n) +{ + unsigned int pool_nr; + int match_v = -1; + struct cds_ja_inode_flag *match_node_flag = NULL; + + assert(type->type_class == RCU_JA_POOL); + + for (pool_nr = 0; pool_nr < (1U << type->nr_pool_order); pool_nr++) { + struct cds_ja_inode *pool = + ja_pool_node_get_ith_pool(type, + node, pool_nr); + uint8_t nr_child = + ja_linear_node_get_nr_child(type, pool); + unsigned int j; + + for (j = 0; j < nr_child; j++) { + struct cds_ja_inode_flag *iter; + uint8_t v; + + ja_linear_node_get_ith_pos(type, pool, + j, &v, &iter); + if (!iter) + continue; + if (v < n && (int) v > match_v) { + match_v = v; + match_node_flag = iter; + } + } + } + return match_node_flag; +} + static struct cds_ja_inode_flag *ja_pigeon_node_get_nth(const struct cds_ja_type *type, struct cds_ja_inode *node, @@ -464,6 +533,30 @@ struct cds_ja_inode_flag *ja_pigeon_node_get_nth(const struct cds_ja_type *type, return child_node_flag; } +static +struct cds_ja_inode_flag *ja_pigeon_node_get_left(const struct cds_ja_type *type, + struct cds_ja_inode *node, + unsigned int n) +{ + struct cds_ja_inode_flag **child_node_flag_ptr; + struct cds_ja_inode_flag *child_node_flag; + int i; + + assert(type->type_class == RCU_JA_PIGEON); + + /* n - 1 is first value left of n */ + for (i = n - 1; i >= 0; i--) { + child_node_flag_ptr = &((struct cds_ja_inode_flag **) node->u.data)[i]; + child_node_flag = rcu_dereference(*child_node_flag_ptr); + if (child_node_flag) { + dbg_printf("ja_pigeon_node_get_left child_node_flag %p\n", + child_node_flag); + return child_node_flag; + } + } + return NULL; +} + static struct cds_ja_inode_flag *ja_pigeon_node_get_ith_pos(const struct cds_ja_type *type, struct cds_ja_inode *node, @@ -506,6 +599,38 @@ struct cds_ja_inode_flag *ja_node_get_nth(struct cds_ja_inode_flag *node_flag, } } +static +struct cds_ja_inode_flag *ja_node_get_left(struct cds_ja_inode_flag *node_flag, + unsigned int n) +{ + unsigned int type_index; + struct cds_ja_inode *node; + const struct cds_ja_type *type; + + node = ja_node_ptr(node_flag); + assert(node != NULL); + type_index = ja_node_type(node_flag); + type = &ja_types[type_index]; + + switch (type->type_class) { + case RCU_JA_LINEAR: + return ja_linear_node_get_left(type, node, n); + case RCU_JA_POOL: + return ja_pool_node_get_left(type, node, n); + case RCU_JA_PIGEON: + return ja_pigeon_node_get_left(type, node, n); + default: + assert(0); + return (void *) -1UL; + } +} + +static +struct cds_ja_inode_flag *ja_node_get_rightmost(struct cds_ja_inode_flag *node_flag) +{ + return ja_node_get_left(node_flag, JA_ENTRY_PER_NODE); +} + static int ja_linear_node_set_nth(const struct cds_ja_type *type, struct cds_ja_inode *node, @@ -1238,7 +1363,7 @@ retry: /* for fallback */ old_type_index, new_type_index); new_type = &ja_types[new_type_index]; if (new_type_index != NODE_INDEX_NULL) { - new_node = alloc_cds_ja_node(new_type); + new_node = alloc_cds_ja_node(ja, new_type); if (!new_node) return -ENOMEM; @@ -1285,7 +1410,7 @@ retry: /* for fallback */ dbg_printf("Recompact inherit lock from %p\n", shadow_node); new_shadow_node = rcuja_shadow_set(ja->ht, new_node_flag, shadow_node, ja, level); if (!new_shadow_node) { - free_cds_ja_node(new_node); + free_cds_ja_node(ja, new_node); return -ENOMEM; } if (fallback) @@ -1409,7 +1534,7 @@ skip_copy: dbg_printf("Using fallback for %u children, node type index: %u, mode %s\n", new_shadow_node->nr_child, old_type_index, mode == JA_RECOMPACT_ADD_NEXT ? "add_next" : (mode == JA_RECOMPACT_DEL ? "del" : "add_same")); - uatomic_inc(&node_fallback_count_distribution[new_shadow_node->nr_child]); + uatomic_inc(&ja->node_fallback_count_distribution[new_shadow_node->nr_child]); } /* Return pointer to new recompacted node through old_node_flag_ptr */ @@ -1613,6 +1738,89 @@ struct cds_hlist_head cds_ja_lookup(struct cds_ja *ja, uint64_t key) return head; } +struct cds_hlist_head cds_ja_lookup_lower_equal(struct cds_ja *ja, uint64_t key) +{ + int tree_depth, level; + struct cds_ja_inode_flag *node_flag, *cur_node_depth[JA_MAX_DEPTH]; + struct cds_hlist_head head = { NULL }; + + if (caa_unlikely(key > ja->key_max || !key)) + return head; + + memset(cur_node_depth, 0, sizeof(cur_node_depth)); + tree_depth = ja->tree_depth; + node_flag = rcu_dereference(ja->root); + cur_node_depth[0] = node_flag; + + /* level 0: root node */ + if (!ja_node_ptr(node_flag)) + return head; + + for (level = 1; level < tree_depth; level++) { + uint8_t iter_key; + + iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - level - 1))); + node_flag = ja_node_get_nth(node_flag, NULL, iter_key); + if (!ja_node_ptr(node_flag)) + break; + cur_node_depth[level] = node_flag; + dbg_printf("cds_ja_lookup iter key lookup %u finds node_flag %p\n", + (unsigned int) iter_key, node_flag); + } + + if (level == tree_depth) { + /* Last level lookup succeded. We got an equal match. */ + head.next = (struct cds_hlist_node *) node_flag; + return head; + } + + /* + * Find highest value left of current node. + * Current node is cur_node_depth[level]. + * Start at current level. If we cannot find any key left of + * ours, go one level up, seek highest value left of current + * (recursively), and when we find one, get the rightmost child + * of its rightmost child (recursively). + */ + for (; level > 0; level--) { + uint8_t iter_key; + + iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - level - 1))); + node_flag = ja_node_get_left(cur_node_depth[level - 1], + iter_key); + /* If found left sibling, find rightmost child. */ + if (ja_node_ptr(node_flag)) + break; + } + + if (!level) { + /* Reached the root and could not find a left sibling. */ + return head; + } + + level++; + + /* + * From this point, we are guaranteed to be able to find a + * "lower than" match. ja_attach_node() and ja_detach_node() + * both guarantee that it is not possible for a lookup to reach + * a dead-end. + */ + + /* Find rightmost child of rightmost child (recursively). */ + for (; level < tree_depth; level++) { + node_flag = ja_node_get_rightmost(node_flag); + /* If found left sibling, find rightmost child. */ + if (!ja_node_ptr(node_flag)) + break; + } + + assert(level == tree_depth); + + head.next = (struct cds_hlist_node *) node_flag; + return head; +} + /* * We reached an unpopulated node. Create it and the children we need, * and then attach the entire branch to the current node. This may @@ -1624,6 +1832,12 @@ struct cds_hlist_head cds_ja_lookup(struct cds_ja *ja, uint64_t key) * parent lock (if needed). Then we can proceed to create the new * branch. Publish the new branch, and release locks. * TODO: we currently always take the parent lock even when not needed. + * + * ja_attach_node() ensures that a lookup will _never_ see a branch that + * leads to a dead-end: before attaching a branch, the entire content of + * the new branch is populated, thus creating a cluster, before + * attaching the cluster to the rest of the tree, thus making it visible + * to lookups. */ static int ja_attach_node(struct cds_ja *ja, @@ -1674,6 +1888,27 @@ int ja_attach_node(struct cds_ja *ja, goto unlock_parent; } + /* + * Perform a lookup query to handle the case where + * old_node_flag_ptr is NULL. We cannot use it to check if the + * node has been populated between RCU lookup and mutex + * acquisition. + */ + if (!old_node_flag_ptr) { + uint8_t iter_key; + struct cds_ja_inode_flag *lookup_node_flag; + struct cds_ja_inode_flag **lookup_node_flag_ptr; + + iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (ja->tree_depth - level))); + lookup_node_flag = ja_node_get_nth(attach_node_flag, + &lookup_node_flag_ptr, + iter_key); + if (lookup_node_flag) { + ret = -EEXIST; + goto unlock_parent; + } + } + if (attach_node_flag_ptr && ja_node_ptr(*attach_node_flag_ptr) != ja_node_ptr(attach_node_flag)) { /* @@ -1701,8 +1936,10 @@ int ja_attach_node(struct cds_ja *ja, iter_key, iter_node_flag, NULL, i); - if (ret) + if (ret) { + dbg_printf("branch creation error %d\n", ret); goto check_error; + } created_nodes[nr_created_nodes++] = iter_dest_node_flag; iter_node_flag = iter_dest_node_flag; } @@ -1726,8 +1963,10 @@ int ja_attach_node(struct cds_ja *ja, iter_key, iter_node_flag, shadow_node, level - 1); - if (ret) + if (ret) { + dbg_printf("branch publish error %d\n", ret); goto check_error; + } /* * Attach branch */ @@ -1793,8 +2032,10 @@ end: return ret; } -int cds_ja_add(struct cds_ja *ja, uint64_t key, - struct cds_ja_node *new_node) +static +int _cds_ja_add(struct cds_ja *ja, uint64_t key, + struct cds_ja_node *new_node, + struct cds_ja_node **unique_node_ret) { unsigned int tree_depth, i; struct cds_ja_inode_flag *attach_node_flag, @@ -1847,6 +2088,7 @@ retry: if (!ja_node_ptr(node_flag)) { dbg_printf("cds_ja_add NULL parent2_node_flag %p parent_node_flag %p node_flag_ptr %p node_flag %p\n", parent2_node_flag, parent_node_flag, node_flag_ptr, node_flag); + attach_node_flag = parent_node_flag; attach_node_flag_ptr = parent_node_flag_ptr; parent_attach_node_flag = parent2_node_flag; @@ -1858,8 +2100,14 @@ retry: node_flag, key, i, new_node); } else { + if (unique_node_ret) { + *unique_node_ret = (struct cds_ja_node *) ja_node_ptr(node_flag); + return -EEXIST; + } + dbg_printf("cds_ja_add duplicate parent2_node_flag %p parent_node_flag %p node_flag_ptr %p node_flag %p\n", parent2_node_flag, parent_node_flag, node_flag_ptr, node_flag); + attach_node_flag = node_flag; attach_node_flag_ptr = node_flag_ptr; parent_attach_node_flag = parent_node_flag; @@ -1876,6 +2124,25 @@ retry: return ret; } +int cds_ja_add(struct cds_ja *ja, uint64_t key, + struct cds_ja_node *new_node) +{ + return _cds_ja_add(ja, key, new_node, NULL); +} + +struct cds_ja_node *cds_ja_add_unique(struct cds_ja *ja, uint64_t key, + struct cds_ja_node *new_node) +{ + int ret; + struct cds_ja_node *ret_node; + + ret = _cds_ja_add(ja, key, new_node, &ret_node); + if (ret == -EEXIST) + return ret_node; + else + return new_node; +} + /* * Note: there is no need to lookup the pointer address associated with * each node's nth item after taking the lock: it's already been done by @@ -1886,6 +2153,11 @@ retry: * However, when a child is removed from "linear" nodes, its pointer * is set to NULL. We therefore check, while holding the locks, if this * pointer is NULL, and return -ENOENT to the caller if it is the case. + * + * ja_detach_node() ensures that a lookup will _never_ see a branch that + * leads to a dead-end: when removing branch, it makes sure to perform + * the "cut" at the highest node that has only one child, effectively + * replacing it with a NULL pointer. */ static int ja_detach_node(struct cds_ja *ja, @@ -2312,7 +2584,7 @@ void rcuja_free_all_children(struct cds_ja_shadow_node *shadow_node, struct cds_hlist_node *pos, *tmp; uint8_t v; - ja_linear_node_get_ith_pos(type, node, j, &v, &iter); + ja_linear_node_get_ith_pos(type, pool, j, &v, &iter); if (!iter) continue; head.next = (struct cds_hlist_node *) iter; @@ -2351,17 +2623,47 @@ void rcuja_free_all_children(struct cds_ja_shadow_node *shadow_node, } static -void print_debug_fallback_distribution(void) +void print_debug_fallback_distribution(struct cds_ja *ja) { int i; fprintf(stderr, "Fallback node distribution:\n"); for (i = 0; i < JA_ENTRY_PER_NODE; i++) { - if (!node_fallback_count_distribution[i]) + if (!ja->node_fallback_count_distribution[i]) continue; fprintf(stderr, " %3u: %4lu\n", - i, node_fallback_count_distribution[i]); + i, ja->node_fallback_count_distribution[i]); + } +} + +static +int ja_final_checks(struct cds_ja *ja) +{ + double fallback_ratio; + unsigned long na, nf, nr_fallback; + int ret = 0; + + fallback_ratio = (double) uatomic_read(&ja->nr_fallback); + fallback_ratio /= (double) uatomic_read(&ja->nr_nodes_allocated); + nr_fallback = uatomic_read(&ja->nr_fallback); + if (nr_fallback) + fprintf(stderr, + "[warning] RCU Judy Array used %lu fallback node(s) (ratio: %g)\n", + uatomic_read(&ja->nr_fallback), + fallback_ratio); + + na = uatomic_read(&ja->nr_nodes_allocated); + nf = uatomic_read(&ja->nr_nodes_freed); + dbg_printf("Nodes allocated: %lu, Nodes freed: %lu.\n", na, nf); + if (nr_fallback) + print_debug_fallback_distribution(ja); + + if (na != nf) { + fprintf(stderr, "[error] Judy array leaked %ld nodes. Allocated: %lu, freed: %lu.\n", + (long) na - nf, na, nf); + ret = -1; } + return ret; } /* @@ -2382,16 +2684,12 @@ int cds_ja_destroy(struct cds_ja *ja, ret = rcuja_delete_ht(ja->ht); if (ret) return ret; + + /* Wait for in-flight call_rcu free to complete. */ + flavor->barrier(); + flavor->thread_online(); - if (uatomic_read(&ja->nr_fallback)) - fprintf(stderr, - "[warning] RCU Judy Array used %lu fallback node(s)\n", - uatomic_read(&ja->nr_fallback)); - fprintf(stderr, "Nodes allocated: %lu, Nodes freed: %lu. Fallback ratio: %g\n", - uatomic_read(&nr_nodes_allocated), - uatomic_read(&nr_nodes_freed), - (double) uatomic_read(&ja->nr_fallback) / (double) uatomic_read(&nr_nodes_allocated)); - print_debug_fallback_distribution(); + ret = ja_final_checks(ja); free(ja); - return 0; + return ret; }