From fd80077697a32434d2b7eaa8336f447904150f0e Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 9 Mar 2012 18:14:21 -0500 Subject: [PATCH] rcuja: use pool of linear array instead of bitmap Signed-off-by: Mathieu Desnoyers --- rcuja/design.txt | 79 ++++++++++++---------- rcuja/rcuja.c | 167 +++++++++-------------------------------------- 2 files changed, 76 insertions(+), 170 deletions(-) diff --git a/rcuja/design.txt b/rcuja/design.txt index 947f61a..425d80d 100644 --- a/rcuja/design.txt +++ b/rcuja/design.txt @@ -89,31 +89,39 @@ Array of 8-bit values followed by array of associated pointers. 64-bit: 1 byte + 113 bytes + 6 bytes pad + 113*8 = 1024 bytes -- Type B: bitmap, followed by pointers array. +- Type B: pools of values and pointers arrays -bitmask (256 entries -> 256 bits -> 32 bytes) of populated children -followed by an array of children pointers, in same order as appears in -the bitmask +Pools of values and pointers arrays. Each pool values array is 32-bytes +in size (so it fits in a L1 cacheline). Each pool begins with an 8-bit +integer, which is the number of children in this pool, followed by an +array of 8-bit values, padding, and an array of pointers. Values and +pointer arrays are associated as in Type A. -+ Allows lookup failure to use 32-byte cache-line only. (1 cacheline) -+ Allows lookup success to use 32-byte cache-line (bitmap), - followed by direct lookup into pointer array. (2 cachelines) +The entries of a node are associated to their respective pool based +on their index position. -Filled at 8 entries 32-bit, 12 entries 64-bit -32-bit: 32 + (4*8) -> 64 bytes -64-bit: 32 + (8*12) -> 128 bytes ++ Allows lookup failure to use 1 32-byte cache-line only. (1 cacheline) + lookup success: 2 cache lines. -Filled at 24 entries 32-bit, 28 entries 64-bit -32-bit: 32 + (4*24) -> 128 bytes -64-bit: 32 + (8*28) -> 256 bytes ++ Allows in-place updates without reallocation, except when a pool is + full. 
(this was not possible with bitmap-based nodes) +- If one pool exhausts its space, we need to increase the node size. + Therefore, for very dense populations, we will end up using the + pigeon-hole node type sooner, thus consuming more space. -Filled at 56 entries 32-bit, 60 entries 64-bit -32-bit: 32 + (4*56) -> 256 bytes -64-bit: 32 + (8*60) -> 512 bytes +Pool configuration: -Filled at 120 entries 32-bit, 124 entries 64-bit -32-bit: 32 + (4*95) -> 512 bytes -64-bit: 32 + (8*124) -> 1024 bytes +Per pool, filled at 25 entries (32-bit), 28 entries (64-bit) +32-bit: 1 byte + 25 bytes + 2 bytes pad + 25*4bytes = 128 bytes +64-bit: 1 byte + 28 bytes + 3 bytes pad + 28*8 = 256 bytes + +Total up to 50 entries (32-bit), 56 entries (64-bit) +2 pools: 32-bit = 256 bytes +2 pools: 64-bit = 512 bytes + +Total up to 100 entries (32-bit), 112 entries (64-bit) +4 pools: 32-bit = 512 bytes +4 pools: 64-bit = 1024 bytes - Type C: pigeon-hole array @@ -152,17 +160,18 @@ up to 6 children up to 12 children -- Type B: bitmap, followed by pointers array. -- 2 cache line hit for lookup success - 128 bytes storage -up to 24 children +up to 25 children + +- Type B: pool - 256 bytes storage -up to 56 children + +up to 50 children - 512 bytes storage -up to 120 children +up to 100 children - Type C: pigeon-hole array - 1 cache line hit for lookup success @@ -192,17 +201,17 @@ up to 7 children up to 14 children -- Type B: bitmap, followed by pointers array. 
-- 2 cache line hit for lookup success - 256 bytes storage up to 28 children +- Type B: pool + - 512 bytes storage -up to 60 children +up to 56 children - 1024 bytes storage -up to 124 children +up to 112 children - Type C: pigeon-hole array - 1 cache line hit for lookup success @@ -224,14 +233,14 @@ Types of children: enum child_type { RCU_JA_LINEAR = 0, /* Type A */ - /* 32-bit: 1 to 12 children, 8 to 64 bytes */ - /* 64-bit: 1 to 14 children, 16 to 128 bytes */ - RCU_JA_BITMAP = 1, /* Type B */ - /* 32-bit: 13 to 120 children, 128 to 512 bytes */ - /* 64-bit: 15 to 124 children, 256 to 1024 bytes */ + /* 32-bit: 1 to 25 children, 8 to 128 bytes */ + /* 64-bit: 1 to 28 children, 16 to 256 bytes */ + RCU_JA_POOL = 1, /* Type B */ + /* 32-bit: 26 to 100 children, 256 to 512 bytes */ + /* 64-bit: 29 to 112 children, 512 to 1024 bytes */ RCU_JA_PIGEON = 2, /* Type C */ - /* 32-bit: 121 to 256 children, 1024 bytes */ - /* 64-bit: 125 to 256 children, 2048 bytes */ + /* 32-bit: 101 to 256 children, 1024 bytes */ + /* 64-bit: 113 to 256 children, 2048 bytes */ /* Leaf nodes are implicit from their height in the tree */ }; diff --git a/rcuja/rcuja.c b/rcuja/rcuja.c index 8d3bfd7..04cbd6c 100644 --- a/rcuja/rcuja.c +++ b/rcuja/rcuja.c @@ -31,14 +31,14 @@ enum rcu_ja_type_class { RCU_JA_LINEAR = 0, /* Type A */ - /* 32-bit: 1 to 12 children, 8 to 64 bytes */ - /* 64-bit: 1 to 14 children, 16 to 128 bytes */ - RCU_JA_BITMAP = 1, /* Type B */ - /* 32-bit: 13 to 120 children, 128 to 512 bytes */ - /* 64-bit: 15 to 124 children, 256 to 1024 bytes */ + /* 32-bit: 1 to 25 children, 8 to 128 bytes */ + /* 64-bit: 1 to 28 children, 16 to 256 bytes */ + RCU_JA_POOL = 1, /* Type B */ + /* 32-bit: 26 to 100 children, 256 to 512 bytes */ + /* 64-bit: 29 to 112 children, 512 to 1024 bytes */ RCU_JA_PIGEON = 2, /* Type C */ - /* 32-bit: 121 to 256 children, 1024 bytes */ - /* 64-bit: 125 to 256 children, 2048 bytes */ + /* 32-bit: 101 to 256 children, 1024 bytes */ + /* 64-bit: 113 to 
256 children, 2048 bytes */ /* Leaf nodes are implicit from their height in the tree */ }; @@ -47,6 +47,8 @@ struct rcu_ja_type { uint16_t min_child; /* minimum number of children: 1 to 256 */ uint16_t max_child; /* maximum number of children: 1 to 256 */ uint16_t order; /* node size is (1 << order), in bytes */ + uint16_t nr_pool_order; /* number of pools is (1 << nr_pool_order) */ + uint16_t pool_size_order; /* pool size is (1 << pool_size_order), in bytes */ }; /* @@ -79,12 +81,14 @@ const struct rcu_ja_type ja_types[] = { { .type_class = RCU_JA_LINEAR, .min_child = 1, .max_child = 3, .order = 4, }, { .type_class = RCU_JA_LINEAR, .min_child = 3, .max_child = 6, .order = 5, }, { .type_class = RCU_JA_LINEAR, .min_child = 4, .max_child = 12, .order = 6, }, + { .type_class = RCU_JA_LINEAR, .min_child = 10, .max_child = 25, .order = 7, }, - { .type_class = RCU_JA_BITMAP, .min_child = 10, .max_child = 24, .order = 7, }, - { .type_class = RCU_JA_BITMAP, .min_child = 20, .max_child = 56, .order = 8, }, - { .type_class = RCU_JA_BITMAP, .min_child = 46, .max_child = 120, .order = 9, }, + /* Pools may fill sooner than max_child */ + { .type_class = RCU_JA_POOL, .min_child = 20, .max_child = 50, .order = 8, .nr_pool_order = 1, .pool_size_order = 7, }, + { .type_class = RCU_JA_POOL, .min_child = 42, .max_child = 100, .order = 9, .nr_pool_order = 2, .pool_size_order = 7, }, - { .type_class = RCU_JA_PIGEON, .min_child = 100, .max_child = 256, .order = 10, }, + /* TODO: Upon downsize, if at least one pool is filled, we need to keep pigeon */ + { .type_class = RCU_JA_PIGEON, .min_child = 90, .max_child = 256, .order = 10, }, }; CAA_BUILD_BUG_ON(CAA_ARRAY_SIZE(ja_types) > JA_TYPE_MAX_NR); #else /* !(CAA_BITS_PER_LONG < 64) */ @@ -94,12 +98,14 @@ const struct rcu_ja_type ja_types[] = { { .type_class = RCU_JA_LINEAR, .min_child = 1, .max_child = 3, .order = 5, }, { .type_class = RCU_JA_LINEAR, .min_child = 3, .max_child = 7, .order = 6, }, { .type_class = RCU_JA_LINEAR, .min_child = 5, .max_child = 14, .order = 7, }, + { .type_class = 
RCU_JA_LINEAR, .min_child = 10, .max_child = 28, .order = 8, }, - { .type_class = RCU_JA_BITMAP, .min_child = 10, .max_child = 28, .order = 8, }, - { .type_class = RCU_JA_BITMAP, .min_child = 22, .max_child = 60, .order = 9, }, - { .type_class = RCU_JA_BITMAP, .min_child = 49, .max_child = 124, .order = 10, }, + /* Pools may fill sooner than max_child */ + { .type_class = RCU_JA_POOL, .min_child = 22, .max_child = 56, .order = 9, .nr_pool_order = 1, .pool_size_order = 8, }, + { .type_class = RCU_JA_POOL, .min_child = 44, .max_child = 112, .order = 10, .nr_pool_order = 2, .pool_size_order = 8, }, - { .type_class = RCU_JA_PIGEON, .min_child = 102, .max_child = 256, .order = 11, }, + /* TODO: Upon downsize, if at least one pool is filled, we need to keep pigeon */ + { .type_class = RCU_JA_PIGEON, .min_child = 100, .max_child = 256, .order = 11, }, }; CAA_BUILD_BUG_ON(CAA_ARRAY_SIZE(ja_types) > JA_TYPE_MAX_NR); #endif /* !(BITS_PER_LONG < 64) */ @@ -151,18 +157,6 @@ void free_rcu_ja_node(struct rcu_ja_node *node) free(node); } -/* The bitmap for 256 entries is always 32 bytes */ -#define CHAR_BIT_SHIFT 3UL -#define CHAR_BIT_MASK ((1UL << CHAR_BIT_SHIFT) - 1) -#if (CHAR_BIT != (1UL << CHAR_BIT_SHIFT)) -#error "char size not supported." 
-#endif - -#define ULONG_BIT_MASK (CAA_BITS_PER_LONG - 1) - -#define JA_BITMAP_BITS JA_ENTRY_PER_NODE -#define JA_BITMAP_LEN (JA_BITMAP_BITS / CHAR_BIT) - #define __JA_ALIGN_MASK(v, mask) (((v) + (mask)) & ~(mask)) #define JA_ALIGN(v, align) __JA_ALIGN_MASK(v, (typeof(v)) (align) - 1) #define __JA_FLOOR_MASK(v, mask) ((v) & ~(mask)) @@ -185,12 +179,12 @@ struct rcu_ja_node_flag *ja_linear_node_get_nth(const struct rcu_ja_type *type, struct rcu_ja_node_flag *ptr; unsigned int i; - assert(type->type_class == RCU_JA_LINEAR); + assert(!type || type->type_class == RCU_JA_LINEAR); nr_child = node->data[0]; cmm_smp_rmb(); /* read nr_child before values */ - assert(nr_child <= type->max_child); - assert(nr_child >= type->min_child); + assert(!type || nr_child <= type->max_child); + assert(!type || nr_child >= type->min_child); values = &node[1]; for (i = 0; i < nr_child; i++) { @@ -206,115 +200,18 @@ struct rcu_ja_node_flag *ja_linear_node_get_nth(const struct rcu_ja_type *type, return ptr; } -#if 0 -/* - * Count hweight. Expect most bits to be 0. Algorithm from - * Wegner (1960): count those in n steps (n being the number of - * hot bits). Ref.: Wegner, Peter (1960), "A technique for - * counting ones in a binary computer", Communications of the - * ACM 3 (5): 322, doi:10.1145/367236.367286. 
- */ -static -unsigned int ja_hweight_uchar(unsigned char value) -{ - unsigned int count = 0; - - for (; value; count++) - value &= value - 1; - return count; -} -#endif //0 - -#if (CAA_BITS_PER_LONG < 64) -static -unsigned int ja_hweight_ulong(unsigned long value) -{ - unsigned long r; - - r = value; - r = r - ((r >> 1) & 0x55555555); - r = (r & 0x33333333) + ((r >> 2) & 0x33333333); - r += r >> 4; - r &= 0x0F0F0F0F; - r += r >> 8; - r += r >> 16; - r &= 0x000000FF; - return r; -} -#else /* !(CAA_BITS_PER_LONG < 64) */ -static -unsigned int ja_hweight_ulong(unsigned long value) -{ - unsigned long r; - - r = value; - r = r - ((r >> 1) & 0x5555555555555555UL); - r = (r & 0x3333333333333333UL) + ((r >> 2) & 0x3333333333333333UL); - r += r >> 4; - r &= 0x0F0F0F0F0F0F0F0FUL; - r += r >> 8; - r += r >> 16; - r += r >> 32; - r &= 0x00000000000000FFUL; - return r; -} -#endif /* !(BITS_PER_LONG < 64) */ - static -struct rcu_ja_node_flag *ja_bitmap_node_get_nth(const struct rcu_ja_type *type, +struct rcu_ja_node_flag *ja_pool_node_get_nth(const struct rcu_ja_type *type, struct rcu_ja_node *node, uint8_t n) { - uint8_t *bitmap; - uint8_t byte_nr; - struct rcu_ja_node_flag *pointers; struct rcu_ja_node_flag *ptr; - unsigned int count; - - assert(type->type_class == RCU_JA_BITMAP); - - bitmap = &node->data[0]; - /* - * Check if n is hot in the bitmap. If yes, count the hweight - * prior to n, including n, to get the pointer index. - * The bitmap goes from least significant (0) to most - * significant (255) as bytes increase. 
- */ - byte_nr = n >> CHAR_BIT_SHIFT; - if (bitmap[byte_nr] & (1U << (n & CHAR_BIT_MASK))) { - uint8_t byte_iter; - unsigned long v; - - count = 0; - /* Count entire ulong prior to the one containing n */ - for (byte_iter = 0; byte_iter < JA_FLOOR(byte_nr, sizeof(unsigned long)); - byte_iter += sizeof(unsigned long)) { - v = *((unsigned long *) &bitmap[byte_iter]); - count += ja_hweight_ulong(v); - } - /* - * Read only the bits prior to and including n within - * the ulong containing n. ja_bitfield_read_le goes from - * less significant to most significant as bytes - * increase. - */ - ja_bitfield_read_le( - (unsigned long *) &bitmap[JA_FLOOR(byte_nr, sizeof(unsigned long))], - unsigned long, 0, (n & ULONG_BIT_MASK) + 1, - &v); - count += ja_hweight_ulong(v); - } else { - return NULL; - } + struct rcu_ja_node *linear; - assert(count <= type->max_child); - assert(count >= type->min_child); - - cmm_smp_rmb(); /* read bitmap before pointers */ - pointers = &bitmap[JA_BITMAP_LEN]; - ptr = pointers[count - 1]; - assert(ja_node_ptr(ptr) != NULL); - return ptr; + assert(type->type_class == RCU_JA_POOL); + linear = (struct rcu_ja_node *) + &node->data[(n >> (CHAR_BIT - type->nr_pool_order)) << type->pool_size_order]; + return ja_linear_node_get_nth(NULL, linear, n); } static @@ -344,8 +241,8 @@ struct rcu_ja_node_flag *ja_node_get_nth(struct rcu_ja_node_flag *node_flag, switch (type->type_class) { case RCU_JA_LINEAR: return ja_linear_node_get_nth(type, node, n); - case RCU_JA_BITMAP: - return ja_bitmap_node_get_nth(type, node, n); + case RCU_JA_POOL: + return ja_pool_node_get_nth(type, node, n); case RCU_JA_PIGEON: return ja_pigeon_node_get_nth(type, node, n); default: -- 2.34.1