urcu-ht: fix del node race
[urcu.git] / urcu-ht.c
1
/*
 * TODO: keys are currently assumed to fit in a pointer
 * (<= sizeof(void *)). The memory a key points to is never freed.
 */
5
6 #define _LGPL_SOURCE
7 #include <stdlib.h>
8 #include <urcu.h>
9 #include <arch.h>
10 #include <arch_atomic.h>
11 #include <assert.h>
12 #include <compiler.h>
13 #include <urcu-defer.h>
14 #include <errno.h>
15 #include <urcu-ht.h>
16 #include <urcu/jhash.h>
17 #include <stdio.h>
18
struct rcu_ht_node;

/*
 * One entry of a bucket chain. Entries of a bucket are linked through
 * "next"; a NULL "next" terminates the chain.
 */
struct rcu_ht_node {
	struct rcu_ht_node *next;	/* following entry in the same bucket */
	void *key;			/* compared by pointer value */
	void *data;			/* payload handed back by lookups */
};
26
27 struct rcu_ht {
28 struct rcu_ht_node **tbl;
29 ht_hash_fct hash_fct;
30 void (*free_fct)(void *data); /* fct to free data */
31 uint32_t keylen;
32 uint32_t hashseed;
33 struct ht_size {
34 unsigned long add;
35 unsigned long lookup;
36 } size;
37 };
38
39 struct rcu_ht *ht_new(ht_hash_fct hash_fct, void (*free_fct)(void *data),
40 unsigned long init_size, uint32_t keylen,
41 uint32_t hashseed)
42 {
43 struct rcu_ht *ht;
44
45 ht = calloc(1, sizeof(struct rcu_ht));
46 ht->hash_fct = hash_fct;
47 ht->free_fct = free_fct;
48 ht->size.add = init_size;
49 ht->size.lookup = init_size;
50 ht->keylen = keylen;
51 ht->hashseed = hashseed;
52 ht->tbl = calloc(init_size, sizeof(struct rcu_ht_node *));
53 return ht;
54 }
55
56 void *ht_lookup(struct rcu_ht *ht, void *key)
57 {
58 unsigned long hash;
59 struct rcu_ht_node *node;
60 void *ret;
61
62 hash = ht->hash_fct(key, ht->keylen, ht->hashseed) % ht->size.lookup;
63
64 rcu_read_lock();
65 node = rcu_dereference(ht->tbl[hash]);
66 for (;;) {
67 if (likely(!node)) {
68 ret = NULL;
69 break;
70 }
71 if (node->key == key) {
72 ret = node->data;
73 break;
74 }
75 node = rcu_dereference(node->next);
76 }
77 rcu_read_unlock();
78
79 return ret;
80 }
81
82 /*
83 * Will re-try until either:
84 * - The key is already there (-EEXIST)
85 * - We successfully add the key at the head of a table bucket.
86 */
87 int ht_add(struct rcu_ht *ht, void *key, void *data)
88 {
89 struct rcu_ht_node *node, *old_head, *new_head;
90 unsigned long hash;
91 int ret = 0;
92
93 new_head = calloc(1, sizeof(struct rcu_ht_node));
94 new_head->key = key;
95 new_head->data = data;
96 /* here comes the fun and tricky part.
97 * Add at the beginning with a cmpxchg.
98 * Hold a read lock between the moment the first element is read
99 * and the nodes traversal (to find duplicates). This ensures
100 * the head pointer has not been reclaimed when cmpxchg is done.
101 * Always adding at the head ensures that we would have to
102 * re-try if a new item has been added concurrently. So we ensure that
103 * we never add duplicates. */
104 retry:
105 rcu_read_lock();
106
107 hash = ht->hash_fct(key, ht->keylen, ht->hashseed) % ht->size.add;
108
109 old_head = node = rcu_dereference(ht->tbl[hash]);
110 for (;;) {
111 if (likely(!node)) {
112 break;
113 }
114 if (node->key == key) {
115 ret = -EEXIST;
116 goto end;
117 }
118 node = rcu_dereference(node->next);
119 }
120 new_head->next = old_head;
121 if (rcu_cmpxchg_pointer(&ht->tbl[hash], old_head, new_head) != old_head)
122 goto restart;
123 end:
124 rcu_read_unlock();
125
126 return ret;
127
128 /* restart loop, release and re-take the read lock to be kind to GP */
129 restart:
130 rcu_read_unlock();
131 goto retry;
132 }
133
134 /*
135 * Restart until we successfully remove the entry, or no entry is left
136 * ((void *)(unsigned long)-ENOENT).
137 * Deal with concurrent stealers by verifying that there are no element
138 * in the list still pointing to the element stolen. (del_node)
139 */
140 void *ht_steal(struct rcu_ht *ht, void *key)
141 {
142 struct rcu_ht_node **prev, *node, *del_node = NULL;
143 unsigned long hash;
144 void *data;
145
146 retry:
147 rcu_read_lock();
148
149 hash = ht->hash_fct(key, ht->keylen, ht->hashseed) % ht->size.lookup;
150
151 prev = &ht->tbl[hash];
152 node = rcu_dereference(*prev);
153 for (;;) {
154 if (likely(!node)) {
155 if (del_node) {
156 goto end;
157 } else {
158 data = (void *)(unsigned long)-ENOENT;
159 goto error;
160 }
161 }
162 if (node->key == key) {
163 break;
164 }
165 prev = &node->next;
166 node = rcu_dereference(*prev);
167 }
168 /* Found it ! pointer to object is in "prev" */
169 if (rcu_cmpxchg_pointer(prev, node, node->next) != node)
170 del_node = node;
171 goto restart;
172
173 end:
174 /*
175 * From that point, we own node. Note that there can still be concurrent
176 * RCU readers using it. We can free it outside of read lock after a GP.
177 */
178 rcu_read_unlock();
179
180 data = node->data;
181 call_rcu(free, node);
182 return data;
183
184 error:
185 rcu_read_unlock();
186 return data;
187
188 /* restart loop, release and re-take the read lock to be kind to GP */
189 restart:
190 rcu_read_unlock();
191 goto retry;
192 }
193
194 int ht_delete(struct rcu_ht *ht, void *key)
195 {
196 void *data;
197
198 data = ht_steal(ht, key);
199 if (data && data != (void *)(unsigned long)-ENOENT) {
200 if (ht->free_fct)
201 call_rcu(ht->free_fct, data);
202 return 0;
203 } else {
204 return -ENOENT;
205 }
206 }
207
208 /* Delete all old elements. Allow concurrent writer accesses. */
209 int ht_delete_all(struct rcu_ht *ht)
210 {
211 unsigned long i;
212 struct rcu_ht_node **prev, *node, *inext;
213 int cnt = 0;
214
215 for (i = 0; i < ht->size.lookup; i++) {
216 rcu_read_lock();
217 prev = &ht->tbl[i];
218 /*
219 * Cut the head. After that, we own the first element.
220 */
221 node = rcu_xchg_pointer(prev, NULL);
222 if (!node) {
223 rcu_read_unlock();
224 continue;
225 }
226 /*
227 * We manage a list shared with concurrent writers and readers.
228 * Note that a concurrent add may or may not be deleted by us,
229 * depending if it arrives before or after the head is cut.
230 * "node" points to our first node. Remove first elements
231 * iteratively.
232 */
233 for (;;) {
234 inext = NULL;
235 prev = &node->next;
236 if (prev)
237 inext = rcu_xchg_pointer(prev, NULL);
238 /*
239 * "node" is the first element of the list we have cut.
240 * We therefore own it, no concurrent writer may delete
241 * it. There can only be concurrent lookups. Concurrent
242 * add can only be done on a bucket head, but we've cut
243 * it already. inext is also owned by us, because we
244 * have exchanged it for "NULL". It will therefore be
245 * safe to use it after a G.P.
246 */
247 rcu_read_unlock();
248 if (node->data)
249 call_rcu(ht->free_fct, node->data);
250 call_rcu(free, node);
251 cnt++;
252 if (likely(!inext))
253 break;
254 rcu_read_lock();
255 node = inext;
256 }
257 }
258 return cnt;
259 }
260
261 /*
262 * Should only be called when no more concurrent readers nor writers can
263 * possibly access the table.
264 */
265 int ht_destroy(struct rcu_ht *ht)
266 {
267 int ret;
268
269 ret = ht_delete_all(ht);
270 free(ht->tbl);
271 free(ht);
272 return ret;
273 }
274
/*
 * jhash() wrapper usable as a table hash function.
 *
 * Expects keys <= than pointer size to be encoded in the pointer itself:
 * for such lengths the bytes of the "key" argument are hashed, otherwise
 * the bytes "key" points to are hashed.
 */
uint32_t ht_jhash(void *key, uint32_t length, uint32_t initval)
{
	void *bytes = (length <= sizeof(void *)) ? (void *)&key : key;

	return jhash(bytes, length, initval);
}
This page took 0.034471 seconds and 5 git commands to generate.