urcu-ht: fix del node race
[urcu.git] / urcu-ht.c

/*
 * TODO: keys are currently assumed <= sizeof(void *). Key target never freed.
 */

#define _LGPL_SOURCE
#include <stdlib.h>
#include <urcu.h>
#include <arch.h>
#include <arch_atomic.h>
#include <assert.h>
#include <compiler.h>
#include <urcu-defer.h>
#include <errno.h>
#include <urcu-ht.h>
#include <urcu/jhash.h>
#include <stdio.h>

struct rcu_ht_node;

struct rcu_ht_node {
        struct rcu_ht_node *next;
        void *key;
        void *data;
};

struct rcu_ht {
        struct rcu_ht_node **tbl;
        ht_hash_fct hash_fct;
        void (*free_fct)(void *data); /* fct to free data */
        uint32_t keylen;
        uint32_t hashseed;
        struct ht_size {
                unsigned long add;
                unsigned long lookup;
        } size;
};

struct rcu_ht *ht_new(ht_hash_fct hash_fct, void (*free_fct)(void *data),
                      unsigned long init_size, uint32_t keylen,
                      uint32_t hashseed)
{
        struct rcu_ht *ht;

        ht = calloc(1, sizeof(struct rcu_ht));
        ht->hash_fct = hash_fct;
        ht->free_fct = free_fct;
        ht->size.add = init_size;
        ht->size.lookup = init_size;
        ht->keylen = keylen;
        ht->hashseed = hashseed;
        ht->tbl = calloc(init_size, sizeof(struct rcu_ht_node *));
        return ht;
}
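
/*
 * Illustrative sketch (assumed usage, not part of this file): create a table
 * of 4096 buckets holding pointer-sized keys, hashed with the ht_jhash()
 * helper defined at the end of this file and releasing stored data with
 * free():
 *
 *      struct rcu_ht *ht;
 *
 *      ht = ht_new(ht_jhash, free, 4096, sizeof(void *), 0x42);
 */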

void *ht_lookup(struct rcu_ht *ht, void *key)
{
        unsigned long hash;
        struct rcu_ht_node *node;
        void *ret;

        hash = ht->hash_fct(key, ht->keylen, ht->hashseed) % ht->size.lookup;

        rcu_read_lock();
        node = rcu_dereference(ht->tbl[hash]);
        for (;;) {
                if (likely(!node)) {
                        ret = NULL;
                        break;
                }
                if (node->key == key) {
                        ret = node->data;
                        break;
                }
                node = rcu_dereference(node->next);
        }
        rcu_read_unlock();

        return ret;
}
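
/*
 * Illustrative sketch (assumed usage, not part of this file): keys no larger
 * than a pointer are passed encoded in the key pointer itself, so the lookup
 * above can compare them with a plain pointer comparison:
 *
 *      void *data;
 *
 *      data = ht_lookup(ht, (void *)(unsigned long)42);
 *      if (!data)
 *              ... // key 42 absent (or its stored data was NULL)
 */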

/*
 * Will re-try until either:
 * - The key is already there (-EEXIST)
 * - We successfully add the key at the head of a table bucket.
 */
int ht_add(struct rcu_ht *ht, void *key, void *data)
{
        struct rcu_ht_node *node, *old_head, *new_head;
        unsigned long hash;
        int ret = 0;

        new_head = calloc(1, sizeof(struct rcu_ht_node));
        new_head->key = key;
        new_head->data = data;
        /*
         * Here comes the fun and tricky part: add at the head with a cmpxchg.
         * Hold a read lock between the moment the head is read and the node
         * traversal (to find duplicates). This ensures the head pointer has
         * not been reclaimed by the time the cmpxchg is done.
         * Always adding at the head guarantees that a concurrent add forces
         * us to re-try, so we never add duplicates.
         */
retry:
        rcu_read_lock();

        hash = ht->hash_fct(key, ht->keylen, ht->hashseed) % ht->size.add;

        old_head = node = rcu_dereference(ht->tbl[hash]);
        for (;;) {
                if (likely(!node)) {
                        break;
                }
                if (node->key == key) {
                        ret = -EEXIST;
                        /* new_head was never published: plain free is enough */
                        free(new_head);
                        goto end;
                }
                node = rcu_dereference(node->next);
        }
        new_head->next = old_head;
        if (rcu_cmpxchg_pointer(&ht->tbl[hash], old_head, new_head) != old_head)
                goto restart;
end:
        rcu_read_unlock();

        return ret;

        /* restart loop, release and re-take the read lock to be kind to GP */
restart:
        rcu_read_unlock();
        goto retry;
}
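
/*
 * Illustrative caller-side sketch (an assumed usage pattern, not part of this
 * file): the data pointer only becomes owned by the table when ht_add()
 * returns 0, so on -EEXIST the caller still owns it and must release it
 * itself:
 *
 *      ret = ht_add(ht, (void *)(unsigned long)key, data);
 *      if (ret == -EEXIST)
 *              free(data);     // duplicate key: data was not inserted
 */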

/*
 * Restart until we either successfully remove the entry or find that no entry
 * is left ((void *)(unsigned long)-ENOENT).
 * Deal with concurrent stealers by re-checking that no element in the list
 * still points to the stolen element (del_node).
 */
void *ht_steal(struct rcu_ht *ht, void *key)
{
        struct rcu_ht_node **prev, *node, *del_node = NULL;
        unsigned long hash;
        void *data;

retry:
        rcu_read_lock();

        hash = ht->hash_fct(key, ht->keylen, ht->hashseed) % ht->size.lookup;

        prev = &ht->tbl[hash];
        node = rcu_dereference(*prev);
        for (;;) {
                if (likely(!node)) {
                        if (del_node) {
                                goto end;
                        } else {
                                data = (void *)(unsigned long)-ENOENT;
                                goto error;
                        }
                }
                if (node->key == key) {
                        break;
                }
                prev = &node->next;
                node = rcu_dereference(*prev);
        }
        /* Found it! The pointer to the object is in "prev". */
        if (rcu_cmpxchg_pointer(prev, node, node->next) == node)
                del_node = node;
        /* Restart to check that no element still points to the stolen node. */
        goto restart;

end:
        /*
         * From that point on, we own del_node. Note that there can still be
         * concurrent RCU readers using it. We can free it outside of the read
         * lock, after a grace period.
         */
        rcu_read_unlock();

        data = del_node->data;
        call_rcu(free, del_node);
        return data;

error:
        rcu_read_unlock();
        return data;

        /* restart loop, release and re-take the read lock to be kind to GP */
restart:
        rcu_read_unlock();
        goto retry;
}
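
/*
 * Note on the del_node re-check in ht_steal() (the race it guards against):
 * with a bucket list  head -> A -> B -> C,  a thread stealing B can cmpxchg
 * A->next from B to C while another thread concurrently steals A by
 * cmpxchg'ing the bucket head from A to the A->next value it read earlier,
 * which may still be B. Both cmpxchg succeed, leaving  head -> B -> C : the
 * "stolen" B has been linked back in. The stealer of B therefore restarts the
 * traversal and only claims ownership (goto end) once a full pass no longer
 * finds the node it unlinked.
 */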

int ht_delete(struct rcu_ht *ht, void *key)
{
        void *data;

        data = ht_steal(ht, key);
        if (data && data != (void *)(unsigned long)-ENOENT) {
                if (ht->free_fct)
                        call_rcu(ht->free_fct, data);
                return 0;
        } else {
                return -ENOENT;
        }
}

/* Delete all old elements. Allow concurrent writer accesses. */
int ht_delete_all(struct rcu_ht *ht)
{
        unsigned long i;
        struct rcu_ht_node **prev, *node, *inext;
        int cnt = 0;

        for (i = 0; i < ht->size.lookup; i++) {
                rcu_read_lock();
                prev = &ht->tbl[i];
                /*
                 * Cut the head. After that, we own the first element.
                 */
                node = rcu_xchg_pointer(prev, NULL);
                if (!node) {
                        rcu_read_unlock();
                        continue;
                }
                /*
                 * We manage a list shared with concurrent writers and readers.
                 * Note that a concurrent add may or may not be deleted by us,
                 * depending on whether it arrives before or after the head is
                 * cut. "node" points to our first node. Remove the elements
                 * iteratively.
                 */
                for (;;) {
                        inext = NULL;
                        prev = &node->next;
                        if (prev)
                                inext = rcu_xchg_pointer(prev, NULL);
                        /*
                         * "node" is the first element of the list we have cut.
                         * We therefore own it, no concurrent writer may delete
                         * it. There can only be concurrent lookups. Concurrent
                         * add can only be done on a bucket head, but we've cut
                         * it already. inext is also owned by us, because we
                         * have exchanged it for "NULL". It will therefore be
                         * safe to use it after a G.P.
                         */
                        rcu_read_unlock();
                        if (node->data && ht->free_fct)
                                call_rcu(ht->free_fct, node->data);
                        call_rcu(free, node);
                        cnt++;
                        if (likely(!inext))
                                break;
                        rcu_read_lock();
                        node = inext;
                }
        }
        return cnt;
}

/*
 * Should only be called when no more concurrent readers nor writers can
 * possibly access the table.
 */
int ht_destroy(struct rcu_ht *ht)
{
        int ret;

        ret = ht_delete_all(ht);
        free(ht->tbl);
        free(ht);
        return ret;
}

/*
 * Expects keys of size <= sizeof(void *) to be encoded in the pointer itself.
 */
uint32_t ht_jhash(void *key, uint32_t length, uint32_t initval)
{
        uint32_t ret;
        void *vkey;

        if (length <= sizeof(void *))
                vkey = &key;
        else
                vkey = key;
        ret = jhash(vkey, length, initval);
        return ret;
}
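
/*
 * Illustrative end-to-end sketch (assumed usage, not part of this file),
 * building on a table created as in the sketch after ht_new() above. Every
 * thread touching the table is expected to be a registered RCU reader
 * (rcu_register_thread()/rcu_unregister_thread() in liburcu); keys here are
 * small integers encoded directly in the key pointer, as ht_jhash() expects:
 *
 *      int ret;
 *
 *      ret = ht_add(ht, (void *)(unsigned long)42, strdup("forty-two"));
 *      assert(!ret);
 *
 *      printf("%s\n", (char *)ht_lookup(ht, (void *)(unsigned long)42));
 *
 *      ret = ht_delete(ht, (void *)(unsigned long)42); // frees data via free()
 *      assert(!ret);
 *
 *      ht_destroy(ht); // only once no readers/writers remain
 */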