#ifndef _URCU_ARCH_UATOMIC_X86_H
#define _URCU_ARCH_UATOMIC_X86_H

/*
 * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved.
 * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved.
 * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P.
 * Copyright (c) 2009 Mathieu Desnoyers
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 *
 * Code inspired from libuatomic_ops-1.2, inherited in part from the
 * Boehm-Demers-Weiser conservative garbage collector.
 */

#include <urcu/compiler.h>
#include <urcu/system.h>

#define UATOMIC_HAS_ATOMIC_BYTE
#define UATOMIC_HAS_ATOMIC_SHORT

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Derived from AO_compare_and_swap() and AO_test_and_set_full().
 */

struct __uatomic_dummy {
	unsigned long v[10];
};
#define __hp(x)	((struct __uatomic_dummy *)(x))
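
/*
 * Casting addr to this deliberately oversized dummy struct lets the memory
 * constraints below ("+m"/"=m" on *__hp(addr)) describe the whole object at
 * addr as the asm's memory operand, whatever its actual size, so the
 * compiler does not cache the value in a register across the asm nor assume
 * which bytes the instruction touches.
 */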

#define _uatomic_set(addr, v)	((void) CMM_STORE_SHARED(*(addr), (v)))
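
/*
 * A naturally aligned store is atomic on x86, so _uatomic_set() only needs
 * the single volatile store performed by CMM_STORE_SHARED(); no lock-prefixed
 * instruction is required.
 */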

/* cmpxchg */

static inline __attribute__((always_inline))
unsigned long __uatomic_cmpxchg(void *addr, unsigned long old,
				unsigned long _new, int len)
{
	switch (len) {
	case 1:
	{
		unsigned char result = old;

		__asm__ __volatile__(
		"lock; cmpxchgb %2, %1"
			: "+a"(result), "+m"(*__hp(addr))
			: "q"((unsigned char)_new)
			: "memory");
		return result;
	}
	case 2:
	{
		unsigned short result = old;

		__asm__ __volatile__(
		"lock; cmpxchgw %2, %1"
			: "+a"(result), "+m"(*__hp(addr))
			: "r"((unsigned short)_new)
			: "memory");
		return result;
	}
	case 4:
	{
		unsigned int result = old;

		__asm__ __volatile__(
		"lock; cmpxchgl %2, %1"
			: "+a"(result), "+m"(*__hp(addr))
			: "r"((unsigned int)_new)
			: "memory");
		return result;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		unsigned long result = old;

		__asm__ __volatile__(
		"lock; cmpxchgq %2, %1"
			: "+a"(result), "+m"(*__hp(addr))
			: "r"((unsigned long)_new)
			: "memory");
		return result;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return 0;
}

#define _uatomic_cmpxchg(addr, old, _new)				      \
	((__typeof__(*(addr))) __uatomic_cmpxchg((addr),		      \
						caa_cast_long_keep_sign(old), \
						caa_cast_long_keep_sign(_new),\
						sizeof(*(addr))))
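
/*
 * __uatomic_cmpxchg() always returns the value that was in *addr before the
 * instruction ran, so callers detect success by comparing the return value
 * with the expected "old" value. Illustrative sketch (the "count" variable
 * is hypothetical; uatomic_read() and uatomic_cmpxchg() are the public
 * wrappers defined further down and in <urcu/uatomic/generic.h>):
 *
 *	static unsigned long count;
 *
 *	unsigned long old, ret;
 *
 *	do {
 *		old = uatomic_read(&count);
 *		ret = uatomic_cmpxchg(&count, old, old + 1);
 *	} while (ret != old);
 */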

/* xchg */

static inline __attribute__((always_inline))
unsigned long __uatomic_exchange(void *addr, unsigned long val, int len)
{
	/* Note: the "xchg" instruction does not need a "lock" prefix. */
	switch (len) {
	case 1:
	{
		unsigned char result;
		__asm__ __volatile__(
		"xchgb %0, %1"
			: "=q"(result), "+m"(*__hp(addr))
			: "0" ((unsigned char)val)
			: "memory");
		return result;
	}
	case 2:
	{
		unsigned short result;
		__asm__ __volatile__(
		"xchgw %0, %1"
			: "=r"(result), "+m"(*__hp(addr))
			: "0" ((unsigned short)val)
			: "memory");
		return result;
	}
	case 4:
	{
		unsigned int result;
		__asm__ __volatile__(
		"xchgl %0, %1"
			: "=r"(result), "+m"(*__hp(addr))
			: "0" ((unsigned int)val)
			: "memory");
		return result;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		unsigned long result;
		__asm__ __volatile__(
		"xchgq %0, %1"
			: "=r"(result), "+m"(*__hp(addr))
			: "0" ((unsigned long)val)
			: "memory");
		return result;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return 0;
}

#define _uatomic_xchg(addr, v)						      \
	((__typeof__(*(addr))) __uatomic_exchange((addr),		      \
						caa_cast_long_keep_sign(v),   \
						sizeof(*(addr))))
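
/*
 * On x86 an "xchg" with a memory operand is implicitly locked and acts as a
 * full memory barrier, hence no "lock" prefix above. A typical use is
 * atomically handing off a shared pointer; in this illustrative sketch the
 * "struct work" type and the "pending" variable are hypothetical:
 *
 *	static struct work *pending;
 *
 *	struct work *batch;
 *
 *	batch = uatomic_xchg(&pending, NULL);
 *
 * After the exchange, "batch" holds the previous list and "pending" has been
 * reset to NULL in a single atomic step.
 */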

/* uatomic_add_return */

static inline __attribute__((always_inline))
unsigned long __uatomic_add_return(void *addr, unsigned long val,
				   int len)
{
	switch (len) {
	case 1:
	{
		unsigned char result = val;

		__asm__ __volatile__(
		"lock; xaddb %1, %0"
			: "+m"(*__hp(addr)), "+q" (result)
			:
			: "memory");
		return result + (unsigned char)val;
	}
	case 2:
	{
		unsigned short result = val;

		__asm__ __volatile__(
		"lock; xaddw %1, %0"
			: "+m"(*__hp(addr)), "+r" (result)
			:
			: "memory");
		return result + (unsigned short)val;
	}
	case 4:
	{
		unsigned int result = val;

		__asm__ __volatile__(
		"lock; xaddl %1, %0"
			: "+m"(*__hp(addr)), "+r" (result)
			:
			: "memory");
		return result + (unsigned int)val;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		unsigned long result = val;

		__asm__ __volatile__(
		"lock; xaddq %1, %0"
			: "+m"(*__hp(addr)), "+r" (result)
			:
			: "memory");
		return result + (unsigned long)val;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return 0;
}

#define _uatomic_add_return(addr, v)					      \
	((__typeof__(*(addr))) __uatomic_add_return((addr),		      \
						caa_cast_long_keep_sign(v),   \
						sizeof(*(addr))))
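
/*
 * "xadd" leaves the previous value of *addr in its register operand, so the
 * cases above return result + val, i.e. the value of *addr after the
 * addition. Illustrative sketch of a reference-count drop (the "refcount"
 * variable and the put_object()/free_object() helpers are hypothetical):
 *
 *	static long refcount = 1;
 *
 *	void put_object(void)
 *	{
 *		if (uatomic_add_return(&refcount, -1) == 0)
 *			free_object();
 *	}
 */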

/* uatomic_and */

static inline __attribute__((always_inline))
void __uatomic_and(void *addr, unsigned long val, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; andb %1, %0"
			: "=m"(*__hp(addr))
			: "iq" ((unsigned char)val)
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; andw %1, %0"
			: "=m"(*__hp(addr))
			: "ir" ((unsigned short)val)
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; andl %1, %0"
			: "=m"(*__hp(addr))
			: "ir" ((unsigned int)val)
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; andq %1, %0"
			: "=m"(*__hp(addr))
			: "er" ((unsigned long)val)
			: "memory");
		return;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_and(addr, v)						      \
	(__uatomic_and((addr), caa_cast_long_keep_sign(v), sizeof(*(addr))))
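
/*
 * _uatomic_and() has no return value: it is meant for clearing flag bits in
 * place. Illustrative sketch ("flags" and FLAG_BUSY are hypothetical):
 *
 *	static unsigned long flags;
 *
 *	uatomic_and(&flags, ~FLAG_BUSY);
 */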

/* uatomic_or */

static inline __attribute__((always_inline))
void __uatomic_or(void *addr, unsigned long val, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; orb %1, %0"
			: "=m"(*__hp(addr))
			: "iq" ((unsigned char)val)
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; orw %1, %0"
			: "=m"(*__hp(addr))
			: "ir" ((unsigned short)val)
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; orl %1, %0"
			: "=m"(*__hp(addr))
			: "ir" ((unsigned int)val)
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; orq %1, %0"
			: "=m"(*__hp(addr))
			: "er" ((unsigned long)val)
			: "memory");
		return;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_or(addr, v)						      \
	(__uatomic_or((addr), caa_cast_long_keep_sign(v), sizeof(*(addr))))
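
/*
 * _uatomic_or() is the counterpart used to set flag bits atomically, e.g.
 * (with the same hypothetical "flags"/FLAG_BUSY as above):
 *
 *	uatomic_or(&flags, FLAG_BUSY);
 *
 * When ordering against surrounding accesses matters, pair it with the
 * cmm_smp_mb__before_uatomic_or()/cmm_smp_mb__after_uatomic_or() helpers
 * defined at the end of this file.
 */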

/* uatomic_add */

static inline __attribute__((always_inline))
void __uatomic_add(void *addr, unsigned long val, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; addb %1, %0"
			: "=m"(*__hp(addr))
			: "iq" ((unsigned char)val)
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; addw %1, %0"
			: "=m"(*__hp(addr))
			: "ir" ((unsigned short)val)
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; addl %1, %0"
			: "=m"(*__hp(addr))
			: "ir" ((unsigned int)val)
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; addq %1, %0"
			: "=m"(*__hp(addr))
			: "er" ((unsigned long)val)
			: "memory");
		return;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_add(addr, v)						      \
	(__uatomic_add((addr), caa_cast_long_keep_sign(v), sizeof(*(addr))))
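
/*
 * Unlike _uatomic_add_return(), _uatomic_add() discards the result, so a
 * plain "lock; add" is enough. Typical use is a statistics counter whose
 * value is only read occasionally (the "nr_events" variable is hypothetical):
 *
 *	static unsigned long nr_events;
 *
 *	uatomic_add(&nr_events, 1);
 */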

/* uatomic_inc */

static inline __attribute__((always_inline))
void __uatomic_inc(void *addr, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; incb %0"
			: "=m"(*__hp(addr))
			:
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; incw %0"
			: "=m"(*__hp(addr))
			:
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; incl %0"
			: "=m"(*__hp(addr))
			:
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; incq %0"
			: "=m"(*__hp(addr))
			:
			: "memory");
		return;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_inc(addr)	(__uatomic_inc((addr), sizeof(*(addr))))

/* uatomic_dec */

static inline __attribute__((always_inline))
void __uatomic_dec(void *addr, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; decb %0"
			: "=m"(*__hp(addr))
			:
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; decw %0"
			: "=m"(*__hp(addr))
			:
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; decl %0"
			: "=m"(*__hp(addr))
			:
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; decq %0"
			: "=m"(*__hp(addr))
			:
			: "memory");
		return;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_dec(addr)	(__uatomic_dec((addr), sizeof(*(addr))))

#if ((CAA_BITS_PER_LONG != 64) && defined(CONFIG_RCU_COMPAT_ARCH))
extern int __rcu_cas_avail;
extern int __rcu_cas_init(void);

#define UATOMIC_COMPAT(insn)						      \
	((caa_likely(__rcu_cas_avail > 0))				      \
	? (_uatomic_##insn)						      \
		: ((caa_unlikely(__rcu_cas_avail < 0)			      \
			? ((__rcu_cas_init() > 0)			      \
				? (_uatomic_##insn)			      \
				: (compat_uatomic_##insn))		      \
			: (compat_uatomic_##insn))))
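
/*
 * __rcu_cas_avail is a tri-state flag: > 0 means the CPU supports cmpxchg
 * and the native _uatomic_* implementations above can be used, < 0 means the
 * CPU has not been probed yet (so __rcu_cas_init() runs the detection first),
 * and 0 means cmpxchg is unavailable, in which case the compat_uatomic_*
 * fallbacks provided by liburcu's x86 compatibility code are used instead.
 */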

/*
 * We leave the return value so we don't break the ABI, but remove the
 * return value from the API.
 */
extern unsigned long _compat_uatomic_set(void *addr,
					 unsigned long _new, int len);
#define compat_uatomic_set(addr, _new)					      \
	((void) _compat_uatomic_set((addr),				      \
				caa_cast_long_keep_sign(_new),		      \
				sizeof(*(addr))))


extern unsigned long _compat_uatomic_xchg(void *addr,
					  unsigned long _new, int len);
#define compat_uatomic_xchg(addr, _new)					      \
	((__typeof__(*(addr))) _compat_uatomic_xchg((addr),		      \
						caa_cast_long_keep_sign(_new),\
						sizeof(*(addr))))

extern unsigned long _compat_uatomic_cmpxchg(void *addr, unsigned long old,
					     unsigned long _new, int len);
#define compat_uatomic_cmpxchg(addr, old, _new)				      \
	((__typeof__(*(addr))) _compat_uatomic_cmpxchg((addr),		      \
						caa_cast_long_keep_sign(old), \
						caa_cast_long_keep_sign(_new),\
						sizeof(*(addr))))

extern void _compat_uatomic_and(void *addr, unsigned long _new, int len);
#define compat_uatomic_and(addr, v)					      \
	(_compat_uatomic_and((addr),					      \
			caa_cast_long_keep_sign(v),			      \
			sizeof(*(addr))))

extern void _compat_uatomic_or(void *addr, unsigned long _new, int len);
#define compat_uatomic_or(addr, v)					      \
	(_compat_uatomic_or((addr),					      \
			caa_cast_long_keep_sign(v),			      \
			sizeof(*(addr))))

extern unsigned long _compat_uatomic_add_return(void *addr,
						unsigned long _new, int len);
#define compat_uatomic_add_return(addr, v)				      \
	((__typeof__(*(addr))) _compat_uatomic_add_return((addr),	      \
						caa_cast_long_keep_sign(v),   \
						sizeof(*(addr))))

#define compat_uatomic_add(addr, v)					      \
		((void)compat_uatomic_add_return((addr), (v)))
#define compat_uatomic_inc(addr)					      \
		(compat_uatomic_add((addr), 1))
#define compat_uatomic_dec(addr)					      \
		(compat_uatomic_add((addr), -1))

#else
#define UATOMIC_COMPAT(insn)	(_uatomic_##insn)
#endif

/* Read is atomic even in compat mode */
#define uatomic_set(addr, v)			\
		UATOMIC_COMPAT(set(addr, v))

#define uatomic_cmpxchg(addr, old, _new)	\
		UATOMIC_COMPAT(cmpxchg(addr, old, _new))
#define uatomic_xchg(addr, v)			\
		UATOMIC_COMPAT(xchg(addr, v))

#define uatomic_and(addr, v)			\
		UATOMIC_COMPAT(and(addr, v))
#define cmm_smp_mb__before_uatomic_and()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_and()		cmm_barrier()

#define uatomic_or(addr, v)			\
		UATOMIC_COMPAT(or(addr, v))
#define cmm_smp_mb__before_uatomic_or()		cmm_barrier()
#define cmm_smp_mb__after_uatomic_or()		cmm_barrier()

#define uatomic_add_return(addr, v)		\
		UATOMIC_COMPAT(add_return(addr, v))

#define uatomic_add(addr, v)	UATOMIC_COMPAT(add(addr, v))
#define cmm_smp_mb__before_uatomic_add()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_add()		cmm_barrier()

#define uatomic_inc(addr)	UATOMIC_COMPAT(inc(addr))
#define cmm_smp_mb__before_uatomic_inc()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_inc()		cmm_barrier()

#define uatomic_dec(addr)	UATOMIC_COMPAT(dec(addr))
#define cmm_smp_mb__before_uatomic_dec()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_dec()		cmm_barrier()
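
/*
 * On x86, lock-prefixed read-modify-write instructions (and xchg) already
 * imply a full memory barrier, so the cmm_smp_mb__before_*()/__after_*()
 * helpers above only need to prevent compiler reordering (cmm_barrier()).
 * Portable callers should still invoke them, because other architectures
 * map them to real fences. Illustrative sketch (the "ready" counter is
 * hypothetical):
 *
 *	uatomic_inc(&ready);
 *	cmm_smp_mb__after_uatomic_inc();
 */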

#ifdef __cplusplus
}
#endif

#include <urcu/uatomic/generic.h>

#endif /* _URCU_ARCH_UATOMIC_X86_H */