From 776b7d456bc2adbd880c1917002b4c1a4b479b34 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Wed, 6 Sep 2023 09:25:22 -0400
Subject: [PATCH 01/16] LoongArch: Document that byte and short atomics are
 implemented with LL/SC

Based on the LoongArch Reference Manual:

https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html

Section 2.2.7 "Atomic Memory Access Instructions" only lists atomic
operations for 32-bit and 64-bit integers. As detailed in Section
2.2.7.1, LL/SC instructions operating on 32-bit and 64-bit integers are
also available. Those are used by the compiler to support atomics on
byte and short types.

This means atomics on 32-bit and 64-bit types have stronger forward
progress guarantees than those operating on 8-bit and 16-bit types.

Link: https://github.com/urcu/userspace-rcu/pull/11#issuecomment-1706528796
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Change-Id: I01569b718f7300a46d984c34065c0bbfbd2f7cc6
---
 include/urcu/uatomic/loongarch.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/urcu/uatomic/loongarch.h b/include/urcu/uatomic/loongarch.h
index f41302a..dcf9e93 100644
--- a/include/urcu/uatomic/loongarch.h
+++ b/include/urcu/uatomic/loongarch.h
@@ -32,6 +32,11 @@
 extern "C" {
 #endif
 
+/*
+ * LoongArch implements byte and short atomics with LL/SC instructions,
+ * which retry if the cache line is modified concurrently between LL and
+ * SC.
+ */
 #define UATOMIC_HAS_ATOMIC_BYTE
 #define UATOMIC_HAS_ATOMIC_SHORT
 
-- 
2.34.1


From dad4e6b76774924762a4eb56def7fbaee38d7653 Mon Sep 17 00:00:00 2001
From: Olivier Dion <odion@efficios.com>
Date: Thu, 21 Sep 2023 16:16:21 -0400
Subject: [PATCH 02/16] rculfhash: Only pass integral types to atomic builtins

Clang expects the pointers passed to atomic builtins to point to
integral types. Fix this by casting the node addresses to uintptr_t *.

Change-Id: Ifb8833c493df849a542a22f0bb2baeeb85be0297
Signed-off-by: Olivier Dion <odion@efficios.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
 src/rculfhash.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/rculfhash.c b/src/rculfhash.c
index 09cd2b7..a453326 100644
--- a/src/rculfhash.c
+++ b/src/rculfhash.c
@@ -1153,7 +1153,7 @@ int _cds_lfht_del(struct cds_lfht *ht, unsigned long size,
 		struct cds_lfht_node *node)
 {
 	struct cds_lfht_node *bucket, *next;
-	struct cds_lfht_node **node_next;
+	uintptr_t *node_next;
 
 	if (!node)	/* Return -ENOENT if asked to delete NULL node */
 		return -ENOENT;
@@ -1185,7 +1185,7 @@ int _cds_lfht_del(struct cds_lfht *ht, unsigned long size,
 	 * NOTE: The node_next variable is present to avoid breaking
 	 * strict-aliasing rules.
 	 */
-	node_next = &node->next;
+	node_next = (uintptr_t*)&node->next;
 	uatomic_or_mo(node_next, REMOVED_FLAG, CMM_RELEASE);
 
 	/* We performed the (logical) deletion. */
@@ -1426,7 +1426,7 @@ void remove_table_partition(struct cds_lfht *ht, unsigned long i,
 	for (j = size + start; j < size + start + len; j++) {
 		struct cds_lfht_node *fini_bucket = bucket_at(ht, j);
 		struct cds_lfht_node *parent_bucket = bucket_at(ht, j - size);
-		struct cds_lfht_node **fini_bucket_next;
+		uintptr_t *fini_bucket_next;
 
 		urcu_posix_assert(j >= size && j < (size << 1));
 		dbg_printf("remove entry: order %lu index %lu hash %lu\n",
@@ -1436,7 +1436,7 @@ void remove_table_partition(struct cds_lfht *ht, unsigned long i,
 		 * NOTE: The fini_bucket_next variable is present to
 		 * avoid breaking strict-aliasing rules.
 		 */
-		fini_bucket_next = &fini_bucket->next;
+		fini_bucket_next = (uintptr_t*)&fini_bucket->next;
 		uatomic_or(fini_bucket_next, REMOVED_FLAG);
 		_cds_lfht_gc_bucket(parent_bucket, fini_bucket);
 	}
-- 
2.34.1


From 965e60edf1aad42f8981887d5fc50e29380c37e4 Mon Sep 17 00:00:00 2001
From: =?utf8?q?J=C3=A9r=C3=A9mie=20Galarneau?=
 <jeremie.galarneau@efficios.com>
Date: Fri, 29 Sep 2023 15:16:48 -0400
Subject: [PATCH 03/16] Fix: urcu-bp: misaligned reader accesses
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

This is a port from a fix in LTTng-UST's embedded urcu (d1a0fad8). The
original message follows:

    Running the LTTng-tools tests (test_valid_filter, for example) under
    address sanitizer results in the following warning:

      /usr/include/lttng/urcu/static/urcu-ust.h:155:6: runtime error: member access within misaligned address 0x7fc45db3a020 for type 'struct lttng_ust_urcu_reader', which requires 128 byte alignment
      0x7fc45db3a020: note: pointer points here
       c4 7f 00 00  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  00 00 00 00
                    ^

    While the node member of lttng_ust_urcu_reader has an "aligned"
    attribute of CAA_CACHE_LINE_SIZE, the compiler can't ensure the
    alignment of members for dynamically allocated instances.

    The `data` pointer is changed from char* to struct
    lttng_ust_urcu_reader*, allowing the compiler to enforce the expected
    alignment constraints.

    Since `data` was addressed in bytes, the code using this field is
    adapted to use element counts. As the chunks are only used to allocate
    reader instances (and not other types), it makes the code a bit easier
    to read.

Signed-off-by: Jérémie Galarneau <jeremie.galarneau@efficios.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Change-Id: I89ea1c32ca3c5c45621b562ab68f47a8428d3574
---
 src/urcu-bp.c | 105 +++++++++++++++++++++++++-------------------------
 1 file changed, 52 insertions(+), 53 deletions(-)

diff --git a/src/urcu-bp.c b/src/urcu-bp.c
index 46397c7..38f867e 100644
--- a/src/urcu-bp.c
+++ b/src/urcu-bp.c
@@ -75,10 +75,7 @@ void *mremap_wrapper(void *old_address __attribute__((unused)),
 
 /* Sleep delay in ms */
 #define RCU_SLEEP_DELAY_MS	10
-#define INIT_NR_THREADS		8
-#define ARENA_INIT_ALLOC		\
-	sizeof(struct registry_chunk)	\
-	+ INIT_NR_THREADS * sizeof(struct urcu_bp_reader)
+#define INIT_READER_COUNT	8
 
 /*
  * Active attempts to check for reader Q.S. before calling sleep().
@@ -148,10 +145,10 @@ DEFINE_URCU_TLS(struct urcu_bp_reader *, urcu_bp_reader);
 static CDS_LIST_HEAD(registry);
 
 struct registry_chunk {
-	size_t data_len;		/* data length */
-	size_t used;			/* amount of data used */
+	size_t capacity;		/* capacity of this chunk (in elements) */
+	size_t used;			/* count of elements used */
 	struct cds_list_head node;	/* chunk_list node */
-	char data[];
+	struct urcu_bp_reader readers[];
 };
 
 struct registry_arena {
@@ -201,6 +198,13 @@ static void smp_mb_master(void)
 	}
 }
 
+/* Get the size of a chunk's allocation from its capacity (an element count). */
+static size_t chunk_allocation_size(size_t capacity)
+{
+	return (capacity * sizeof(struct urcu_bp_reader)) +
+		sizeof(struct registry_chunk);
+}
+
 /*
  * Always called with rcu_registry lock held. Releases this lock between
  * iterations and grabs it again. Holds the lock when it returns.
@@ -375,24 +379,20 @@ static
 void expand_arena(struct registry_arena *arena)
 {
 	struct registry_chunk *new_chunk, *last_chunk;
-	size_t old_chunk_len, new_chunk_len;
+	size_t old_chunk_size_bytes, new_chunk_size_bytes, new_capacity;
 
 	/* No chunk. */
 	if (cds_list_empty(&arena->chunk_list)) {
-		urcu_posix_assert(ARENA_INIT_ALLOC >=
-			sizeof(struct registry_chunk)
-			+ sizeof(struct rcu_reader));
-		new_chunk_len = ARENA_INIT_ALLOC;
+		new_chunk_size_bytes = chunk_allocation_size(INIT_READER_COUNT);
 		new_chunk = (struct registry_chunk *) mmap(NULL,
-			new_chunk_len,
+			new_chunk_size_bytes,
 			PROT_READ | PROT_WRITE,
 			MAP_ANONYMOUS | MAP_PRIVATE,
 			-1, 0);
 		if (new_chunk == MAP_FAILED)
 			abort();
-		memset(new_chunk, 0, new_chunk_len);
-		new_chunk->data_len =
-			new_chunk_len - sizeof(struct registry_chunk);
+		memset(new_chunk, 0, new_chunk_size_bytes);
+		new_chunk->capacity = INIT_READER_COUNT;
 		cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
 		return;		/* We're done. */
 	}
@@ -400,34 +400,32 @@ void expand_arena(struct registry_arena *arena)
 	/* Try expanding last chunk. */
 	last_chunk = cds_list_entry(arena->chunk_list.prev,
 		struct registry_chunk, node);
-	old_chunk_len =
-		last_chunk->data_len + sizeof(struct registry_chunk);
-	new_chunk_len = old_chunk_len << 1;
+	old_chunk_size_bytes = chunk_allocation_size(last_chunk->capacity);
+	new_capacity = last_chunk->capacity << 1;
+	new_chunk_size_bytes = chunk_allocation_size(new_capacity);
 
 	/* Don't allow memory mapping to move, just expand. */
-	new_chunk = mremap_wrapper(last_chunk, old_chunk_len,
-		new_chunk_len, 0);
+	new_chunk = mremap_wrapper(last_chunk, old_chunk_size_bytes,
+		new_chunk_size_bytes, 0);
 	if (new_chunk != MAP_FAILED) {
 		/* Should not have moved. */
-		urcu_posix_assert(new_chunk == last_chunk);
-		memset((char *) last_chunk + old_chunk_len, 0,
-			new_chunk_len - old_chunk_len);
-		last_chunk->data_len =
-			new_chunk_len - sizeof(struct registry_chunk);
+		assert(new_chunk == last_chunk);
+		memset((char *) last_chunk + old_chunk_size_bytes, 0,
+			new_chunk_size_bytes - old_chunk_size_bytes);
+		last_chunk->capacity = new_capacity;
 		return;		/* We're done. */
 	}
 
 	/* Remap did not succeed, we need to add a new chunk. */
 	new_chunk = (struct registry_chunk *) mmap(NULL,
-		new_chunk_len,
+		new_chunk_size_bytes,
 		PROT_READ | PROT_WRITE,
 		MAP_ANONYMOUS | MAP_PRIVATE,
 		-1, 0);
 	if (new_chunk == MAP_FAILED)
 		abort();
-	memset(new_chunk, 0, new_chunk_len);
-	new_chunk->data_len =
-		new_chunk_len - sizeof(struct registry_chunk);
+	memset(new_chunk, 0, new_chunk_size_bytes);
+	new_chunk->capacity = new_capacity;
 	cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
 }
 
@@ -435,22 +433,23 @@ static
 struct rcu_reader *arena_alloc(struct registry_arena *arena)
 {
 	struct registry_chunk *chunk;
-	struct rcu_reader *rcu_reader_reg;
 	int expand_done = 0;	/* Only allow to expand once per alloc */
-	size_t len = sizeof(struct rcu_reader);
 
 retry:
 	cds_list_for_each_entry(chunk, &arena->chunk_list, node) {
-		if (chunk->data_len - chunk->used < len)
+		size_t spot_idx;
+
+		/* Skip fully used chunks. */
+		if (chunk->used == chunk->capacity) {
 			continue;
-		/* Find spot */
-		for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
-				rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
-				rcu_reader_reg++) {
-			if (!rcu_reader_reg->alloc) {
-				rcu_reader_reg->alloc = 1;
-				chunk->used += len;
-				return rcu_reader_reg;
+		}
+
+		/* Find a spot. */
+		for (spot_idx = 0; spot_idx < chunk->capacity; spot_idx++) {
+			if (!chunk->readers[spot_idx].alloc) {
+				chunk->readers[spot_idx].alloc = 1;
+				chunk->used++;
+				return &chunk->readers[spot_idx];
 			}
 		}
 	}
@@ -498,7 +497,7 @@ void cleanup_thread(struct registry_chunk *chunk,
 	cds_list_del(&rcu_reader_reg->node);
 	rcu_reader_reg->tid = 0;
 	rcu_reader_reg->alloc = 0;
-	chunk->used -= sizeof(struct rcu_reader);
+	chunk->used--;
 }
 
 static
@@ -507,9 +506,9 @@ struct registry_chunk *find_chunk(struct rcu_reader *rcu_reader_reg)
 	struct registry_chunk *chunk;
 
 	cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
-		if (rcu_reader_reg < (struct rcu_reader *) &chunk->data[0])
+		if (rcu_reader_reg < (struct urcu_bp_reader *) &chunk->readers[0])
 			continue;
-		if (rcu_reader_reg >= (struct rcu_reader *) &chunk->data[chunk->data_len])
+		if (rcu_reader_reg >= (struct urcu_bp_reader *) &chunk->readers[chunk->capacity])
 			continue;
 		return chunk;
 	}
@@ -658,8 +657,7 @@ void urcu_bp_exit(void)
 
 		cds_list_for_each_entry_safe(chunk, tmp,
 				&registry_arena.chunk_list, node) {
-			munmap((void *) chunk, chunk->data_len
-					+ sizeof(struct registry_chunk));
+			munmap((void *) chunk, chunk_allocation_size(chunk->capacity));
 		}
 		CDS_INIT_LIST_HEAD(&registry_arena.chunk_list);
 		ret = pthread_key_delete(urcu_bp_key);
@@ -716,17 +714,18 @@ static
 void urcu_bp_prune_registry(void)
 {
 	struct registry_chunk *chunk;
-	struct urcu_bp_reader *rcu_reader_reg;
 
 	cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
-		for (rcu_reader_reg = (struct urcu_bp_reader *) &chunk->data[0];
-				rcu_reader_reg < (struct urcu_bp_reader *) &chunk->data[chunk->data_len];
-				rcu_reader_reg++) {
-			if (!rcu_reader_reg->alloc)
+		size_t spot_idx;
+
+		for (spot_idx = 0; spot_idx < chunk->capacity; spot_idx++) {
+			struct urcu_bp_reader *reader = &chunk->readers[spot_idx];
+
+			if (!reader->alloc)
 				continue;
-			if (rcu_reader_reg->tid == pthread_self())
+			if (reader->tid == pthread_self())
 				continue;
-			cleanup_thread(chunk, rcu_reader_reg);
+			cleanup_thread(chunk, reader);
 		}
 	}
 }
-- 
2.34.1


From 46980605309e922d14f646c6705d184cb674c0f7 Mon Sep 17 00:00:00 2001
From: Olivier Dion <odion@efficios.com>
Date: Thu, 28 Sep 2023 12:53:46 -0400
Subject: [PATCH 04/16] urcu/uatomic/riscv: Mark RISC-V as broken

Implementations of some atomic operations of GCC for RISC-V are
insufficient for sequential consistency. For this reason Userspace RCU
is currently marked as `broken' for RISC-V with GCC. However, it is
still possible to use other toolchains.

See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104831 for details.

For now, we mark every version of GCC as unsupported. Distribution
package maintainers will have to cherry-pick the relevant patches in GCC
then remove the #error in Userspace RCU if they want to support it.

As for us, we will incrementally add specific versions of GCC that have
fixed the issue whenever new stable releases are made from the GCC
project.

Change-Id: I2cd7c8f12068628b845a096e03f5f8100eacbe43
Signed-off-by: Olivier Dion <odion@efficios.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
 include/urcu/uatomic/riscv.h | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/include/urcu/uatomic/riscv.h b/include/urcu/uatomic/riscv.h
index 9313190..a70ea25 100644
--- a/include/urcu/uatomic/riscv.h
+++ b/include/urcu/uatomic/riscv.h
@@ -3,9 +3,28 @@
 // SPDX-License-Identifier: MIT
 
 /*
- * Atomic exchange operations for the RISC-V architecture. Let GCC do it.
+ * Atomic exchange operations for the RISC-V architecture.
+ *
+ * Let the compiler do it.
  */
 
+/*
+ * See <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104831> for details.
+ *
+ * Until the following patches are backported, Userspace RCU is broken for the
+ * RISC-V architecture when compiled with GCC.
+ *
+ *  - <https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4990cf84c460f064d6281d0813f20b0ef20c7448>
+ *  - <https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4990cf84c460f064d6281d0813f20b0ef20c7448>
+ *  - <https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=d199d2e56da2379004e7e0457150409c0c99d3e6>
+ */
+#if defined(__GNUC__)
+#  error "Implementations of some atomic operations of GCC for RISC-V \
+          are insufficient for sequential consistency. For this reason \
+          Userspace RCU is currently marked as 'broken' for RISC-V with \
+          GCC. However, it is still possible to use other toolchains."
+#endif
+
 #ifndef _URCU_ARCH_UATOMIC_RISCV_H
 #define _URCU_ARCH_UATOMIC_RISCV_H
 
-- 
2.34.1


From 1c64a40d8b2a4e65ae002ea0f15ab42f556b0987 Mon Sep 17 00:00:00 2001
From: Michael Jeanson <mjeanson@efficios.com>
Date: Tue, 3 Oct 2023 14:59:08 -0400
Subject: [PATCH 05/16] fix: add missing SPDX licensing tags

Change-Id: If7016a3c83211e88c102f8b395dc290859af4789
Signed-off-by: Michael Jeanson <mjeanson@efficios.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
 include/urcu/annotate.h                 | 25 ++++++-------------
 include/urcu/arch/loongarch.h           | 20 ++++------------
 include/urcu/uatomic/builtins-generic.h | 20 ++++------------
 include/urcu/uatomic/builtins.h         | 20 ++++------------
 include/urcu/uatomic/loongarch.h        | 24 ++++---------------
 m4/ae_cc_atomic_builtins.m4             | 32 +++----------------------
 tests/benchmark/common-states.c         |  4 ++++
 tests/benchmark/common-states.h         |  4 ++++
 tests/unit/test_lfstack.c               | 20 ++++------------
 tests/unit/test_wfcqueue.c              | 20 ++++------------
 tests/unit/test_wfqueue.c               | 20 ++++------------
 tests/unit/test_wfstack.c               | 20 ++++------------
 12 files changed, 50 insertions(+), 179 deletions(-)

diff --git a/include/urcu/annotate.h b/include/urcu/annotate.h
index 37e7f03..f0955a0 100644
--- a/include/urcu/annotate.h
+++ b/include/urcu/annotate.h
@@ -1,23 +1,14 @@
+// SPDX-FileCopyrightText: 2023 Olivier Dion <odion@efficios.com>
+//
+// SPDX-License-Identifier: LGPL-2.1-or-later
+
+#ifndef _URCU_ANNOTATE_H
+#define _URCU_ANNOTATE_H
+
 /*
  * urcu/annotate.h
  *
  * Userspace RCU - annotation header.
- *
- * Copyright 2023 - Olivier Dion <odion@efficios.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /*
@@ -28,8 +19,6 @@
  *
  * You have been warned.
  */
-#ifndef _URCU_ANNOTATE_H
-#define _URCU_ANNOTATE_H
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/include/urcu/arch/loongarch.h b/include/urcu/arch/loongarch.h
index a6d9fee..7e82d9a 100644
--- a/include/urcu/arch/loongarch.h
+++ b/include/urcu/arch/loongarch.h
@@ -1,24 +1,12 @@
+// SPDX-FileCopyrightText: 2021 Wang Jing <wangjing@loongson.cn>
+//
+// SPDX-License-Identifier: LGPL-2.1-or-later
+
 #ifndef _URCU_ARCH_LOONGARCH_H
 #define _URCU_ARCH_LOONGARCH_H
 
 /*
  * arch/loongarch.h: trivial definitions for the LoongArch architecture.
- *
- * Copyright (c) 2021 Wang Jing <wangjing@loongson.cn>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include <urcu/compiler.h>
diff --git a/include/urcu/uatomic/builtins-generic.h b/include/urcu/uatomic/builtins-generic.h
index a641bc9..15c7f07 100644
--- a/include/urcu/uatomic/builtins-generic.h
+++ b/include/urcu/uatomic/builtins-generic.h
@@ -1,21 +1,9 @@
+// SPDX-FileCopyrightText: 2023 Olivier Dion <odion@efficios.com>
+//
+// SPDX-License-Identifier: LGPL-2.1-or-later
+
 /*
  * urcu/uatomic/builtins-generic.h
- *
- * Copyright (c) 2023 Olivier Dion <odion@efficios.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #ifndef _URCU_UATOMIC_BUILTINS_GENERIC_H
diff --git a/include/urcu/uatomic/builtins.h b/include/urcu/uatomic/builtins.h
index 82e98f8..5b92700 100644
--- a/include/urcu/uatomic/builtins.h
+++ b/include/urcu/uatomic/builtins.h
@@ -1,21 +1,9 @@
+// SPDX-FileCopyrightText: 2023 Olivier Dion <odion@efficios.com>
+//
+// SPDX-License-Identifier: LGPL-2.1-or-later
+
 /*
  * urcu/uatomic/builtins.h
- *
- * Copyright (c) 2023 Olivier Dion <odion@efficios.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #ifndef _URCU_UATOMIC_BUILTINS_H
diff --git a/include/urcu/uatomic/loongarch.h b/include/urcu/uatomic/loongarch.h
index dcf9e93..3454d41 100644
--- a/include/urcu/uatomic/loongarch.h
+++ b/include/urcu/uatomic/loongarch.h
@@ -1,28 +1,12 @@
+// SPDX-FileCopyrightText: 2021 Wang Jing <wangjing@loongson.cn>
+//
+// SPDX-License-Identifier: MIT
+
 #ifndef _URCU_UATOMIC_ARCH_LOONGARCH_H
 #define _URCU_UATOMIC_ARCH_LOONGARCH_H
 
 /*
  * Atomic exchange operations for the LoongArch architecture. Let GCC do it.
- *
- * Copyright (c) 2021 Wang Jing <wangjing@loongson.cn>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
  */
 
 #include <urcu/compiler.h>
diff --git a/m4/ae_cc_atomic_builtins.m4 b/m4/ae_cc_atomic_builtins.m4
index 2efbde3..0bdb8d0 100644
--- a/m4/ae_cc_atomic_builtins.m4
+++ b/m4/ae_cc_atomic_builtins.m4
@@ -1,36 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-or-later WITH LicenseRef-Autoconf-exception-macro
+# SPDX-FileCopyrightText: 2023 Michael Jeanson <mjeanson@efficios.com>
+#
 # SYNOPSIS
 #
 #   AE_CC_ATOMIC_BUILTINS([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]])
 #
-# LICENSE
-#
-#   Copyright (c) 2023 Michael Jeanson <mjeanson@efficios.com>
-#
-#   This program is free software; you can redistribute it and/or modify it
-#   under the terms of the GNU General Public License as published by the
-#   Free Software Foundation; either version 2 of the License, or (at your
-#   option) any later version.
-#
-#   This program is distributed in the hope that it will be useful, but
-#   WITHOUT ANY WARRANTY; without even the implied warranty of
-#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-#   Public License for more details.
-#
-#   You should have received a copy of the GNU General Public License along
-#   with this program. If not, see <https://www.gnu.org/licenses/>.
-#
-#   As a special exception, the respective Autoconf Macro's copyright owner
-#   gives unlimited permission to copy, distribute and modify the configure
-#   scripts that are the output of Autoconf when processing the Macro. You
-#   need not follow the terms of the GNU General Public License when using
-#   or distributing such scripts, even though portions of the text of the
-#   Macro appear in them. The GNU General Public License (GPL) does govern
-#   all other use of the material that constitutes the Autoconf Macro.
-#
-#   This special exception to the GPL applies to versions of the Autoconf
-#   Macro released by the Autoconf Archive. When you make and distribute a
-#   modified version of the Autoconf Macro, you may extend this special
-#   exception to the GPL to apply to your modified version as well.
 
 #serial 1
 
diff --git a/tests/benchmark/common-states.c b/tests/benchmark/common-states.c
index 6e70351..4a7f719 100644
--- a/tests/benchmark/common-states.c
+++ b/tests/benchmark/common-states.c
@@ -1 +1,5 @@
+// SPDX-FileCopyrightText: 2023 Olivier Dion <odion@efficios.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
 volatile int _test_go = 0, _test_stop = 0;
diff --git a/tests/benchmark/common-states.h b/tests/benchmark/common-states.h
index dfbbfe5..d46fd38 100644
--- a/tests/benchmark/common-states.h
+++ b/tests/benchmark/common-states.h
@@ -1,3 +1,7 @@
+// SPDX-FileCopyrightText: 2023 Olivier Dion <odion@efficios.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
 /* Common states for benchmarks. */
 
 #include <unistd.h>
diff --git a/tests/unit/test_lfstack.c b/tests/unit/test_lfstack.c
index a1f99f0..03d1632 100644
--- a/tests/unit/test_lfstack.c
+++ b/tests/unit/test_lfstack.c
@@ -1,23 +1,11 @@
+// SPDX-FileCopyrightText: 2023 Olivier Dion <odion@efficios.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
 /*
  * test_lfstack.c
  *
  * Userspace RCU library - test wftack race conditions
- *
- * Copyright 2023 - Olivier Dion <odion@efficios.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 #define _LGPL_SOURCE
diff --git a/tests/unit/test_wfcqueue.c b/tests/unit/test_wfcqueue.c
index 338aa07..45c4988 100644
--- a/tests/unit/test_wfcqueue.c
+++ b/tests/unit/test_wfcqueue.c
@@ -1,23 +1,11 @@
+// SPDX-FileCopyrightText: 2023 Olivier Dion <odion@efficios.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
 /*
  * test_wfcqueue.c
  *
  * Userspace RCU library - test wfcqueue race conditions
- *
- * Copyright 2023 - Olivier Dion <odion@efficios.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 #define _LGPL_SOURCE
diff --git a/tests/unit/test_wfqueue.c b/tests/unit/test_wfqueue.c
index 57afaba..944c49a 100644
--- a/tests/unit/test_wfqueue.c
+++ b/tests/unit/test_wfqueue.c
@@ -1,23 +1,11 @@
+// SPDX-FileCopyrightText: 2023 Olivier Dion <odion@efficios.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
 /*
  * test_wfqueue.c
  *
  * Userspace RCU library - test wfqueue race conditions
- *
- * Copyright 2023 - Olivier Dion <odion@efficios.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 #define _LGPL_SOURCE
diff --git a/tests/unit/test_wfstack.c b/tests/unit/test_wfstack.c
index 578ae92..17901ce 100644
--- a/tests/unit/test_wfstack.c
+++ b/tests/unit/test_wfstack.c
@@ -1,23 +1,11 @@
+// SPDX-FileCopyrightText: 2023 Olivier Dion <odion@efficios.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
 /*
  * test_wfstack.c
  *
  * Userspace RCU library - test wftack race conditions
- *
- * Copyright 2023 - Olivier Dion <odion@efficios.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 #define _LGPL_SOURCE
-- 
2.34.1


From 008a71ef213f23c8d403e086563155121b286c0c Mon Sep 17 00:00:00 2001
From: Michael Jeanson <mjeanson@efficios.com>
Date: Wed, 27 Jul 2022 11:28:58 -0400
Subject: [PATCH 06/16] cleanup: use an enum for the error states of
 nr_cpus_mask

Using an enum with labels for error states instead of literal values
will make the code easier to read and understand.

Change-Id: I4558e17ccb45ab40515bb516af840b2852ee8fc3
Signed-off-by: Michael Jeanson <mjeanson@efficios.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
 src/rculfhash.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/rculfhash.c b/src/rculfhash.c
index a453326..02c7f0f 100644
--- a/src/rculfhash.c
+++ b/src/rculfhash.c
@@ -347,6 +347,11 @@ struct partition_resize_work {
 		    unsigned long start, unsigned long len);
 };
 
+enum nr_cpus_mask_state {
+	NR_CPUS_MASK_INIT_FAILED = -2,
+	NR_CPUS_MASK_UNINITIALIZED = -1,
+};
+
 static struct urcu_workqueue *cds_lfht_workqueue;
 
 /*
@@ -624,7 +629,7 @@ static void mutex_unlock(pthread_mutex_t *mutex)
 		urcu_die(ret);
 }
 
-static long nr_cpus_mask = -1;
+static long nr_cpus_mask = NR_CPUS_MASK_UNINITIALIZED;
 static long split_count_mask = -1;
 static int split_count_order = -1;
 
@@ -634,7 +639,7 @@ static void ht_init_nr_cpus_mask(void)
 
 	maxcpus = get_possible_cpus_array_len();
 	if (maxcpus <= 0) {
-		nr_cpus_mask = -2;
+		nr_cpus_mask = NR_CPUS_MASK_INIT_FAILED;
 		return;
 	}
 	/*
@@ -648,7 +653,7 @@ static void ht_init_nr_cpus_mask(void)
 static
 void alloc_split_items_count(struct cds_lfht *ht)
 {
-	if (nr_cpus_mask == -1)	{
+	if (nr_cpus_mask == NR_CPUS_MASK_UNINITIALIZED)	{
 		ht_init_nr_cpus_mask();
 		if (nr_cpus_mask < 0)
 			split_count_mask = DEFAULT_SPLIT_COUNT_MASK;
@@ -1241,7 +1246,7 @@ void partition_resize_helper(struct cds_lfht *ht, unsigned long i,
 	unsigned long thread, nr_threads;
 	sigset_t newmask, oldmask;
 
-	urcu_posix_assert(nr_cpus_mask != -1);
+	urcu_posix_assert(nr_cpus_mask != NR_CPUS_MASK_UNINITIALIZED);
 	if (nr_cpus_mask < 0 || len < 2 * MIN_PARTITION_PER_THREAD)
 		goto fallback;
 
-- 
2.34.1


From 81270292c23ff28aba1abd9a65f0624b657de82b Mon Sep 17 00:00:00 2001
From: Sam James <sam@gentoo.org>
Date: Sun, 5 Nov 2023 22:27:17 +0000
Subject: [PATCH 07/16] Fix -Walloc-size
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

GCC 14 introduces a new -Walloc-size included in -Wextra which gives:
```
urcu-call-rcu-impl.h:912:20: warning: allocation of insufficient size '1' for type 'struct call_rcu_completion' with size '16' [-Walloc-size[https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Walloc-size]]
urcu-call-rcu-impl.h:927:22: warning: allocation of insufficient size '1' for type 'struct call_rcu_completion_work' with size '24' [-Walloc-size[https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Walloc-size]]
urcu-call-rcu-impl.h:912:20: warning: allocation of insufficient size '1' for type 'struct call_rcu_completion' with size '16' [-Walloc-size[https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Walloc-size]]
urcu-call-rcu-impl.h:927:22: warning: allocation of insufficient size '1' for type 'struct call_rcu_completion_work' with size '24' [-Walloc-size[https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Walloc-size]]
urcu-call-rcu-impl.h:912:20: warning: allocation of insufficient size '1' for type 'struct call_rcu_completion' with size '16' [-Walloc-size[https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Walloc-size]]
urcu-call-rcu-impl.h:927:22: warning: allocation of insufficient size '1' for type 'struct call_rcu_completion_work' with size '24' [-Walloc-size[https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Walloc-size]]
urcu-call-rcu-impl.h:912:20: warning: allocation of insufficient size '1' for type 'struct call_rcu_completion' with size '16' [-Walloc-size[https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Walloc-size]]
urcu-call-rcu-impl.h:927:22: warning: allocation of insufficient size '1' for type 'struct call_rcu_completion_work' with size '24' [-Walloc-size[https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Walloc-size]]
urcu-call-rcu-impl.h:912:20: warning: allocation of insufficient size '1' for type 'struct call_rcu_completion' with size '16' [-Walloc-size[https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Walloc-size]]
urcu-call-rcu-impl.h:927:22: warning: allocation of insufficient size '1' for type 'struct call_rcu_completion_work' with size '24' [-Walloc-size[https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Walloc-size]]
workqueue.c:401:20: warning: allocation of insufficient size '1' for type 'struct urcu_workqueue_completion' with size '16' [-Walloc-size[https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Walloc-size]]
workqueue.c:432:14: warning: allocation of insufficient size '1' for type 'struct urcu_workqueue_completion_work' with size '24' [-Walloc-size[https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Walloc-size]]
urcu-call-rcu-impl.h:912:20: warning: allocation of insufficient size '1' for type 'struct call_rcu_completion' with size '16' [-Walloc-size[https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Walloc-size]]
urcu-call-rcu-impl.h:927:22: warning: allocation of insufficient size '1' for type 'struct call_rcu_completion_work' with size '24' [-Walloc-size[https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Walloc-size]]
qsbr.c:49:14: warning: allocation of insufficient size ‘1’ for type ‘struct mynode’ with size ‘40’ [-Walloc-size[https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Walloc-size]]
mb.c:50:14: warning: allocation of insufficient size ‘1’ for type ‘struct mynode’ with size ‘40’ [-Walloc-size[https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Walloc-size]]
membarrier.c:50:14: warning: allocation of insufficient size ‘1’ for type ‘struct mynode’ with size ‘40’ [-Walloc-size[https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Walloc-size]]
signal.c:49:14: warning: allocation of insufficient size ‘1’ for type ‘struct mynode’ with size ‘40’ [-Walloc-size[https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Walloc-size]]
bp.c:49:14: warning: allocation of insufficient size ‘1’ for type ‘struct mynode’ with size ‘40’ [-Walloc-size[https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Walloc-size]]
```

The calloc prototype is:
```
void *calloc(size_t nmemb, size_t size);
```

So, just swap the number of members and size arguments to match the prototype, as
we're initialising 1 struct of size `sizeof(struct ...)`. GCC then sees we're not
doing anything wrong.

Signed-off-by: Sam James <sam@gentoo.org>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Change-Id: Id84ce5cf9a1b97bfa942597aa188ef6e27e7c10d
---
 doc/examples/urcu-flavors/bp.c         | 2 +-
 doc/examples/urcu-flavors/mb.c         | 2 +-
 doc/examples/urcu-flavors/membarrier.c | 2 +-
 doc/examples/urcu-flavors/qsbr.c       | 2 +-
 src/urcu-call-rcu-impl.h               | 4 ++--
 src/workqueue.c                        | 4 ++--
 6 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/doc/examples/urcu-flavors/bp.c b/doc/examples/urcu-flavors/bp.c
index 0bbc303..d423733 100644
--- a/doc/examples/urcu-flavors/bp.c
+++ b/doc/examples/urcu-flavors/bp.c
@@ -32,7 +32,7 @@ int add_node(uint64_t v)
 {
 	struct mynode *node;
 
-	node = calloc(sizeof(*node), 1);
+	node = calloc(1, sizeof(*node));
 	if (!node)
 		return -1;
 	node->value = v;
diff --git a/doc/examples/urcu-flavors/mb.c b/doc/examples/urcu-flavors/mb.c
index 9c46f2f..7069c24 100644
--- a/doc/examples/urcu-flavors/mb.c
+++ b/doc/examples/urcu-flavors/mb.c
@@ -33,7 +33,7 @@ int add_node(uint64_t v)
 {
 	struct mynode *node;
 
-	node = calloc(sizeof(*node), 1);
+	node = calloc(1, sizeof(*node));
 	if (!node)
 		return -1;
 	node->value = v;
diff --git a/doc/examples/urcu-flavors/membarrier.c b/doc/examples/urcu-flavors/membarrier.c
index dad39cb..73b9bd8 100644
--- a/doc/examples/urcu-flavors/membarrier.c
+++ b/doc/examples/urcu-flavors/membarrier.c
@@ -33,7 +33,7 @@ int add_node(uint64_t v)
 {
 	struct mynode *node;
 
-	node = calloc(sizeof(*node), 1);
+	node = calloc(1, sizeof(*node));
 	if (!node)
 		return -1;
 	node->value = v;
diff --git a/doc/examples/urcu-flavors/qsbr.c b/doc/examples/urcu-flavors/qsbr.c
index 0b5b1a9..ca00933 100644
--- a/doc/examples/urcu-flavors/qsbr.c
+++ b/doc/examples/urcu-flavors/qsbr.c
@@ -32,7 +32,7 @@ int add_node(uint64_t v)
 {
 	struct mynode *node;
 
-	node = calloc(sizeof(*node), 1);
+	node = calloc(1, sizeof(*node));
 	if (!node)
 		return -1;
 	node->value = v;
diff --git a/src/urcu-call-rcu-impl.h b/src/urcu-call-rcu-impl.h
index 22d8570..2ea1efc 100644
--- a/src/urcu-call-rcu-impl.h
+++ b/src/urcu-call-rcu-impl.h
@@ -895,7 +895,7 @@ void rcu_barrier(void)
 		goto online;
 	}
 
-	completion = calloc(sizeof(*completion), 1);
+	completion = calloc(1, sizeof(*completion));
 	if (!completion)
 		urcu_die(errno);
 
@@ -910,7 +910,7 @@ void rcu_barrier(void)
 	cds_list_for_each_entry(crdp, &call_rcu_data_list, list) {
 		struct call_rcu_completion_work *work;
 
-		work = calloc(sizeof(*work), 1);
+		work = calloc(1, sizeof(*work));
 		if (!work)
 			urcu_die(errno);
 		work->completion = completion;
diff --git a/src/workqueue.c b/src/workqueue.c
index a28d09d..10b9fde 100644
--- a/src/workqueue.c
+++ b/src/workqueue.c
@@ -384,7 +384,7 @@ struct urcu_workqueue_completion *urcu_workqueue_create_completion(void)
 {
 	struct urcu_workqueue_completion *completion;
 
-	completion = calloc(sizeof(*completion), 1);
+	completion = calloc(1, sizeof(*completion));
 	if (!completion)
 		urcu_die(errno);
 	urcu_ref_set(&completion->ref, 1);
@@ -415,7 +415,7 @@ void urcu_workqueue_queue_completion(struct urcu_workqueue *workqueue,
 {
 	struct urcu_workqueue_completion_work *work;
 
-	work = calloc(sizeof(*work), 1);
+	work = calloc(1, sizeof(*work));
 	if (!work)
 		urcu_die(errno);
 	work->completion = completion;
-- 
2.34.1


From 4e7b1f9943cbeec405ab1994315ad374071474fb Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 18 Dec 2023 10:24:13 -0500
Subject: [PATCH 08/16] Fix: allow clang to build liburcu on RISC-V

Clang also defines __GNUC__, so use URCU_GCC_VERSION to detect if built
with gcc.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Change-Id: Ic32a0cf64556f55ba4aa11141816fce1afcb0e90
---
 include/urcu/uatomic/riscv.h | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/include/urcu/uatomic/riscv.h b/include/urcu/uatomic/riscv.h
index a70ea25..c1ba29e 100644
--- a/include/urcu/uatomic/riscv.h
+++ b/include/urcu/uatomic/riscv.h
@@ -8,6 +8,12 @@
  * Let the compiler do it.
  */
 
+#ifndef _URCU_ARCH_UATOMIC_RISCV_H
+#define _URCU_ARCH_UATOMIC_RISCV_H
+
+#include <urcu/compiler.h>
+#include <urcu/system.h>
+
 /*
  * See <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104831> for details.
  *
@@ -18,19 +24,10 @@
  *  - <https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4990cf84c460f064d6281d0813f20b0ef20c7448>
  *  - <https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=d199d2e56da2379004e7e0457150409c0c99d3e6>
  */
-#if defined(__GNUC__)
-#  error "Implementations of some atomic operations of GCC for RISC-V \
-          are insufficient for sequential consistency. For this reason \
-          Userspace RCU is currently marked as 'broken' for RISC-V with \
-          GCC. However, it is still possible to use other toolchains."
+#ifdef URCU_GCC_VERSION
+# error "Implementations of some atomic operations of GCC for RISC-V are insufficient for sequential consistency. For this reason Userspace RCU is currently marked as 'broken' for RISC-V with GCC. However, it is still possible to use other toolchains."
 #endif
 
-#ifndef _URCU_ARCH_UATOMIC_RISCV_H
-#define _URCU_ARCH_UATOMIC_RISCV_H
-
-#include <urcu/compiler.h>
-#include <urcu/system.h>
-
 #ifdef __cplusplus
 extern "C" {
 #endif
-- 
2.34.1


From 70a1c7c2646f3cae2e9cd8fc6ff8a75a03df8c7a Mon Sep 17 00:00:00 2001
From: Kienan Stewart <kstewart@efficios.com>
Date: Fri, 24 Nov 2023 09:55:47 -0500
Subject: [PATCH 09/16] docs: Add links to project resources

Indicate that Gerrit (https://review.lttng.org) is the principal place
where patches are submitted and reviewed, rather than the mailing list.

Based on feedback received on the mailing list:
https://lists.lttng.org/pipermail/lttng-dev/2023-November/030670.html

Change-Id: I19f1459045355c57a1f9932a3b540807ba3d4a1c
Signed-off-by: Kienan Stewart <kstewart@efficios.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
 README.md | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/README.md b/README.md
index cadf4d1..d868ac4 100644
--- a/README.md
+++ b/README.md
@@ -476,3 +476,18 @@ Contacts
 
 You can contact the maintainers on the following mailing list:
 `lttng-dev@lists.lttng.org`.
+
+IRC channel: [#lttng](irc://irc.oftc.net/lttng) on the OFTC network
+
+Bug tracker: [Userspace RCU bug tracker](https://bugs.lttng.org/projects/urcu)
+
+Code review: [_userspace-rcu_ project](https://review.lttng.org/q/project:userspace-rcu) on LTTng Review
+
+Continuous integration: [Userspace RCU](https://ci.lttng.org/view/Liburcu/) on LTTng's CI
+
+GitHub mirror: [urcu/userspace-rcu](https://github.com/urcu/userspace-rcu)
+
+Patches are principally submitted and reviewed on [LTTng Review](https://review.lttng.org),
+but may also be submitted to the [mailing list](mailto:lttng-dev@lists.lttng.org)
+with the subject prefix `PATCH urcu` or by pull request on the
+[GitHub mirror](https://github.com/urcu/userspace-rcu).
-- 
2.34.1


From 54bb03ca7635d54dee1466ef885084333e40064a Mon Sep 17 00:00:00 2001
From: Olivier Dion <odion@efficios.com>
Date: Thu, 21 Dec 2023 10:27:18 -0500
Subject: [PATCH 10/16] rcutorture: Check histogram of ages

Ensure that the histogram of ages for all threads is valid after a stress test.

Change-Id: Iadc46f47fe8835ac0e2d8967b6a16a83335a9541
Signed-off-by: Olivier Dion <odion@efficios.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
 tests/regression/rcutorture.h | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/tests/regression/rcutorture.h b/tests/regression/rcutorture.h
index 01f6456..441ff79 100644
--- a/tests/regression/rcutorture.h
+++ b/tests/regression/rcutorture.h
@@ -527,6 +527,7 @@ int stresstest(int nreaders)
 	int t;
 	long long *p;
 	long long sum;
+	int ret;
 
 	init_per_thread(n_reads_pt, 0LL);
 	for_each_thread(t) {
@@ -552,11 +553,19 @@ int stresstest(int nreaders)
 	       n_reads, n_updates, n_mberror);
 	rdiag_start();
 	rdiag("rcu_stress_count:");
+	ret = 0;
 	for (i = 0; i <= RCU_STRESS_PIPE_LEN; i++) {
 		sum = 0LL;
 		for_each_thread(t) {
 			sum += per_thread(rcu_stress_count, t)[i];
 		}
+		/*
+		 * If any entries past the first two are non-zero, RCU is
+		 * broken. See details above about rcu_stress_count.
+		 */
+		if (i > 1 && sum != 0) {
+			ret = -1;
+		}
 		rdiag(" %lld", sum);
 	}
 	rdiag_end();
@@ -564,10 +573,9 @@ int stresstest(int nreaders)
 		diag("Deallocating per-CPU call_rcu threads.");
 		free_all_cpu_call_rcu_data();
 	}
-	if (!n_mberror)
-		return 0;
-	else
-		return -1;
+	if (n_mberror)
+		ret = -1;
+	return ret;
 }
 
 /*
-- 
2.34.1


From c6fbc27960c6ae09af830e33884a40f53a879f6b Mon Sep 17 00:00:00 2001
From: Sergey Fedorov <vital.had@gmail.com>
Date: Fri, 5 Jan 2024 18:44:18 +0800
Subject: [PATCH 11/16] ppc.h: use mftb on ppc

Older versions of GNU as do not support mftbl. The issue affects Darwin
PowerPC, as well as some older versions of NetBSD and Linux. Since mftb
is equivalent and universally understood, just use that.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Change-Id: I098b70fa8bb077143d2d658835586b6b059b879f
---
 include/urcu/arch/ppc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/urcu/arch/ppc.h b/include/urcu/arch/ppc.h
index 80462c6..be9d158 100644
--- a/include/urcu/arch/ppc.h
+++ b/include/urcu/arch/ppc.h
@@ -51,7 +51,7 @@ extern "C" {
 	__extension__					\
 	({ 						\
 		unsigned long rval;			\
-		__asm__ __volatile__ ("mftbl %0" : "=r" (rval));	\
+		__asm__ __volatile__ ("mftb %0" : "=r" (rval));	\
 		rval;					\
 	})
 
-- 
2.34.1


From ac7352545826996e3866c599e4a8eff05fb662ca Mon Sep 17 00:00:00 2001
From: Xenofon Foukas <fon1989@gmail.com>
Date: Thu, 15 Feb 2024 15:21:42 +0000
Subject: [PATCH 12/16] Add support for custom memory allocators for rculfhash

The current implementation of rculfhash relies on calloc()
to allocate memory for its buckets. This can in some cases
lead to latency spikes when accessing the hash table, which
can be avoided by using an optimized custom memory allocator.
However, there is currently no way of replacing the default
allocator with a custom one.

This commit allows custom allocators to be used during the
table initialization. The default behavior of the hash table
remains unaffected, by using the stdlib calloc() and free(),
if no custom allocator is given.

Signed-off-by: Xenofon Foukas <fon1989@gmail.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Change-Id: Id9a405e5dc42e5564ff8623394c86056a4d1ff48
---
 include/urcu/rculfhash.h | 73 +++++++++++++++++++++++++++++++++++-
 src/rculfhash-internal.h | 11 ++++--
 src/rculfhash-mm-chunk.c | 16 ++++----
 src/rculfhash-mm-mmap.c  | 10 ++---
 src/rculfhash-mm-order.c | 16 ++++----
 src/rculfhash.c          | 80 +++++++++++++++++++++++++++++++++++-----
 6 files changed, 170 insertions(+), 36 deletions(-)

diff --git a/include/urcu/rculfhash.h b/include/urcu/rculfhash.h
index 8385cd9..8b57cd8 100644
--- a/include/urcu/rculfhash.h
+++ b/include/urcu/rculfhash.h
@@ -68,6 +68,18 @@ struct cds_lfht_iter {
 #endif
 };
 
+/*
+ * cds_lfht_alloc: Callbacks if we want to use custom memory allocator.
+ */
+struct cds_lfht_alloc {
+	void *(*malloc)(void *state, size_t size);
+	void *(*calloc)(void *state, size_t nmemb, size_t size);
+	void *(*realloc)(void *state, void *ptr, size_t size);
+	void *(*aligned_alloc)(void *state, size_t alignment, size_t size);
+	void  (*free)(void *state, void *ptr);
+	void  *state;
+};
+
 static inline
 struct cds_lfht_node *cds_lfht_iter_get_node(struct cds_lfht_iter *iter)
 {
@@ -115,7 +127,7 @@ enum {
 
 struct cds_lfht_mm_type {
 	struct cds_lfht *(*alloc_cds_lfht)(unsigned long min_nr_alloc_buckets,
-			unsigned long max_nr_buckets);
+			unsigned long max_nr_buckets, const struct cds_lfht_alloc *alloc);
 	void (*alloc_bucket_table)(struct cds_lfht *ht, unsigned long order);
 	void (*free_bucket_table)(struct cds_lfht *ht, unsigned long order);
 	struct cds_lfht_node *(*bucket_at)(struct cds_lfht *ht,
@@ -138,6 +150,19 @@ struct cds_lfht *_cds_lfht_new(unsigned long init_size,
 			const struct rcu_flavor_struct *flavor,
 			pthread_attr_t *attr);
 
+/*
+ * _cds_lfht_new_with_alloc - API used by cds_lfht_new_with_flavor_alloc.
+ */
+extern
+struct cds_lfht *_cds_lfht_new_with_alloc(unsigned long init_size,
+			unsigned long min_nr_alloc_buckets,
+			unsigned long max_nr_buckets,
+			int flags,
+			const struct cds_lfht_mm_type *mm,
+			const struct cds_lfht_alloc *alloc,
+			const struct rcu_flavor_struct *flavor,
+			pthread_attr_t *attr);
+
 /*
  * cds_lfht_new_flavor - allocate a hash table tied to a RCU flavor.
  * @init_size: number of buckets to allocate initially. Must be power of two.
@@ -180,6 +205,52 @@ struct cds_lfht *cds_lfht_new_flavor(unsigned long init_size,
 			flags, NULL, flavor, attr);
 }
 
+/*
+ * cds_lfht_new_with_flavor_alloc - allocate a hash table tied to a RCU flavor.
+ * @init_size: number of buckets to allocate initially. Must be power of two.
+ * @min_nr_alloc_buckets: the minimum number of allocated buckets.
+ *                        (must be power of two)
+ * @max_nr_buckets: the maximum number of hash table buckets allowed.
+ *                  (must be power of two, 0 is accepted, means
+ *                  "infinite")
+ * @flavor: flavor of liburcu to use to synchronize the hash table
+ * @alloc: Custom memory allocator for hash table memory management.
+ *         NULL for default. If a custom allocator is used, then
+ *         the whole interface of struct cds_lfht_alloc must be implemented.
+ * @flags: hash table creation flags (can be combined with bitwise or: '|').
+ *           0: no flags.
+ *           CDS_LFHT_AUTO_RESIZE: automatically resize hash table.
+ *           CDS_LFHT_ACCOUNTING: count the number of node addition
+ *                                and removal in the table
+ * @attr: optional resize worker thread attributes. NULL for default.
+ *
+ * Return NULL on error.
+ * Note: the RCU flavor must be already included before the hash table header.
+ *
+ * The programmer is responsible for ensuring that resize operation has a
+ * priority equal to hash table updater threads. It should be performed by
+ * specifying the appropriate priority in the pthread "attr" argument, and,
+ * for CDS_LFHT_AUTO_RESIZE, by ensuring that call_rcu worker threads also have
+ * this priority level. Having lower priority for call_rcu and resize threads
+ * does not pose any correctness issue, but the resize operations could be
+ * starved by updates, thus leading to long hash table bucket chains.
+ * Threads calling cds_lfht_new are NOT required to be registered RCU
+ * read-side threads. It can be called very early. (e.g. before RCU is
+ * initialized)
+ */
+static inline
+struct cds_lfht *cds_lfht_new_with_flavor_alloc(unsigned long init_size,
+			unsigned long min_nr_alloc_buckets,
+			unsigned long max_nr_buckets,
+			int flags,
+			const struct rcu_flavor_struct *flavor,
+			const struct cds_lfht_alloc *alloc,
+			pthread_attr_t *attr)
+{
+	return _cds_lfht_new_with_alloc(init_size, min_nr_alloc_buckets, max_nr_buckets,
+			flags, NULL, alloc, flavor, attr);
+}
+
 
 #ifdef URCU_API_MAP
 /*
diff --git a/src/rculfhash-internal.h b/src/rculfhash-internal.h
index 081f32e..7225ec9 100644
--- a/src/rculfhash-internal.h
+++ b/src/rculfhash-internal.h
@@ -59,6 +59,7 @@ struct cds_lfht {
 	/* Initial configuration items */
 	unsigned long max_nr_buckets;
 	const struct cds_lfht_mm_type *mm;	/* memory management plugin */
+	const struct cds_lfht_alloc *alloc;	/* memory allocator for mm */
 	const struct rcu_flavor_struct *flavor;	/* RCU flavor */
 
 	long count;			/* global approximate item count */
@@ -139,30 +140,32 @@ extern unsigned int cds_lfht_fls_ulong(unsigned long x);
 extern int cds_lfht_get_count_order_ulong(unsigned long x);
 
 #ifdef POISON_FREE
-#define poison_free(ptr)					\
+#define poison_free(alloc, ptr)					\
 	do {							\
 		if (ptr) {					\
 			memset(ptr, 0x42, sizeof(*(ptr)));	\
-			free(ptr);				\
+			alloc->free(alloc->state, ptr);				\
 		}						\
 	} while (0)
 #else
-#define poison_free(ptr)	free(ptr)
+#define poison_free(alloc, ptr)	alloc->free(alloc->state, ptr)
 #endif
 
 static inline
 struct cds_lfht *__default_alloc_cds_lfht(
 		const struct cds_lfht_mm_type *mm,
+		const struct cds_lfht_alloc *alloc,
 		unsigned long cds_lfht_size,
 		unsigned long min_nr_alloc_buckets,
 		unsigned long max_nr_buckets)
 {
 	struct cds_lfht *ht;
 
-	ht = calloc(1, cds_lfht_size);
+	ht = alloc->calloc(alloc->state, 1, cds_lfht_size);
 	urcu_posix_assert(ht);
 
 	ht->mm = mm;
+	ht->alloc = alloc;
 	ht->bucket_at = mm->bucket_at;
 	ht->min_nr_alloc_buckets = min_nr_alloc_buckets;
 	ht->min_alloc_buckets_order =
diff --git a/src/rculfhash-mm-chunk.c b/src/rculfhash-mm-chunk.c
index cf3a9ff..93931ee 100644
--- a/src/rculfhash-mm-chunk.c
+++ b/src/rculfhash-mm-chunk.c
@@ -14,15 +14,15 @@ static
 void cds_lfht_alloc_bucket_table(struct cds_lfht *ht, unsigned long order)
 {
 	if (order == 0) {
-		ht->tbl_chunk[0] = calloc(ht->min_nr_alloc_buckets,
-			sizeof(struct cds_lfht_node));
+		ht->tbl_chunk[0] = ht->alloc->calloc(ht->alloc->state,
+			ht->min_nr_alloc_buckets, sizeof(struct cds_lfht_node));
 		urcu_posix_assert(ht->tbl_chunk[0]);
 	} else if (order > ht->min_alloc_buckets_order) {
 		unsigned long i, len = 1UL << (order - 1 - ht->min_alloc_buckets_order);
 
 		for (i = len; i < 2 * len; i++) {
-			ht->tbl_chunk[i] = calloc(ht->min_nr_alloc_buckets,
-				sizeof(struct cds_lfht_node));
+			ht->tbl_chunk[i] = ht->alloc->calloc(ht->alloc->state,
+				ht->min_nr_alloc_buckets, sizeof(struct cds_lfht_node));
 			urcu_posix_assert(ht->tbl_chunk[i]);
 		}
 	}
@@ -38,12 +38,12 @@ static
 void cds_lfht_free_bucket_table(struct cds_lfht *ht, unsigned long order)
 {
 	if (order == 0)
-		poison_free(ht->tbl_chunk[0]);
+		poison_free(ht->alloc, ht->tbl_chunk[0]);
 	else if (order > ht->min_alloc_buckets_order) {
 		unsigned long i, len = 1UL << (order - 1 - ht->min_alloc_buckets_order);
 
 		for (i = len; i < 2 * len; i++)
-			poison_free(ht->tbl_chunk[i]);
+			poison_free(ht->alloc, ht->tbl_chunk[i]);
 	}
 	/* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */
 }
@@ -60,7 +60,7 @@ struct cds_lfht_node *bucket_at(struct cds_lfht *ht, unsigned long index)
 
 static
 struct cds_lfht *alloc_cds_lfht(unsigned long min_nr_alloc_buckets,
-		unsigned long max_nr_buckets)
+		unsigned long max_nr_buckets, const struct cds_lfht_alloc *alloc)
 {
 	unsigned long nr_chunks, cds_lfht_size;
 
@@ -72,7 +72,7 @@ struct cds_lfht *alloc_cds_lfht(unsigned long min_nr_alloc_buckets,
 	cds_lfht_size = max(cds_lfht_size, sizeof(struct cds_lfht));
 
 	return __default_alloc_cds_lfht(
-			&cds_lfht_mm_chunk, cds_lfht_size,
+			&cds_lfht_mm_chunk, alloc, cds_lfht_size,
 			min_nr_alloc_buckets, max_nr_buckets);
 }
 
diff --git a/src/rculfhash-mm-mmap.c b/src/rculfhash-mm-mmap.c
index be931e0..2b4bc42 100644
--- a/src/rculfhash-mm-mmap.c
+++ b/src/rculfhash-mm-mmap.c
@@ -118,8 +118,8 @@ void cds_lfht_alloc_bucket_table(struct cds_lfht *ht, unsigned long order)
 	if (order == 0) {
 		if (ht->min_nr_alloc_buckets == ht->max_nr_buckets) {
 			/* small table */
-			ht->tbl_mmap = calloc(ht->max_nr_buckets,
-					sizeof(*ht->tbl_mmap));
+			ht->tbl_mmap = ht->alloc->calloc(ht->alloc->state,
+					ht->max_nr_buckets, sizeof(*ht->tbl_mmap));
 			urcu_posix_assert(ht->tbl_mmap);
 			return;
 		}
@@ -150,7 +150,7 @@ void cds_lfht_free_bucket_table(struct cds_lfht *ht, unsigned long order)
 	if (order == 0) {
 		if (ht->min_nr_alloc_buckets == ht->max_nr_buckets) {
 			/* small table */
-			poison_free(ht->tbl_mmap);
+			poison_free(ht->alloc, ht->tbl_mmap);
 			return;
 		}
 		/* large table */
@@ -174,7 +174,7 @@ struct cds_lfht_node *bucket_at(struct cds_lfht *ht, unsigned long index)
 
 static
 struct cds_lfht *alloc_cds_lfht(unsigned long min_nr_alloc_buckets,
-		unsigned long max_nr_buckets)
+		unsigned long max_nr_buckets, const struct cds_lfht_alloc *alloc)
 {
 	unsigned long page_bucket_size;
 
@@ -189,7 +189,7 @@ struct cds_lfht *alloc_cds_lfht(unsigned long min_nr_alloc_buckets,
 	}
 
 	return __default_alloc_cds_lfht(
-			&cds_lfht_mm_mmap, sizeof(struct cds_lfht),
+			&cds_lfht_mm_mmap, alloc, sizeof(struct cds_lfht),
 			min_nr_alloc_buckets, max_nr_buckets);
 }
 
diff --git a/src/rculfhash-mm-order.c b/src/rculfhash-mm-order.c
index 1014c38..2b0f707 100644
--- a/src/rculfhash-mm-order.c
+++ b/src/rculfhash-mm-order.c
@@ -14,12 +14,12 @@ static
 void cds_lfht_alloc_bucket_table(struct cds_lfht *ht, unsigned long order)
 {
 	if (order == 0) {
-		ht->tbl_order[0] = calloc(ht->min_nr_alloc_buckets,
-			sizeof(struct cds_lfht_node));
+		ht->tbl_order[0] = ht->alloc->calloc(ht->alloc->state,
+			ht->min_nr_alloc_buckets, sizeof(struct cds_lfht_node));
 		urcu_posix_assert(ht->tbl_order[0]);
 	} else if (order > ht->min_alloc_buckets_order) {
-		ht->tbl_order[order] = calloc(1UL << (order -1),
-			sizeof(struct cds_lfht_node));
+		ht->tbl_order[order] = ht->alloc->calloc(ht->alloc->state,
+			1UL << (order -1), sizeof(struct cds_lfht_node));
 		urcu_posix_assert(ht->tbl_order[order]);
 	}
 	/* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */
@@ -34,9 +34,9 @@ static
 void cds_lfht_free_bucket_table(struct cds_lfht *ht, unsigned long order)
 {
 	if (order == 0)
-		poison_free(ht->tbl_order[0]);
+		poison_free(ht->alloc, ht->tbl_order[0]);
 	else if (order > ht->min_alloc_buckets_order)
-		poison_free(ht->tbl_order[order]);
+		poison_free(ht->alloc, ht->tbl_order[order]);
 	/* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */
 }
 
@@ -62,10 +62,10 @@ struct cds_lfht_node *bucket_at(struct cds_lfht *ht, unsigned long index)
 
 static
 struct cds_lfht *alloc_cds_lfht(unsigned long min_nr_alloc_buckets,
-		unsigned long max_nr_buckets)
+		unsigned long max_nr_buckets, const struct cds_lfht_alloc *alloc)
 {
 	return __default_alloc_cds_lfht(
-			&cds_lfht_mm_order, sizeof(struct cds_lfht),
+			&cds_lfht_mm_order, alloc, sizeof(struct cds_lfht),
 			min_nr_alloc_buckets, max_nr_buckets);
 }
 
diff --git a/src/rculfhash.c b/src/rculfhash.c
index 02c7f0f..8d7c1e6 100644
--- a/src/rculfhash.c
+++ b/src/rculfhash.c
@@ -249,6 +249,7 @@
 #include <string.h>
 #include <sched.h>
 #include <unistd.h>
+#include <stdlib.h>
 
 #include "compat-getcpu.h"
 #include <urcu/assert.h>
@@ -568,6 +569,50 @@ unsigned int cds_lfht_fls_ulong(unsigned long x)
 #endif
 }
 
+static void *cds_lfht_malloc(void *state __attribute__((unused)),
+		size_t size)
+{
+	return malloc(size);
+}
+
+static void *cds_lfht_calloc(void *state __attribute__((unused)),
+		size_t nmemb, size_t size)
+{
+	return calloc(nmemb, size);
+}
+
+static void *cds_lfht_realloc(void *state __attribute__((unused)),
+		void *ptr, size_t size)
+{
+	return realloc(ptr, size);
+}
+
+static void *cds_lfht_aligned_alloc(void *state __attribute__((unused)),
+		size_t alignment, size_t size)
+{
+	void *ptr;
+
+	if (posix_memalign(&ptr, alignment, size))
+		return NULL;
+	return ptr;
+}
+
+static void cds_lfht_free(void *state __attribute__((unused)), void *ptr)
+{
+	free(ptr);
+}
+
+
+/* Default memory allocator */
+static struct cds_lfht_alloc cds_lfht_default_alloc = {
+	.malloc = cds_lfht_malloc,
+	.calloc = cds_lfht_calloc,
+	.realloc = cds_lfht_realloc,
+	.aligned_alloc = cds_lfht_aligned_alloc,
+	.free = cds_lfht_free,
+	.state = NULL,
+};
+
 /*
  * Return the minimum order for which x <= (1UL << order).
  * Return -1 if x is 0.
@@ -666,7 +711,7 @@ void alloc_split_items_count(struct cds_lfht *ht)
 	urcu_posix_assert(split_count_mask >= 0);
 
 	if (ht->flags & CDS_LFHT_ACCOUNTING) {
-		ht->split_count = calloc(split_count_mask + 1,
+		ht->split_count = ht->alloc->calloc(ht->alloc->state, split_count_mask + 1,
 					sizeof(struct ht_items_count));
 		urcu_posix_assert(ht->split_count);
 	} else {
@@ -677,7 +722,7 @@ void alloc_split_items_count(struct cds_lfht *ht)
 static
 void free_split_items_count(struct cds_lfht *ht)
 {
-	poison_free(ht->split_count);
+	poison_free(ht->alloc, ht->split_count);
 }
 
 static
@@ -1262,7 +1307,7 @@ void partition_resize_helper(struct cds_lfht *ht, unsigned long i,
 		nr_threads = 1;
 	}
 	partition_len = len >> cds_lfht_get_count_order_ulong(nr_threads);
-	work = calloc(nr_threads, sizeof(*work));
+	work = ht->alloc->calloc(ht->alloc->state, nr_threads, sizeof(*work));
 	if (!work) {
 		dbg_printf("error allocating for resize, single-threading\n");
 		goto fallback;
@@ -1303,7 +1348,7 @@ void partition_resize_helper(struct cds_lfht *ht, unsigned long i,
 		ret = pthread_join(work[thread].thread_id, NULL);
 		urcu_posix_assert(!ret);
 	}
-	free(work);
+	ht->alloc->free(ht->alloc->state, work);
 
 	/*
 	 * A pthread_create failure above will either lead in us having
@@ -1596,11 +1641,12 @@ void cds_lfht_node_init_deleted(struct cds_lfht_node *node)
 	node->next = flag_removed(NULL);
 }
 
-struct cds_lfht *_cds_lfht_new(unsigned long init_size,
+struct cds_lfht *_cds_lfht_new_with_alloc(unsigned long init_size,
 			unsigned long min_nr_alloc_buckets,
 			unsigned long max_nr_buckets,
 			int flags,
 			const struct cds_lfht_mm_type *mm,
+			const struct cds_lfht_alloc *alloc,
 			const struct rcu_flavor_struct *flavor,
 			pthread_attr_t *attr)
 {
@@ -1637,7 +1683,8 @@ struct cds_lfht *_cds_lfht_new(unsigned long init_size,
 	max_nr_buckets = max(max_nr_buckets, min_nr_alloc_buckets);
 	init_size = min(init_size, max_nr_buckets);
 
-	ht = mm->alloc_cds_lfht(min_nr_alloc_buckets, max_nr_buckets);
+	ht = mm->alloc_cds_lfht(min_nr_alloc_buckets, max_nr_buckets, alloc ? : &cds_lfht_default_alloc);
+
 	urcu_posix_assert(ht);
 	urcu_posix_assert(ht->mm == mm);
 	urcu_posix_assert(ht->bucket_at == mm->bucket_at);
@@ -1657,6 +1704,19 @@ struct cds_lfht *_cds_lfht_new(unsigned long init_size,
 	return ht;
 }
 
+struct cds_lfht *_cds_lfht_new(unsigned long init_size,
+			unsigned long min_nr_alloc_buckets,
+			unsigned long max_nr_buckets,
+			int flags,
+			const struct cds_lfht_mm_type *mm,
+			const struct rcu_flavor_struct *flavor,
+			pthread_attr_t *attr)
+{
+	return _cds_lfht_new_with_alloc(init_size,
+			min_nr_alloc_buckets, max_nr_buckets,
+			flags, mm, NULL, flavor, attr);
+}
+
 void cds_lfht_lookup(struct cds_lfht *ht, unsigned long hash,
 		cds_lfht_match_fct match, const void *key,
 		struct cds_lfht_iter *iter)
@@ -1945,7 +2005,7 @@ void do_auto_resize_destroy_cb(struct urcu_work *work)
 	if (ret)
 		urcu_die(ret);
 	ht->flavor->unregister_thread();
-	poison_free(ht);
+	poison_free(ht->alloc, ht);
 }
 
 /*
@@ -1989,7 +2049,7 @@ int cds_lfht_destroy(struct cds_lfht *ht, pthread_attr_t **attr)
 	ret = pthread_mutex_destroy(&ht->resize_mutex);
 	if (ret)
 		ret = -EBUSY;
-	poison_free(ht);
+	poison_free(ht->alloc, ht);
 	return ret;
 }
 
@@ -2144,7 +2204,7 @@ void do_resize_cb(struct urcu_work *work)
 	_do_cds_lfht_resize(ht);
 	mutex_unlock(&ht->resize_mutex);
 	ht->flavor->unregister_thread();
-	poison_free(work);
+	poison_free(ht->alloc, work);
 }
 
 static
@@ -2160,7 +2220,7 @@ void __cds_lfht_resize_lazy_launch(struct cds_lfht *ht)
 		if (uatomic_load(&ht->in_progress_destroy, CMM_RELAXED)) {
 			return;
 		}
-		work = malloc(sizeof(*work));
+		work = ht->alloc->malloc(ht->alloc->state, sizeof(*work));
 		if (work == NULL) {
 			dbg_printf("error allocating resize work, bailing out\n");
 			return;
-- 
2.34.1


From 8c5aef691745bc06750886e10bfda84f2b749ed8 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Thu, 15 Feb 2024 17:29:22 -0500
Subject: [PATCH 13/16] Fix: change order of _cds_lfht_new_with_alloc
 parameters

The "flavor" parameter should come before the "alloc" parameter
to match the order of cds_lfht_new_with_flavor_alloc() parameters.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Change-Id: Ia704a0fd9cb90af966464e25e6202fed1a952eed
---
 include/urcu/rculfhash.h | 4 ++--
 src/rculfhash.c          | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/urcu/rculfhash.h b/include/urcu/rculfhash.h
index 8b57cd8..e0f4b35 100644
--- a/include/urcu/rculfhash.h
+++ b/include/urcu/rculfhash.h
@@ -159,8 +159,8 @@ struct cds_lfht *_cds_lfht_new_with_alloc(unsigned long init_size,
 			unsigned long max_nr_buckets,
 			int flags,
 			const struct cds_lfht_mm_type *mm,
-			const struct cds_lfht_alloc *alloc,
 			const struct rcu_flavor_struct *flavor,
+			const struct cds_lfht_alloc *alloc,
 			pthread_attr_t *attr);
 
 /*
@@ -248,7 +248,7 @@ struct cds_lfht *cds_lfht_new_with_flavor_alloc(unsigned long init_size,
 			pthread_attr_t *attr)
 {
 	return _cds_lfht_new_with_alloc(init_size, min_nr_alloc_buckets, max_nr_buckets,
-			flags, NULL, alloc, flavor, attr);
+			flags, NULL, flavor, alloc, attr);
 }
 
 
diff --git a/src/rculfhash.c b/src/rculfhash.c
index 8d7c1e6..10f5b8e 100644
--- a/src/rculfhash.c
+++ b/src/rculfhash.c
@@ -1646,8 +1646,8 @@ struct cds_lfht *_cds_lfht_new_with_alloc(unsigned long init_size,
 			unsigned long max_nr_buckets,
 			int flags,
 			const struct cds_lfht_mm_type *mm,
-			const struct cds_lfht_alloc *alloc,
 			const struct rcu_flavor_struct *flavor,
+			const struct cds_lfht_alloc *alloc,
 			pthread_attr_t *attr)
 {
 	struct cds_lfht *ht;
@@ -1714,7 +1714,7 @@ struct cds_lfht *_cds_lfht_new(unsigned long init_size,
 {
 	return _cds_lfht_new_with_alloc(init_size,
 			min_nr_alloc_buckets, max_nr_buckets,
-			flags, mm, NULL, flavor, attr);
+			flags, mm, flavor, NULL, attr);
 }
 
 void cds_lfht_lookup(struct cds_lfht *ht, unsigned long hash,
-- 
2.34.1


From 5307e3abcb6668ceb84114e1c62b635cb995f4c7 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Thu, 28 Mar 2024 14:30:01 -0400
Subject: [PATCH 14/16] ppc: Document cache line size choice

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Change-Id: I63603aaf5529e9a7810cfeffe19f32aea0dd13df
---
 include/urcu/arch/ppc.h | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/include/urcu/arch/ppc.h b/include/urcu/arch/ppc.h
index be9d158..1285b6d 100644
--- a/include/urcu/arch/ppc.h
+++ b/include/urcu/arch/ppc.h
@@ -19,7 +19,30 @@
 extern "C" {
 #endif
 
-/* Include size of POWER5+ L3 cache lines: 256 bytes */
+/*
+ * Most powerpc machines have 128 bytes cache lines, but to make sure
+ * there is no false sharing on all known Power hardware, use the
+ * largest known cache line size, which is the physical size of POWER5
+ * L3 cache lines (256 bytes).
+ *
+ * "Each slice [of the L3] is 12-way set-associative, with 4,096
+ * congruence classes of 256-byte lines managed as two 128-byte sectors
+ * to match the L2 line size."
+ *
+ * From: "POWER5 system microarchitecture",
+ *       IBM Journal of Research & Development,
+ *       vol. 49, no. 4/5, July/September 2005
+ *       https://www.eecg.utoronto.ca/~moshovos/ACA08/readings/power5.pdf
+ *
+ * This value is a compile-time constant, which prevents us from
+ * querying the processor for the cache line size at runtime. We
+ * therefore need to be pessimistic and assume the largest known cache
+ * line size.
+ *
+ * This value is exposed through public headers, so tuning it for
+ * specific environments is a concern for ABI compatibility between
+ * applications and liburcu.
+ */
 #define CAA_CACHE_LINE_SIZE	256
 
 #ifdef __NO_LWSYNC__
-- 
2.34.1


From 6b071d73cffc66df0bdb9ee3c062143f06923c78 Mon Sep 17 00:00:00 2001
From: Michael Jeanson <mjeanson@efficios.com>
Date: Mon, 3 Jul 2023 14:38:08 -0400
Subject: [PATCH 15/16] cleanup: move rand_r compat code to tests

This compat code is only used in the tests; move it there, as it should
probably not be used in the library.

Change-Id: I0a36e790c236bb90d07a711af9cc6f8388fa4c81
Signed-off-by: Michael Jeanson <mjeanson@efficios.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
 src/Makefile.am                     | 2 +-
 tests/common/Makefile.am            | 9 +++++----
 {src => tests/common}/compat-rand.h | 0
 3 files changed, 6 insertions(+), 5 deletions(-)
 rename {src => tests/common}/compat-rand.h (100%)

diff --git a/src/Makefile.am b/src/Makefile.am
index ede2669..b555c81 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -9,7 +9,7 @@ AM_CPPFLAGS += -I$(top_srcdir)/src
 AM_LDFLAGS=-version-info $(URCU_LIBRARY_VERSION) $(LT_NO_UNDEFINED)
 
 dist_noinst_HEADERS = urcu-die.h urcu-wait.h compat-getcpu.h \
-	compat-rand.h urcu-utils.h compat-smp.h
+	urcu-utils.h compat-smp.h
 
 COMPAT = compat_arch.c compat_futex.c
 
diff --git a/tests/common/Makefile.am b/tests/common/Makefile.am
index 2cdc273..af6d89f 100644
--- a/tests/common/Makefile.am
+++ b/tests/common/Makefile.am
@@ -2,12 +2,13 @@
 #
 # SPDX-License-Identifier: MIT
 
-AM_CPPFLAGS += -I$(top_srcdir)/src
+AM_CPPFLAGS += -I$(top_srcdir)/src -I$(top_srcdir)/tests/common
 
-noinst_HEADERS = thread-id.h
+noinst_HEADERS = \
+	api.h \
+	compat-rand.h \
+	thread-id.h
 
 noinst_LTLIBRARIES = libdebug-yield.la
 
 libdebug_yield_la_SOURCES = debug-yield.c debug-yield.h
-
-EXTRA_DIST = api.h
diff --git a/src/compat-rand.h b/tests/common/compat-rand.h
similarity index 100%
rename from src/compat-rand.h
rename to tests/common/compat-rand.h
-- 
2.34.1


From ae5712d110b720768dcc30522171506a4b7f28a4 Mon Sep 17 00:00:00 2001
From: Olivier Dion <odion@efficios.com>
Date: Tue, 22 Aug 2023 16:23:17 -0400
Subject: [PATCH 16/16] uatomic/x86: Remove redundant memory barriers

When liburcu is configured to _not_ use atomic builtins, the
implementation of atomic operations is done using inline assembler for
each architecture.

Because we control the emitted assembler, we know whether specific
operations (e.g. lock; cmpxchg) already have an implicit memory barrier.
In those cases, emitting an explicit cmm_smp_mb() before/after the
operation is redundant and hurts performance.

Remove those redundant barriers on x86.

Change-Id: Ic1f6cfe9c2afe250946549cf6187f8fa88f5b009
Signed-off-by: Olivier Dion <odion@efficios.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
 include/urcu/uatomic/generic.h | 228 ++++------------
 include/urcu/uatomic/x86.h     | 470 +++++++++++++++++++++++++++++++++
 2 files changed, 516 insertions(+), 182 deletions(-)

diff --git a/include/urcu/uatomic/generic.h b/include/urcu/uatomic/generic.h
index 8f8c437..ed655bb 100644
--- a/include/urcu/uatomic/generic.h
+++ b/include/urcu/uatomic/generic.h
@@ -15,7 +15,6 @@
  */
 
 #include <stdint.h>
-#include <stdlib.h>
 #include <urcu/compiler.h>
 #include <urcu/system.h>
 
@@ -27,125 +26,61 @@ extern "C" {
 #define uatomic_set(addr, v)	((void) CMM_STORE_SHARED(*(addr), (v)))
 #endif
 
-#define uatomic_load_store_return_op(op, addr, v, mo)			\
-	__extension__							\
-	({								\
-									\
-		switch (mo) {						\
-		case CMM_ACQUIRE:					\
-		case CMM_CONSUME:					\
-		case CMM_RELAXED:					\
-			break;						\
-		case CMM_RELEASE:					\
-		case CMM_ACQ_REL:					\
-		case CMM_SEQ_CST:					\
-		case CMM_SEQ_CST_FENCE:					\
-			cmm_smp_mb();					\
-			break;						\
-		default:						\
-			abort();					\
-		}							\
-									\
-		__typeof__((*addr)) _value = op(addr, v);		\
-									\
-		switch (mo) {						\
-		case CMM_CONSUME:					\
-			cmm_smp_read_barrier_depends();			\
-			break;						\
-		case CMM_ACQUIRE:					\
-		case CMM_ACQ_REL:					\
-		case CMM_SEQ_CST:					\
-		case CMM_SEQ_CST_FENCE:					\
-			cmm_smp_mb();					\
-			break;						\
-		case CMM_RELAXED:					\
-		case CMM_RELEASE:					\
-			break;						\
-		default:						\
-			abort();					\
-		}							\
-		_value;							\
+/*
+ * Can be defined for the architecture.
+ *
+ * What needs to be emitted _before_ the `operation' with memory ordering `mo'.
+ */
+#ifndef _cmm_compat_c11_smp_mb__before_mo
+# define _cmm_compat_c11_smp_mb__before_mo(operation, mo) cmm_smp_mb()
+#endif
+
+/*
+ * Can be defined for the architecture.
+ *
+ * What needs to be emitted _after_ the `operation' with memory ordering `mo'.
+ */
+#ifndef _cmm_compat_c11_smp_mb__after_mo
+# define _cmm_compat_c11_smp_mb__after_mo(operation, mo) cmm_smp_mb()
+#endif
+
+#define uatomic_load_store_return_op(op, addr, v, mo)		\
+	__extension__						\
+	({							\
+		_cmm_compat_c11_smp_mb__before_mo(op, mo);	\
+		__typeof__((*addr)) _value = op(addr, v);	\
+		_cmm_compat_c11_smp_mb__after_mo(op, mo);	\
+								\
+		_value;						\
 	})
 
-#define uatomic_load_store_op(op, addr, v, mo)				\
-	do {								\
-		switch (mo) {						\
-		case CMM_ACQUIRE:					\
-		case CMM_CONSUME:					\
-		case CMM_RELAXED:					\
-			break;						\
-		case CMM_RELEASE:					\
-		case CMM_ACQ_REL:					\
-		case CMM_SEQ_CST:					\
-		case CMM_SEQ_CST_FENCE:					\
-			cmm_smp_mb();					\
-			break;						\
-		default:						\
-			abort();					\
-		}							\
-									\
-		op(addr, v);						\
-									\
-		switch (mo) {						\
-		case CMM_CONSUME:					\
-			cmm_smp_read_barrier_depends();			\
-			break;						\
-		case CMM_ACQUIRE:					\
-		case CMM_ACQ_REL:					\
-		case CMM_SEQ_CST:					\
-		case CMM_SEQ_CST_FENCE:					\
-			cmm_smp_mb();					\
-			break;						\
-		case CMM_RELAXED:					\
-		case CMM_RELEASE:					\
-			break;						\
-		default:						\
-			abort();					\
-		}							\
+#define uatomic_load_store_op(op, addr, v, mo)			\
+	do {							\
+		_cmm_compat_c11_smp_mb__before_mo(op, mo);	\
+		op(addr, v);					\
+		_cmm_compat_c11_smp_mb__after_mo(op, mo);	\
 	} while (0)
 
-#define uatomic_store(addr, v, mo)			\
-	do {						\
-		switch (mo) {				\
-		case CMM_RELAXED:			\
-			break;				\
-		case CMM_RELEASE:			\
-		case CMM_SEQ_CST:			\
-		case CMM_SEQ_CST_FENCE:			\
-			cmm_smp_mb();			\
-			break;				\
-		default:				\
-			abort();			\
-		}					\
-							\
-		uatomic_set(addr, v);			\
-							\
-		switch (mo) {				\
-		case CMM_RELAXED:			\
-		case CMM_RELEASE:			\
-			break;				\
-		case CMM_SEQ_CST:			\
-		case CMM_SEQ_CST_FENCE:			\
-			cmm_smp_mb();			\
-			break;				\
-		default:				\
-			abort();			\
-		}					\
+#define uatomic_store(addr, v, mo)					\
+	do {								\
+		_cmm_compat_c11_smp_mb__before_mo(uatomic_set, mo);	\
+		uatomic_set(addr, v);					\
+		_cmm_compat_c11_smp_mb__after_mo(uatomic_set, mo);	\
 	} while (0)
 
-#define uatomic_and_mo(addr, v, mo)				\
+#define uatomic_and_mo(addr, v, mo)			\
 	uatomic_load_store_op(uatomic_and, addr, v, mo)
 
-#define uatomic_or_mo(addr, v, mo)				\
+#define uatomic_or_mo(addr, v, mo)			\
 	uatomic_load_store_op(uatomic_or, addr, v, mo)
 
-#define uatomic_add_mo(addr, v, mo)				\
+#define uatomic_add_mo(addr, v, mo)			\
 	uatomic_load_store_op(uatomic_add, addr, v, mo)
 
-#define uatomic_sub_mo(addr, v, mo)				\
+#define uatomic_sub_mo(addr, v, mo)			\
 	uatomic_load_store_op(uatomic_sub, addr, v, mo)
 
-#define uatomic_inc_mo(addr, mo)				\
+#define uatomic_inc_mo(addr, mo)			\
 	uatomic_load_store_op(uatomic_add, addr, 1, mo)
 
 #define uatomic_dec_mo(addr, mo)				\
@@ -157,58 +92,14 @@ extern "C" {
 #define uatomic_cmpxchg_mo(addr, old, new, mos, mof)			\
 	__extension__							\
 	({								\
-		switch (mos) {						\
-		case CMM_ACQUIRE:					\
-		case CMM_CONSUME:					\
-		case CMM_RELAXED:					\
-			break;						\
-		case CMM_RELEASE:					\
-		case CMM_ACQ_REL:					\
-		case CMM_SEQ_CST:					\
-		case CMM_SEQ_CST_FENCE:					\
-			cmm_smp_mb();					\
-			break;						\
-		default:						\
-			abort();					\
-		}							\
-									\
+		_cmm_compat_c11_smp_mb__before_mo(uatomic_cmpxchg, mos); \
 		__typeof__(*(addr)) _value = uatomic_cmpxchg(addr, old,	\
 							new);		\
 									\
 		if (_value == (old)) {					\
-			switch (mos) {					\
-			case CMM_CONSUME:				\
-				cmm_smp_read_barrier_depends();		\
-				break;					\
-			case CMM_ACQUIRE:				\
-			case CMM_ACQ_REL:				\
-			case CMM_SEQ_CST:				\
-			case CMM_SEQ_CST_FENCE:				\
-				cmm_smp_mb();				\
-				break;					\
-			case CMM_RELAXED:				\
-			case CMM_RELEASE:				\
-				break;					\
-			default:					\
-				abort();				\
-			}						\
+			_cmm_compat_c11_smp_mb__after_mo(uatomic_cmpxchg, mos);	\
 		} else {						\
-			switch (mof) {					\
-			case CMM_CONSUME:				\
-				cmm_smp_read_barrier_depends();		\
-				break;					\
-			case CMM_ACQUIRE:				\
-			case CMM_ACQ_REL:				\
-			case CMM_SEQ_CST:				\
-			case CMM_SEQ_CST_FENCE:				\
-				cmm_smp_mb();				\
-				break;					\
-			case CMM_RELAXED:				\
-			case CMM_RELEASE:				\
-				break;					\
-			default:					\
-				abort();				\
-			}						\
+			_cmm_compat_c11_smp_mb__after_mo(uatomic_cmpxchg, mof);	\
 		}							\
 		_value;							\
 	})
@@ -222,7 +113,6 @@ extern "C" {
 #define uatomic_sub_return_mo(addr, v, mo)				\
 	uatomic_load_store_return_op(uatomic_sub_return, addr, v)
 
-
 #ifndef uatomic_read
 #define uatomic_read(addr)	CMM_LOAD_SHARED(*(addr))
 #endif
@@ -230,35 +120,9 @@ extern "C" {
 #define uatomic_load(addr, mo)						\
 	__extension__							\
 	({								\
-		switch (mo) {						\
-		case CMM_ACQUIRE:					\
-		case CMM_CONSUME:					\
-		case CMM_RELAXED:					\
-			break;						\
-		case CMM_SEQ_CST:					\
-		case CMM_SEQ_CST_FENCE:					\
-			cmm_smp_mb();					\
-			break;						\
-		default:						\
-			abort();					\
-		}							\
-									\
+		_cmm_compat_c11_smp_mb__before_mo(uatomic_read, mo);	\
 		__typeof__(*(addr)) _rcu_value = uatomic_read(addr);	\
-									\
-		switch (mo) {						\
-		case CMM_RELAXED:					\
-			break;						\
-		case CMM_CONSUME:					\
-			cmm_smp_read_barrier_depends();			\
-			break;						\
-		case CMM_ACQUIRE:					\
-		case CMM_SEQ_CST:					\
-		case CMM_SEQ_CST_FENCE:					\
-			cmm_smp_mb();					\
-			break;						\
-		default:						\
-			abort();					\
-		}							\
+		_cmm_compat_c11_smp_mb__after_mo(uatomic_read, mo);	\
 									\
 		_rcu_value;						\
 	})
diff --git a/include/urcu/uatomic/x86.h b/include/urcu/uatomic/x86.h
index b5725e0..616eee9 100644
--- a/include/urcu/uatomic/x86.h
+++ b/include/urcu/uatomic/x86.h
@@ -8,6 +8,8 @@
 #ifndef _URCU_ARCH_UATOMIC_X86_H
 #define _URCU_ARCH_UATOMIC_X86_H
 
+#include <stdlib.h>		/* For abort(3). */
+
 /*
  * Code inspired from libuatomic_ops-1.2, inherited in part from the
  * Boehm-Demers-Weiser conservative garbage collector.
@@ -630,6 +632,474 @@ extern unsigned long _compat_uatomic_add_return(void *addr,
 #define cmm_smp_mb__before_uatomic_dec()	cmm_barrier()
 #define cmm_smp_mb__after_uatomic_dec()		cmm_barrier()
 
+static inline void _cmm_compat_c11_smp_mb__before_uatomic_read_mo(enum cmm_memorder mo)
+{
+	/*
+	 * A SMP barrier is not necessary for CMM_SEQ_CST because only a
+	 * previous store can be reordered with the load.  However, emitting the
+	 * memory barrier after the store is sufficient to prevent reordering
+	 * between the two.  This follows the toolchains' decision of emitting
+	 * the memory fence on the stores instead of the loads.
+	 *
+	 * A compiler barrier is necessary because the underlying operation does
+	 * not clobber the registers.
+	 */
+	switch (mo) {
+	case CMM_RELAXED:	/* Fall-through */
+	case CMM_ACQUIRE:	/* Fall-through */
+	case CMM_CONSUME:	/* Fall-through */
+	case CMM_SEQ_CST:	/* Fall-through */
+	case CMM_SEQ_CST_FENCE:
+		cmm_barrier();
+		break;
+	case CMM_ACQ_REL:	/* Fall-through */
+	case CMM_RELEASE:	/* Fall-through */
+	default:
+		abort();
+		break;
+	}
+}
+
+static inline void _cmm_compat_c11_smp_mb__after_uatomic_read_mo(enum cmm_memorder mo)
+{
+	/*
+	 * A SMP barrier is not necessary for CMM_SEQ_CST because following
+	 * loads and stores cannot be reordered with the load.
+	 *
+	 * A SMP barrier is however necessary for CMM_SEQ_CST_FENCE to respect
+	 * the memory model, since the underlying operation does not have a lock
+	 * prefix.
+	 *
+	 * A compiler barrier is necessary because the underlying operation does
+	 * not clobber the registers.
+	 */
+	switch (mo) {
+	case CMM_SEQ_CST_FENCE:
+		cmm_smp_mb();
+		break;
+	case CMM_RELAXED:	/* Fall-through */
+	case CMM_ACQUIRE:	/* Fall-through */
+	case CMM_CONSUME:	/* Fall-through */
+	case CMM_SEQ_CST:
+		cmm_barrier();
+		break;
+	case CMM_ACQ_REL:	/* Fall-through */
+	case CMM_RELEASE:	/* Fall-through */
+	default:
+		abort();
+		break;
+	}
+}
+
+static inline void _cmm_compat_c11_smp_mb__before_uatomic_set_mo(enum cmm_memorder mo)
+{
+	/*
+	 * A SMP barrier is not necessary for CMM_SEQ_CST because the store can
+	 * only be reordered with later loads.
+	 *
+	 * A compiler barrier is necessary because the underlying operation does
+	 * not clobber the registers.
+	 */
+	switch (mo) {
+	case CMM_RELAXED:	/* Fall-through */
+	case CMM_RELEASE:	/* Fall-through */
+	case CMM_SEQ_CST:	/* Fall-through */
+	case CMM_SEQ_CST_FENCE:
+		cmm_barrier();
+		break;
+	case CMM_ACQ_REL:	/* Fall-through */
+	case CMM_ACQUIRE:	/* Fall-through */
+	case CMM_CONSUME:	/* Fall-through */
+	default:
+		abort();
+		break;
+	}
+}
+
+static inline void _cmm_compat_c11_smp_mb__after_uatomic_set_mo(enum cmm_memorder mo)
+{
+	/*
+	 * A SMP barrier is necessary for CMM_SEQ_CST because the store can be
+	 * reordered with later loads.  Since no memory barrier is being emitted
+	 * before loads, one has to be emitted after the store.  This follows
+	 * the toolchains' decision of emitting the memory fence on the stores
+	 * instead of the loads.
+	 *
+	 * A SMP barrier is necessary for CMM_SEQ_CST_FENCE to respect the
+	 * memory model, since the underlying store does not have a lock prefix.
+	 *
+	 * A compiler barrier is necessary because the underlying operation does
+	 * not clobber the registers.
+	 */
+	switch (mo) {
+	case CMM_SEQ_CST:	/* Fall-through */
+	case CMM_SEQ_CST_FENCE:
+		cmm_smp_mb();
+		break;
+	case CMM_RELAXED:	/* Fall-through */
+	case CMM_RELEASE:
+		cmm_barrier();
+		break;
+	case CMM_ACQ_REL:	/* Fall-through */
+	case CMM_ACQUIRE:	/* Fall-through */
+	case CMM_CONSUME:	/* Fall-through */
+	default:
+		abort();
+		break;
+	}
+}
+
+static inline void _cmm_compat_c11_smp_mb__before_uatomic_xchg_mo(enum cmm_memorder mo)
+{
+	/* NOP. uatomic_xchg has implicit lock prefix. */
+	switch (mo) {
+	case CMM_RELAXED:	/* Fall-through */
+	case CMM_ACQUIRE:	/* Fall-through */
+	case CMM_CONSUME:	/* Fall-through */
+	case CMM_RELEASE:	/* Fall-through */
+	case CMM_ACQ_REL:	/* Fall-through */
+	case CMM_SEQ_CST:	/* Fall-through */
+	case CMM_SEQ_CST_FENCE:
+		break;
+	default:
+		abort();
+	}
+}
+
+static inline void _cmm_compat_c11_smp_mb__after_uatomic_xchg_mo(enum cmm_memorder mo)
+{
+	/* NOP. uatomic_xchg has implicit lock prefix. */
+	switch (mo) {
+	case CMM_RELAXED:	/* Fall-through */
+	case CMM_ACQUIRE:	/* Fall-through */
+	case CMM_CONSUME:	/* Fall-through */
+	case CMM_RELEASE:	/* Fall-through */
+	case CMM_ACQ_REL:	/* Fall-through */
+	case CMM_SEQ_CST:	/* Fall-through */
+	case CMM_SEQ_CST_FENCE:
+		break;
+	default:
+		abort();
+	}
+}
+
+static inline void _cmm_compat_c11_smp_mb__before_uatomic_cmpxchg_mo(enum cmm_memorder mo)
+{
+	/* NOP. uatomic_cmpxchg has implicit lock prefix. */
+	switch (mo) {
+	case CMM_RELAXED:	/* Fall-through */
+	case CMM_ACQUIRE:	/* Fall-through */
+	case CMM_CONSUME:	/* Fall-through */
+	case CMM_RELEASE:	/* Fall-through */
+	case CMM_ACQ_REL:	/* Fall-through */
+	case CMM_SEQ_CST:	/* Fall-through */
+	case CMM_SEQ_CST_FENCE:
+		break;
+	default:
+		abort();
+	}
+}
+
+static inline void _cmm_compat_c11_smp_mb__after_uatomic_cmpxchg_mo(enum cmm_memorder mo)
+{
+	/* NOP. uatomic_cmpxchg has implicit lock prefix. */
+	switch (mo) {
+	case CMM_RELAXED:	/* Fall-through */
+	case CMM_ACQUIRE:	/* Fall-through */
+	case CMM_CONSUME:	/* Fall-through */
+	case CMM_RELEASE:	/* Fall-through */
+	case CMM_ACQ_REL:	/* Fall-through */
+	case CMM_SEQ_CST:	/* Fall-through */
+	case CMM_SEQ_CST_FENCE:
+		break;
+	default:
+		abort();
+	}
+}
+
+static inline void _cmm_compat_c11_smp_mb__before_uatomic_and_mo(enum cmm_memorder mo)
+{
+	/* NOP. uatomic_and has explicit lock prefix. */
+	switch (mo) {
+	case CMM_RELAXED:	/* Fall-through */
+	case CMM_ACQUIRE:	/* Fall-through */
+	case CMM_CONSUME:	/* Fall-through */
+	case CMM_RELEASE:	/* Fall-through */
+	case CMM_ACQ_REL:	/* Fall-through */
+	case CMM_SEQ_CST:	/* Fall-through */
+	case CMM_SEQ_CST_FENCE:
+		break;
+	default:
+		abort();
+	}
+}
+
+static inline void _cmm_compat_c11_smp_mb__after_uatomic_and_mo(enum cmm_memorder mo)
+{
+	/* NOP. uatomic_and has explicit lock prefix. */
+	switch (mo) {
+	case CMM_RELAXED:	/* Fall-through */
+	case CMM_ACQUIRE:	/* Fall-through */
+	case CMM_CONSUME:	/* Fall-through */
+	case CMM_RELEASE:	/* Fall-through */
+	case CMM_ACQ_REL:	/* Fall-through */
+	case CMM_SEQ_CST:	/* Fall-through */
+	case CMM_SEQ_CST_FENCE:
+		break;
+	default:
+		abort();
+	}
+}
+
+static inline void _cmm_compat_c11_smp_mb__before_uatomic_or_mo(enum cmm_memorder mo)
+{
+	/* NOP. uatomic_or has explicit lock prefix. */
+	switch (mo) {
+	case CMM_RELAXED:	/* Fall-through */
+	case CMM_ACQUIRE:	/* Fall-through */
+	case CMM_CONSUME:	/* Fall-through */
+	case CMM_RELEASE:	/* Fall-through */
+	case CMM_ACQ_REL:	/* Fall-through */
+	case CMM_SEQ_CST:	/* Fall-through */
+	case CMM_SEQ_CST_FENCE:
+		break;
+	default:
+		abort();
+	}
+}
+
+static inline void _cmm_compat_c11_smp_mb__after_uatomic_or_mo(enum cmm_memorder mo)
+{
+	/* NOP. uatomic_or has explicit lock prefix. */
+	switch (mo) {
+	case CMM_RELAXED:	/* Fall-through */
+	case CMM_ACQUIRE:	/* Fall-through */
+	case CMM_CONSUME:	/* Fall-through */
+	case CMM_RELEASE:	/* Fall-through */
+	case CMM_ACQ_REL:	/* Fall-through */
+	case CMM_SEQ_CST:	/* Fall-through */
+	case CMM_SEQ_CST_FENCE:
+		break;
+	default:
+		abort();
+	}
+}
+
+static inline void _cmm_compat_c11_smp_mb__before_uatomic_add_mo(enum cmm_memorder mo)
+{
+	/* NOP. uatomic_add has explicit lock prefix. */
+	switch (mo) {
+	case CMM_RELAXED:	/* Fall-through */
+	case CMM_ACQUIRE:	/* Fall-through */
+	case CMM_CONSUME:	/* Fall-through */
+	case CMM_RELEASE:	/* Fall-through */
+	case CMM_ACQ_REL:	/* Fall-through */
+	case CMM_SEQ_CST:	/* Fall-through */
+	case CMM_SEQ_CST_FENCE:
+		break;
+	default:
+		abort();
+	}
+}
+
+static inline void _cmm_compat_c11_smp_mb__after_uatomic_add_mo(enum cmm_memorder mo)
+{
+	/* NOP. uatomic_add has explicit lock prefix. */
+	switch (mo) {
+	case CMM_RELAXED:	/* Fall-through */
+	case CMM_ACQUIRE:	/* Fall-through */
+	case CMM_CONSUME:	/* Fall-through */
+	case CMM_RELEASE:	/* Fall-through */
+	case CMM_ACQ_REL:	/* Fall-through */
+	case CMM_SEQ_CST:	/* Fall-through */
+	case CMM_SEQ_CST_FENCE:
+		break;
+	default:
+		abort();
+	}
+}
+
+static inline void _cmm_compat_c11_smp_mb__before_uatomic_sub_mo(enum cmm_memorder mo)
+{
+	/* NOP. uatomic_sub has explicit lock prefix. */
+	switch (mo) {
+	case CMM_RELAXED:	/* Fall-through */
+	case CMM_ACQUIRE:	/* Fall-through */
+	case CMM_CONSUME:	/* Fall-through */
+	case CMM_RELEASE:	/* Fall-through */
+	case CMM_ACQ_REL:	/* Fall-through */
+	case CMM_SEQ_CST:	/* Fall-through */
+	case CMM_SEQ_CST_FENCE:
+		break;
+	default:
+		abort();
+	}
+}
+
+static inline void _cmm_compat_c11_smp_mb__after_uatomic_sub_mo(enum cmm_memorder mo)
+{
+	/* NOP. uatomic_sub has explicit lock prefix. */
+	switch (mo) {
+	case CMM_RELAXED:	/* Fall-through */
+	case CMM_ACQUIRE:	/* Fall-through */
+	case CMM_CONSUME:	/* Fall-through */
+	case CMM_RELEASE:	/* Fall-through */
+	case CMM_ACQ_REL:	/* Fall-through */
+	case CMM_SEQ_CST:	/* Fall-through */
+	case CMM_SEQ_CST_FENCE:
+		break;
+	default:
+		abort();
+	}
+}
+
+static inline void _cmm_compat_c11_smp_mb__before_uatomic_inc_mo(enum cmm_memorder mo)
+{
+	/* NOP. uatomic_inc has explicit lock prefix. */
+	switch (mo) {
+	case CMM_RELAXED:	/* Fall-through */
+	case CMM_ACQUIRE:	/* Fall-through */
+	case CMM_CONSUME:	/* Fall-through */
+	case CMM_RELEASE:	/* Fall-through */
+	case CMM_ACQ_REL:	/* Fall-through */
+	case CMM_SEQ_CST:	/* Fall-through */
+	case CMM_SEQ_CST_FENCE:
+		break;
+	default:
+		abort();
+	}
+}
+
+static inline void _cmm_compat_c11_smp_mb__after_uatomic_inc_mo(enum cmm_memorder mo)
+{
+	/* NOP. uatomic_inc has explicit lock prefix. */
+	switch (mo) {
+	case CMM_RELAXED:	/* Fall-through */
+	case CMM_ACQUIRE:	/* Fall-through */
+	case CMM_CONSUME:	/* Fall-through */
+	case CMM_RELEASE:	/* Fall-through */
+	case CMM_ACQ_REL:	/* Fall-through */
+	case CMM_SEQ_CST:	/* Fall-through */
+	case CMM_SEQ_CST_FENCE:
+		break;
+	default:
+		abort();
+	}
+}
+
+static inline void _cmm_compat_c11_smp_mb__before_uatomic_dec_mo(enum cmm_memorder mo)
+{
+	/* NOP. uatomic_dec has explicit lock prefix. */
+	switch (mo) {
+	case CMM_RELAXED:	/* Fall-through */
+	case CMM_ACQUIRE:	/* Fall-through */
+	case CMM_CONSUME:	/* Fall-through */
+	case CMM_RELEASE:	/* Fall-through */
+	case CMM_ACQ_REL:	/* Fall-through */
+	case CMM_SEQ_CST:	/* Fall-through */
+	case CMM_SEQ_CST_FENCE:
+		break;
+	default:
+		abort();
+	}
+}
+
+static inline void _cmm_compat_c11_smp_mb__after_uatomic_dec_mo(enum cmm_memorder mo)
+{
+	/* NOP. uatomic_dec has explicit lock prefix. */
+	switch (mo) {
+	case CMM_RELAXED:	/* Fall-through */
+	case CMM_ACQUIRE:	/* Fall-through */
+	case CMM_CONSUME:	/* Fall-through */
+	case CMM_RELEASE:	/* Fall-through */
+	case CMM_ACQ_REL:	/* Fall-through */
+	case CMM_SEQ_CST:	/* Fall-through */
+	case CMM_SEQ_CST_FENCE:
+		break;
+	default:
+		abort();
+	}
+}
+
+static inline void _cmm_compat_c11_smp_mb__before_uatomic_add_return_mo(enum cmm_memorder mo)
+{
+	/* NOP. uatomic_add_return has explicit lock prefix. */
+	switch (mo) {
+	case CMM_RELAXED:	/* Fall-through */
+	case CMM_ACQUIRE:	/* Fall-through */
+	case CMM_CONSUME:	/* Fall-through */
+	case CMM_RELEASE:	/* Fall-through */
+	case CMM_ACQ_REL:	/* Fall-through */
+	case CMM_SEQ_CST:	/* Fall-through */
+	case CMM_SEQ_CST_FENCE:
+		break;
+	default:
+		abort();
+	}
+}
+
+static inline void _cmm_compat_c11_smp_mb__after_uatomic_add_return_mo(enum cmm_memorder mo)
+{
+	/* NOP. uatomic_add_return has explicit lock prefix. */
+	switch (mo) {
+	case CMM_RELAXED:	/* Fall-through */
+	case CMM_ACQUIRE:	/* Fall-through */
+	case CMM_CONSUME:	/* Fall-through */
+	case CMM_RELEASE:	/* Fall-through */
+	case CMM_ACQ_REL:	/* Fall-through */
+	case CMM_SEQ_CST:	/* Fall-through */
+	case CMM_SEQ_CST_FENCE:
+		break;
+	default:
+		abort();
+	}
+}
+
+static inline void _cmm_compat_c11_smp_mb__before_uatomic_sub_return_mo(enum cmm_memorder mo)
+{
+	/* NOP. uatomic_sub_return has explicit lock prefix. */
+	switch (mo) {
+	case CMM_RELAXED:	/* Fall-through */
+	case CMM_ACQUIRE:	/* Fall-through */
+	case CMM_CONSUME:	/* Fall-through */
+	case CMM_RELEASE:	/* Fall-through */
+	case CMM_ACQ_REL:	/* Fall-through */
+	case CMM_SEQ_CST:	/* Fall-through */
+	case CMM_SEQ_CST_FENCE:
+		break;
+	default:
+		abort();
+	}
+}
+
+static inline void _cmm_compat_c11_smp_mb__after_uatomic_sub_return_mo(enum cmm_memorder mo)
+{
+	/* NOP. uatomic_sub_return has explicit lock prefix. */
+	switch (mo) {
+	case CMM_RELAXED:	/* Fall-through */
+	case CMM_ACQUIRE:	/* Fall-through */
+	case CMM_CONSUME:	/* Fall-through */
+	case CMM_RELEASE:	/* Fall-through */
+	case CMM_ACQ_REL:	/* Fall-through */
+	case CMM_SEQ_CST:	/* Fall-through */
+	case CMM_SEQ_CST_FENCE:
+		break;
+	default:
+		abort();
+	}
+}
+
+#define _cmm_compat_c11_smp_mb__before_mo(operation, mo)			\
+	do {							\
+		_cmm_compat_c11_smp_mb__before_ ## operation ## _mo (mo);	\
+	} while (0)
+
+#define _cmm_compat_c11_smp_mb__after_mo(operation, mo)			\
+	do {							\
+		_cmm_compat_c11_smp_mb__after_ ## operation ## _mo (mo);	\
+	} while (0)
+
+
 #ifdef __cplusplus
 }
 #endif
-- 
2.34.1