/*
 * urcu-call-rcu.c
 *
 * Userspace RCU library - batch memory reclamation with kernel API
 *
 * Copyright (c) 2010 Paul E. McKenney <paulmck@linux.vnet.ibm.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#define _GNU_SOURCE
#include <stdio.h>
#include <pthread.h>
#include <signal.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <poll.h>
#include <sys/time.h>
#include <syscall.h>
#include <unistd.h>
#include <sched.h>

#include "config.h"
#include "urcu/wfqueue.h"
#include "urcu-call-rcu.h"
#include "urcu-pointer.h"
#include "urcu/list.h"
#include "urcu/urcu-futex.h"

/* Data structure that identifies a call_rcu thread. */

struct call_rcu_data {
	struct cds_wfq_queue cbs;	/* Queue of pending callbacks. */
	unsigned long flags;		/* URCU_CALL_RCU_* flags. */
	pthread_mutex_t mtx;		/* Serializes flag updates. */
	int futex;			/* Sleep/wake-up word, see below. */
	unsigned long qlen;		/* Number of pending callbacks. */
	pthread_t tid;			/* ID of the call_rcu thread. */
	int cpu_affinity;		/* CPU to bind to, or -1 for none. */
	struct cds_list_head list;	/* Links all call_rcu_data structures. */
} __attribute__((aligned(CAA_CACHE_LINE_SIZE)));

/*
 * List of all call_rcu_data structures to keep valgrind happy.
 * Protected by call_rcu_mutex.
 */

CDS_LIST_HEAD(call_rcu_data_list);

/* Link a thread using call_rcu() to its call_rcu thread. */

static __thread struct call_rcu_data *thread_call_rcu_data;

/* Guard call_rcu thread creation. */

static pthread_mutex_t call_rcu_mutex = PTHREAD_MUTEX_INITIALIZER;

/* If a given thread does not have its own call_rcu thread, this is the default. */

static struct call_rcu_data *default_call_rcu_data;

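/*
 * Sleep/wake-up handshake between a call_rcu thread and enqueuers
 * (explanatory sketch of the two helpers below): the futex word is 0
 * while the thread is running and -1 once it has announced its intent
 * to block.  call_rcu_wake_up() resets the word to 0 and issues
 * FUTEX_WAKE only when it observes -1, and the paired memory barriers
 * ensure that either the enqueuer sees -1 or the call_rcu thread sees
 * the newly enqueued callback before blocking.
 */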

static void call_rcu_wait(struct call_rcu_data *crdp)
{
	/* Read call_rcu list before reading futex. */
	cmm_smp_mb();
	if (uatomic_read(&crdp->futex) == -1)
		futex_async(&crdp->futex, FUTEX_WAIT, -1,
		      NULL, NULL, 0);
}

static void call_rcu_wake_up(struct call_rcu_data *crdp)
{
	/* Write to call_rcu list before reading/writing futex. */
	cmm_smp_mb();
	if (unlikely(uatomic_read(&crdp->futex) == -1)) {
		uatomic_set(&crdp->futex, 0);
		futex_async(&crdp->futex, FUTEX_WAKE, 1,
		      NULL, NULL, 0);
	}
}

/*
 * If the sched_getcpu() and sysconf(_SC_NPROCESSORS_CONF) calls are
 * available, then we can have call_rcu threads assigned to individual
 * CPUs rather than only to specific threads.
 */

#if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF)

/*
 * Pointer to array of pointers to per-CPU call_rcu_data structures
 * and # CPUs.
 */

static struct call_rcu_data **per_cpu_call_rcu_data;
static long maxcpus;

/* Allocate the array if it has not already been allocated. */

static void alloc_cpu_call_rcu_data(void)
{
	struct call_rcu_data **p;
	static int warned = 0;

	if (maxcpus != 0)
		return;
	maxcpus = sysconf(_SC_NPROCESSORS_CONF);
	if (maxcpus <= 0) {
		return;
	}
	p = malloc(maxcpus * sizeof(*per_cpu_call_rcu_data));
	if (p != NULL) {
		memset(p, '\0', maxcpus * sizeof(*per_cpu_call_rcu_data));
		per_cpu_call_rcu_data = p;
	} else {
		if (!warned) {
			fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n");
		}
		warned = 1;
	}
}

#else /* #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF) */

static const struct call_rcu_data **per_cpu_call_rcu_data = NULL;
static const long maxcpus = -1;

static void alloc_cpu_call_rcu_data(void)
{
}

static int sched_getcpu(void)
{
	return -1;
}

#endif /* #else #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF) */

/* Acquire the specified pthread mutex. */

static void call_rcu_lock(pthread_mutex_t *pmp)
{
	if (pthread_mutex_lock(pmp) != 0) {
		perror("pthread_mutex_lock");
		exit(-1);
	}
}

/* Release the specified pthread mutex. */

static void call_rcu_unlock(pthread_mutex_t *pmp)
{
	if (pthread_mutex_unlock(pmp) != 0) {
		perror("pthread_mutex_unlock");
		exit(-1);
	}
}

#if HAVE_SCHED_SETAFFINITY
static
int set_thread_cpu_affinity(struct call_rcu_data *crdp)
{
	cpu_set_t mask;

	if (crdp->cpu_affinity < 0)
		return 0;

	CPU_ZERO(&mask);
	CPU_SET(crdp->cpu_affinity, &mask);
#if SCHED_SETAFFINITY_ARGS == 2
	return sched_setaffinity(0, &mask);
#else
	return sched_setaffinity(0, sizeof(mask), &mask);
#endif
}
#else
static
int set_thread_cpu_affinity(struct call_rcu_data *crdp)
{
	return 0;
}
#endif

/* This is the code run by each call_rcu thread. */

static void *call_rcu_thread(void *arg)
{
	unsigned long cbcount;
	struct cds_wfq_node *cbs;
	struct cds_wfq_node **cbs_tail;
	struct call_rcu_data *crdp = (struct call_rcu_data *)arg;
	struct rcu_head *rhp;

	if (set_thread_cpu_affinity(crdp) != 0) {
		perror("sched_setaffinity");
		exit(-1);
	}

	thread_call_rcu_data = crdp;
	for (;;) {
		if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) {
			/*
			 * Snapshot the pending callbacks, wait for a
			 * grace period, then invoke them all.
			 */
			while ((cbs = _CMM_LOAD_SHARED(crdp->cbs.head)) == NULL)
				poll(NULL, 0, 1);
			_CMM_STORE_SHARED(crdp->cbs.head, NULL);
			cbs_tail = (struct cds_wfq_node **)
				uatomic_xchg(&crdp->cbs.tail, &crdp->cbs.head);
			synchronize_rcu();
			cbcount = 0;
			do {
				/* Wait for a concurrent enqueue to finish. */
				while (cbs->next == NULL &&
				       &cbs->next != cbs_tail)
					poll(NULL, 0, 1);
				if (cbs == &crdp->cbs.dummy) {
					cbs = cbs->next;
					continue;
				}
				rhp = (struct rcu_head *)cbs;
				cbs = cbs->next;
				rhp->func(rhp);
				cbcount++;
			} while (cbs != NULL);
			uatomic_sub(&crdp->qlen, cbcount);
		}
		if (crdp->flags & URCU_CALL_RCU_STOP)
			break;
		if (crdp->flags & URCU_CALL_RCU_RT)
			poll(NULL, 0, 10);
		else {
			if (&crdp->cbs.head == _CMM_LOAD_SHARED(crdp->cbs.tail)) {
				call_rcu_wait(crdp);
				uatomic_dec(&crdp->futex);
				/* Decrement futex before reading call_rcu list. */
				cmm_smp_mb();
			}
			poll(NULL, 0, 10);
		}
	}
	call_rcu_lock(&crdp->mtx);
	crdp->flags |= URCU_CALL_RCU_STOPPED;
	call_rcu_unlock(&crdp->mtx);
	return NULL;
}

/*
 * Create both a call_rcu thread and the corresponding call_rcu_data
 * structure, linking the structure in as specified.  Caller must hold
 * call_rcu_mutex.
 */

static void call_rcu_data_init(struct call_rcu_data **crdpp,
			       unsigned long flags,
			       int cpu_affinity)
{
	struct call_rcu_data *crdp;

	crdp = malloc(sizeof(*crdp));
	if (crdp == NULL) {
		fprintf(stderr, "Out of memory.\n");
		exit(-1);
	}
	memset(crdp, '\0', sizeof(*crdp));
	cds_wfq_init(&crdp->cbs);
	crdp->qlen = 0;
	if (pthread_mutex_init(&crdp->mtx, NULL) != 0) {
		perror("pthread_mutex_init");
		exit(-1);
	}
	crdp->futex = 0;
	crdp->flags = flags;
	cds_list_add(&crdp->list, &call_rcu_data_list);
	crdp->cpu_affinity = cpu_affinity;
	cmm_smp_mb();  /* Structure initialized before pointer is planted. */
	*crdpp = crdp;
	if (pthread_create(&crdp->tid, NULL, call_rcu_thread, crdp) != 0) {
		perror("pthread_create");
		exit(-1);
	}
}

/*
 * Return a pointer to the call_rcu_data structure for the specified
 * CPU, returning NULL if there is none.  We cannot automatically
 * create it because the platform we are running on might not define
 * sched_getcpu().
 */

struct call_rcu_data *get_cpu_call_rcu_data(int cpu)
{
	static int warned = 0;

	if (per_cpu_call_rcu_data == NULL)
		return NULL;
	if (!warned && maxcpus > 0 && (cpu < 0 || maxcpus <= cpu)) {
		fprintf(stderr, "[error] liburcu: get CPU # out of range\n");
		warned = 1;
	}
	if (cpu < 0 || maxcpus <= cpu)
		return NULL;
	return per_cpu_call_rcu_data[cpu];
}

/*
 * Return the tid corresponding to the call_rcu thread whose
 * call_rcu_data structure is specified.
 */

pthread_t get_call_rcu_thread(struct call_rcu_data *crdp)
{
	return crdp->tid;
}

/*
 * Create a call_rcu_data structure (with thread) and return a pointer.
 */

static struct call_rcu_data *__create_call_rcu_data(unsigned long flags,
						    int cpu_affinity)
{
	struct call_rcu_data *crdp;

	call_rcu_data_init(&crdp, flags, cpu_affinity);
	return crdp;
}

struct call_rcu_data *create_call_rcu_data(unsigned long flags,
					   int cpu_affinity)
{
	struct call_rcu_data *crdp;

	call_rcu_lock(&call_rcu_mutex);
	crdp = __create_call_rcu_data(flags, cpu_affinity);
	call_rcu_unlock(&call_rcu_mutex);
	return crdp;
}

/*
 * Set the specified CPU to use the specified call_rcu_data structure.
 *
 * Use NULL to remove a CPU's call_rcu_data structure, but it is
 * the caller's responsibility to dispose of the removed structure.
 * Use get_cpu_call_rcu_data() to obtain a pointer to the old structure
 * (prior to NULLing it out, of course).
 */

int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp)
{
	static int warned = 0;

	call_rcu_lock(&call_rcu_mutex);
	alloc_cpu_call_rcu_data();	/* Sets maxcpus on first call. */
	if (cpu < 0 || maxcpus <= cpu) {
		if (!warned) {
			fprintf(stderr, "[error] liburcu: set CPU # out of range\n");
			warned = 1;
		}
		call_rcu_unlock(&call_rcu_mutex);
		errno = EINVAL;
		return -EINVAL;
	}
	call_rcu_unlock(&call_rcu_mutex);
	if (per_cpu_call_rcu_data == NULL) {
		errno = ENOMEM;
		return -ENOMEM;
	}
	per_cpu_call_rcu_data[cpu] = crdp;
	return 0;
}

/*
 * Return a pointer to the default call_rcu_data structure, creating
 * one if need be.  Because we never free the default call_rcu_data
 * structure, we don't need to be in an RCU read-side critical section.
 */

struct call_rcu_data *get_default_call_rcu_data(void)
{
	if (default_call_rcu_data != NULL)
		return rcu_dereference(default_call_rcu_data);
	call_rcu_lock(&call_rcu_mutex);
	if (default_call_rcu_data != NULL) {
		call_rcu_unlock(&call_rcu_mutex);
		return default_call_rcu_data;
	}
	call_rcu_data_init(&default_call_rcu_data, 0, -1);
	call_rcu_unlock(&call_rcu_mutex);
	return default_call_rcu_data;
}

/*
 * Return the call_rcu_data structure that applies to the currently
 * running thread.  Any call_rcu_data structure assigned specifically
 * to this thread has first priority, followed by any call_rcu_data
 * structure assigned to the CPU on which the thread is running,
 * followed by the default call_rcu_data structure.  If there is not
 * yet a default call_rcu_data structure, one will be created.
 */
struct call_rcu_data *get_call_rcu_data(void)
{
	int curcpu;
	static int warned = 0;

	if (thread_call_rcu_data != NULL)
		return thread_call_rcu_data;
	if (maxcpus <= 0)
		return get_default_call_rcu_data();
	curcpu = sched_getcpu();
	if (!warned && (curcpu < 0 || maxcpus <= curcpu)) {
		fprintf(stderr, "[error] liburcu: gcrd CPU # out of range\n");
		warned = 1;
	}
	if (curcpu >= 0 && maxcpus > curcpu &&
	    per_cpu_call_rcu_data != NULL &&
	    per_cpu_call_rcu_data[curcpu] != NULL)
		return per_cpu_call_rcu_data[curcpu];
	return get_default_call_rcu_data();
}

/*
 * Return a pointer to this task's call_rcu_data if there is one.
 */

struct call_rcu_data *get_thread_call_rcu_data(void)
{
	return thread_call_rcu_data;
}

/*
 * Set this task's call_rcu_data structure as specified, regardless
 * of whether or not this task already had one.  (This allows switching
 * to and from real-time call_rcu threads, for example.)
 *
 * Use NULL to remove a thread's call_rcu_data structure, but it is
 * the caller's responsibility to dispose of the removed structure.
 * Use get_thread_call_rcu_data() to obtain a pointer to the old structure
 * (prior to NULLing it out, of course).
 */

void set_thread_call_rcu_data(struct call_rcu_data *crdp)
{
	thread_call_rcu_data = crdp;
}
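
/*
 * Illustrative sketch (not part of the library): a thread that wants
 * its call_rcu() callbacks handled by a dedicated real-time call_rcu
 * thread could set one up as follows (error handling omitted).
 */
#if 0
static void use_rt_call_rcu_thread(void)
{
	struct call_rcu_data *crdp;

	/* Create an RT call_rcu thread with no particular CPU affinity. */
	crdp = create_call_rcu_data(URCU_CALL_RCU_RT, -1);
	/* Route this thread's call_rcu() invocations to it. */
	set_thread_call_rcu_data(crdp);
}
#endif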

/*
 * Create a separate call_rcu thread for each CPU.  This does not
 * replace a pre-existing call_rcu thread -- use the set_cpu_call_rcu_data()
 * function if you want that behavior.
 */

int create_all_cpu_call_rcu_data(unsigned long flags)
{
	int i;
	struct call_rcu_data *crdp;
	int ret;

	call_rcu_lock(&call_rcu_mutex);
	alloc_cpu_call_rcu_data();
	call_rcu_unlock(&call_rcu_mutex);
	if (maxcpus <= 0) {
		errno = EINVAL;
		return -EINVAL;
	}
	if (per_cpu_call_rcu_data == NULL) {
		errno = ENOMEM;
		return -ENOMEM;
	}
	for (i = 0; i < maxcpus; i++) {
		call_rcu_lock(&call_rcu_mutex);
		if (get_cpu_call_rcu_data(i)) {
			call_rcu_unlock(&call_rcu_mutex);
			continue;
		}
		crdp = __create_call_rcu_data(flags, i);
		if (crdp == NULL) {
			call_rcu_unlock(&call_rcu_mutex);
			errno = ENOMEM;
			return -ENOMEM;
		}
		call_rcu_unlock(&call_rcu_mutex);
		if ((ret = set_cpu_call_rcu_data(i, crdp)) != 0) {
			/* FIXME: Leaks crdp for now. */
			return ret; /* Can happen on race. */
		}
	}
	return 0;
}
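
/*
 * Illustrative sketch (not part of the library): a program that wants
 * one call_rcu thread per CPU, spawned up front so that even the first
 * call_rcu() is fast, might run this during initialization.
 */
#if 0
static void init_per_cpu_call_rcu(void)
{
	/* Flags of 0 request ordinary (non-RT) call_rcu threads. */
	if (create_all_cpu_call_rcu_data(0) != 0)
		perror("create_all_cpu_call_rcu_data");
}
#endif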

/*
 * Wake up the call_rcu thread corresponding to the specified
 * call_rcu_data structure.
 */
static void wake_call_rcu_thread(struct call_rcu_data *crdp)
{
	if (!(_CMM_LOAD_SHARED(crdp->flags) & URCU_CALL_RCU_RT))
		call_rcu_wake_up(crdp);
}

/*
 * Schedule a function to be invoked after a following grace period.
 * This is the only function that must be called -- the others are
 * only present to allow applications to tune their use of RCU for
 * maximum performance.
 *
 * Note that unless a call_rcu thread has already been created, the
 * first invocation of call_rcu() will create one.  So, if you need
 * the first invocation of call_rcu() to be fast, make sure to create
 * a call_rcu thread first.  One way to accomplish this is to invoke
 * get_call_rcu_data(), and another is create_all_cpu_call_rcu_data().
 */

void call_rcu(struct rcu_head *head,
	      void (*func)(struct rcu_head *head))
{
	struct call_rcu_data *crdp;

	cds_wfq_node_init(&head->next);
	head->func = func;
	crdp = get_call_rcu_data();
	cds_wfq_enqueue(&crdp->cbs, &head->next);
	uatomic_inc(&crdp->qlen);
	wake_call_rcu_thread(crdp);
}
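
/*
 * Illustrative sketch (not part of the library): the usual call_rcu()
 * pattern of deferred free.  "struct foo", free_foo() and remove_foo()
 * are hypothetical names; caa_container_of() is assumed to be available
 * from urcu/compiler.h.
 */
#if 0
struct foo {
	int data;
	struct rcu_head rcu_head;
};

static void free_foo(struct rcu_head *head)
{
	struct foo *p = caa_container_of(head, struct foo, rcu_head);

	free(p);
}

static void remove_foo(struct foo *p)
{
	/* ... first unlink p from the enclosing RCU-protected structure ... */
	call_rcu(&p->rcu_head, free_foo);	/* invoked after a grace period */
}
#endif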

/*
 * Free up the specified call_rcu_data structure, terminating the
 * associated call_rcu thread.  The caller must have previously
 * removed the call_rcu_data structure from per-thread or per-CPU
 * usage.  For example, set_cpu_call_rcu_data(cpu, NULL) for per-CPU
 * call_rcu_data structures or set_thread_call_rcu_data(NULL) for
 * per-thread call_rcu_data structures.
 *
 * We silently refuse to free up the default call_rcu_data structure
 * because that is where we put any leftover callbacks.  Note that
 * the possibility of self-spawning callbacks makes it impossible
 * to execute all the callbacks in finite time without putting any
 * newly spawned callbacks somewhere else.  The "somewhere else" of
 * last resort is the default call_rcu_data structure.
 *
 * We also silently refuse to free NULL pointers.  This simplifies
 * the calling code.
 */
void call_rcu_data_free(struct call_rcu_data *crdp)
{
	struct cds_wfq_node *cbs;
	struct cds_wfq_node **cbs_tail;
	struct cds_wfq_node **cbs_endprev;

	if (crdp == NULL || crdp == default_call_rcu_data) {
		return;
	}
	if ((crdp->flags & URCU_CALL_RCU_STOPPED) == 0) {
		call_rcu_lock(&crdp->mtx);
		crdp->flags |= URCU_CALL_RCU_STOP;
		call_rcu_unlock(&crdp->mtx);
		wake_call_rcu_thread(crdp);
		while ((crdp->flags & URCU_CALL_RCU_STOPPED) == 0)
			poll(NULL, 0, 1);
	}
	if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) {
		/*
		 * Splice any leftover callbacks onto the tail of the
		 * default queue, following the wfq enqueue protocol.
		 */
		while ((cbs = _CMM_LOAD_SHARED(crdp->cbs.head)) == NULL)
			poll(NULL, 0, 1);
		_CMM_STORE_SHARED(crdp->cbs.head, NULL);
		cbs_tail = (struct cds_wfq_node **)
			uatomic_xchg(&crdp->cbs.tail, &crdp->cbs.head);
		cbs_endprev = (struct cds_wfq_node **)
			uatomic_xchg(&default_call_rcu_data->cbs.tail, cbs_tail);
		*cbs_endprev = cbs;
		uatomic_add(&default_call_rcu_data->qlen,
			    uatomic_read(&crdp->qlen));
	}
	cds_list_del(&crdp->list);
	free(crdp);
}

/*
 * Clean up all the per-CPU call_rcu threads.
 */
void free_all_cpu_call_rcu_data(void)
{
	int cpu;
	struct call_rcu_data *crdp;

	if (maxcpus <= 0)
		return;
	for (cpu = 0; cpu < maxcpus; cpu++) {
		crdp = get_cpu_call_rcu_data(cpu);
		if (crdp == NULL)
			continue;
		set_cpu_call_rcu_data(cpu, NULL);
		call_rcu_data_free(crdp);
	}
}

/*
 * Acquire the call_rcu_mutex in order to ensure that the child sees
 * all of the call_rcu() data structures in a consistent state.
 * Suitable for pthread_atfork() and friends.
 */
void call_rcu_before_fork(void)
{
	call_rcu_lock(&call_rcu_mutex);
}

/*
 * Clean up call_rcu data structures in the parent of a successful fork()
 * that is not followed by exec() in the child.  Suitable for
 * pthread_atfork() and friends.
 */
void call_rcu_after_fork_parent(void)
{
	call_rcu_unlock(&call_rcu_mutex);
}

/*
 * Clean up call_rcu data structures in the child of a successful fork()
 * that is not followed by exec().  Suitable for pthread_atfork() and
 * friends.
 */
void call_rcu_after_fork_child(void)
{
	struct call_rcu_data *crdp;

	/* Release the mutex. */
	call_rcu_unlock(&call_rcu_mutex);

	/*
	 * Allocate a new default call_rcu_data structure in order
	 * to get a working call_rcu thread to go with it.
	 */
	default_call_rcu_data = NULL;
	(void)get_default_call_rcu_data();

	/*
	 * Dispose of all of the rest of the call_rcu_data structures.
	 * Loop until only the default structure remains on the list.
	 */
	while (call_rcu_data_list.next != call_rcu_data_list.prev) {
		crdp = cds_list_entry(call_rcu_data_list.prev,
				      struct call_rcu_data, list);
		if (crdp == default_call_rcu_data)
			crdp = cds_list_entry(crdp->list.prev,
					      struct call_rcu_data, list);
		/* The pre-fork call_rcu threads no longer exist in the child. */
		crdp->flags = URCU_CALL_RCU_STOPPED;
		call_rcu_data_free(crdp);
	}
}
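
/*
 * Illustrative sketch (not part of the library): how an application
 * might register the three fork handlers above.
 */
#if 0
static void setup_call_rcu_atfork(void)
{
	(void) pthread_atfork(call_rcu_before_fork,
			      call_rcu_after_fork_parent,
			      call_rcu_after_fork_child);
}
#endif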