/*
 * urcu-call-rcu.c
 *
 * Userspace RCU library - batch memory reclamation with kernel API
 *
 * Copyright (c) 2010 Paul E. McKenney <paulmck@linux.vnet.ibm.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#define _GNU_SOURCE
#include <stdio.h>
#include <pthread.h>
#include <signal.h>
#include <assert.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <poll.h>
#include <sys/time.h>
#include <unistd.h>
#include <sched.h>

#include "config.h"
#include "urcu/wfqueue.h"
#include "urcu-call-rcu.h"
#include "urcu-pointer.h"
#include "urcu/list.h"
#include "urcu/futex.h"

/* Data structure that identifies a call_rcu thread. */

struct call_rcu_data {
	struct cds_wfq_queue cbs;
	unsigned long flags;
	int32_t futex;
	unsigned long qlen; /* maintained for debugging. */
	pthread_t tid;
	int cpu_affinity;
	struct cds_list_head list;
} __attribute__((aligned(CAA_CACHE_LINE_SIZE)));

/*
 * List of all call_rcu_data structures to keep valgrind happy.
 * Protected by call_rcu_mutex.
 */

CDS_LIST_HEAD(call_rcu_data_list);

/* Link a thread using call_rcu() to its call_rcu thread. */

static __thread struct call_rcu_data *thread_call_rcu_data;

/* Guard call_rcu thread creation. */

static pthread_mutex_t call_rcu_mutex = PTHREAD_MUTEX_INITIALIZER;

/* If a given thread does not have its own call_rcu thread, this is the default. */

static struct call_rcu_data *default_call_rcu_data;

/*
 * If the sched_getcpu() and sysconf(_SC_NPROCESSORS_CONF) calls are
 * available, then we can have call_rcu threads assigned to individual
 * CPUs rather than only to specific threads.
 */

#if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF)

/*
 * Pointer to array of pointers to per-CPU call_rcu_data structures
 * and # CPUs.
 */

static struct call_rcu_data **per_cpu_call_rcu_data;
static long maxcpus;

/* Allocate the array if it has not already been allocated. */

static void alloc_cpu_call_rcu_data(void)
{
	struct call_rcu_data **p;
	static int warned = 0;

	if (maxcpus != 0)
		return;
	maxcpus = sysconf(_SC_NPROCESSORS_CONF);
	if (maxcpus <= 0) {
		return;
	}
	p = malloc(maxcpus * sizeof(*per_cpu_call_rcu_data));
	if (p != NULL) {
		memset(p, '\0', maxcpus * sizeof(*per_cpu_call_rcu_data));
		per_cpu_call_rcu_data = p;
	} else {
		if (!warned) {
			fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n");
		}
		warned = 1;
	}
}

#else /* #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF) */

/*
 * per_cpu_call_rcu_data should be constant, but some functions below,
 * used both for cases where the cpu number is available and where it
 * is not, assume it is not constant.
 */
static struct call_rcu_data **per_cpu_call_rcu_data = NULL;
static const long maxcpus = -1;

static void alloc_cpu_call_rcu_data(void)
{
}

static int sched_getcpu(void)
{
	return -1;
}

#endif /* #else #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF) */

/* Acquire the specified pthread mutex. */

static void call_rcu_lock(pthread_mutex_t *pmp)
{
	if (pthread_mutex_lock(pmp) != 0) {
		perror("pthread_mutex_lock");
		exit(-1);
	}
}

/* Release the specified pthread mutex. */

static void call_rcu_unlock(pthread_mutex_t *pmp)
{
	if (pthread_mutex_unlock(pmp) != 0) {
		perror("pthread_mutex_unlock");
		exit(-1);
	}
}

#if HAVE_SCHED_SETAFFINITY
static
int set_thread_cpu_affinity(struct call_rcu_data *crdp)
{
	cpu_set_t mask;

	if (crdp->cpu_affinity < 0)
		return 0;

	CPU_ZERO(&mask);
	CPU_SET(crdp->cpu_affinity, &mask);
#if SCHED_SETAFFINITY_ARGS == 2
	return sched_setaffinity(0, &mask);
#else
	return sched_setaffinity(0, sizeof(mask), &mask);
#endif
}
#else
static
int set_thread_cpu_affinity(struct call_rcu_data *crdp)
{
	return 0;
}
#endif

static void call_rcu_wait(struct call_rcu_data *crdp)
{
	/* Read call_rcu list before read futex */
	cmm_smp_mb();
	if (uatomic_read(&crdp->futex) == -1)
		futex_async(&crdp->futex, FUTEX_WAIT, -1,
			NULL, NULL, 0);
}

static void call_rcu_wake_up(struct call_rcu_data *crdp)
{
	/* Write to call_rcu list before reading/writing futex */
	cmm_smp_mb();
	if (unlikely(uatomic_read(&crdp->futex) == -1)) {
		uatomic_set(&crdp->futex, 0);
		futex_async(&crdp->futex, FUTEX_WAKE, 1,
			NULL, NULL, 0);
	}
}

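/*
 * Rough sketch of the handshake between call_rcu_wait() and
 * call_rcu_wake_up(), assuming futex_async() has FUTEX_WAIT/FUTEX_WAKE
 * semantics:
 *
 *	waiter (call_rcu thread)	waker (enqueuer)
 *	------------------------	----------------
 *	uatomic_dec(&futex) to -1	enqueue callback
 *	cmm_smp_mb()			cmm_smp_mb()
 *	re-check callback list		read futex; if it is -1,
 *	if still empty,			set it back to 0 and
 *		FUTEX_WAIT while -1	FUTEX_WAKE one waiter
 *
 * The paired memory barriers order the futex value against the callback
 * list, so a wakeup cannot be lost between the waiter's final list check
 * and its FUTEX_WAIT.
 */
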
/* This is the code run by each call_rcu thread. */

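/*
 * Each pass through the loop below: (1) splices the thread's wait-free
 * callback queue onto a local list if it is non-empty, (2) waits for a
 * grace period with synchronize_rcu(), (3) invokes each spliced
 * callback, and then (4) either sleeps on the futex (default case) or
 * polls (URCU_CALL_RCU_RT case) until more callbacks arrive or
 * URCU_CALL_RCU_STOP is set.
 */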
static void *call_rcu_thread(void *arg)
{
	unsigned long cbcount;
	struct cds_wfq_node *cbs;
	struct cds_wfq_node **cbs_tail;
	struct call_rcu_data *crdp = (struct call_rcu_data *)arg;
	struct rcu_head *rhp;
	int rt = !!(uatomic_read(&crdp->flags) & URCU_CALL_RCU_RT);

	if (set_thread_cpu_affinity(crdp) != 0) {
		perror("pthread_setaffinity_np");
		exit(-1);
	}

	thread_call_rcu_data = crdp;
	if (!rt) {
		uatomic_dec(&crdp->futex);
		/* Decrement futex before reading call_rcu list */
		cmm_smp_mb();
	}
	for (;;) {
		if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) {
			while ((cbs = _CMM_LOAD_SHARED(crdp->cbs.head)) == NULL)
				poll(NULL, 0, 1);
			_CMM_STORE_SHARED(crdp->cbs.head, NULL);
			cbs_tail = (struct cds_wfq_node **)
				uatomic_xchg(&crdp->cbs.tail, &crdp->cbs.head);
			synchronize_rcu();
			cbcount = 0;
			do {
				while (cbs->next == NULL &&
				       &cbs->next != cbs_tail)
					poll(NULL, 0, 1);
				if (cbs == &crdp->cbs.dummy) {
					cbs = cbs->next;
					continue;
				}
				rhp = (struct rcu_head *)cbs;
				cbs = cbs->next;
				rhp->func(rhp);
				cbcount++;
			} while (cbs != NULL);
			uatomic_sub(&crdp->qlen, cbcount);
		}
		if (uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOP)
			break;
		if (!rt) {
			if (&crdp->cbs.head
			    == _CMM_LOAD_SHARED(crdp->cbs.tail)) {
				call_rcu_wait(crdp);
				poll(NULL, 0, 10);
				uatomic_dec(&crdp->futex);
				/*
				 * Decrement futex before reading
				 * call_rcu list.
				 */
				cmm_smp_mb();
			} else {
				poll(NULL, 0, 10);
			}
		} else {
			poll(NULL, 0, 10);
		}
	}
	if (!rt) {
		/*
		 * Read call_rcu list before write futex.
		 */
		cmm_smp_mb();
		uatomic_set(&crdp->futex, 0);
	}
	uatomic_or(&crdp->flags, URCU_CALL_RCU_STOPPED);
	return NULL;
}

/*
 * Create both a call_rcu thread and the corresponding call_rcu_data
 * structure, linking the structure in as specified.  Caller must hold
 * call_rcu_mutex.
 */

static void call_rcu_data_init(struct call_rcu_data **crdpp,
			       unsigned long flags,
			       int cpu_affinity)
{
	struct call_rcu_data *crdp;

	crdp = malloc(sizeof(*crdp));
	if (crdp == NULL) {
		fprintf(stderr, "Out of memory.\n");
		exit(-1);
	}
	memset(crdp, '\0', sizeof(*crdp));
	cds_wfq_init(&crdp->cbs);
	crdp->qlen = 0;
	crdp->futex = 0;
	crdp->flags = flags;
	cds_list_add(&crdp->list, &call_rcu_data_list);
	crdp->cpu_affinity = cpu_affinity;
	cmm_smp_mb();  /* Structure initialized before pointer is planted. */
	*crdpp = crdp;
	if (pthread_create(&crdp->tid, NULL, call_rcu_thread, crdp) != 0) {
		perror("pthread_create");
		exit(-1);
	}
}

/*
 * Return a pointer to the call_rcu_data structure for the specified
 * CPU, returning NULL if there is none.  We cannot automatically
 * create it because the platform we are running on might not define
 * sched_getcpu().
 */

struct call_rcu_data *get_cpu_call_rcu_data(int cpu)
{
	static int warned = 0;

	if (per_cpu_call_rcu_data == NULL)
		return NULL;
	if (!warned && maxcpus > 0 && (cpu < 0 || maxcpus <= cpu)) {
		fprintf(stderr, "[error] liburcu: get CPU # out of range\n");
		warned = 1;
	}
	if (cpu < 0 || maxcpus <= cpu)
		return NULL;
	return per_cpu_call_rcu_data[cpu];
}

/*
 * Return the tid corresponding to the call_rcu thread whose
 * call_rcu_data structure is specified.
 */

pthread_t get_call_rcu_thread(struct call_rcu_data *crdp)
{
	return crdp->tid;
}

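/*
 * Hypothetical usage sketch: the returned pthread_t can be handed to
 * ordinary pthread calls, for example to raise the call_rcu thread's
 * scheduling priority (names and policy chosen for illustration only):
 *
 *	struct sched_param sp = { .sched_priority = 1 };
 *	pthread_t tid = get_call_rcu_thread(get_default_call_rcu_data());
 *
 *	(void) pthread_setschedparam(tid, SCHED_FIFO, &sp);
 */
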
/*
 * Create a call_rcu_data structure (with thread) and return a pointer.
 */

static struct call_rcu_data *__create_call_rcu_data(unsigned long flags,
						     int cpu_affinity)
{
	struct call_rcu_data *crdp;

	call_rcu_data_init(&crdp, flags, cpu_affinity);
	return crdp;
}

struct call_rcu_data *create_call_rcu_data(unsigned long flags,
					   int cpu_affinity)
{
	struct call_rcu_data *crdp;

	call_rcu_lock(&call_rcu_mutex);
	crdp = __create_call_rcu_data(flags, cpu_affinity);
	call_rcu_unlock(&call_rcu_mutex);
	return crdp;
}

/*
 * Set the specified CPU to use the specified call_rcu_data structure.
 *
 * Use NULL to remove a CPU's call_rcu_data structure, but it is
 * the caller's responsibility to dispose of the removed structure.
 * Use get_cpu_call_rcu_data() to obtain a pointer to the old structure
 * (prior to NULLing it out, of course).
 */

int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp)
{
	int warned = 0;

	call_rcu_lock(&call_rcu_mutex);
	if (cpu < 0 || maxcpus <= cpu) {
		if (!warned) {
			fprintf(stderr, "[error] liburcu: set CPU # out of range\n");
			warned = 1;
		}
		call_rcu_unlock(&call_rcu_mutex);
		errno = EINVAL;
		return -EINVAL;
	}
	alloc_cpu_call_rcu_data();
	call_rcu_unlock(&call_rcu_mutex);
	if (per_cpu_call_rcu_data == NULL) {
		errno = ENOMEM;
		return -ENOMEM;
	}
	per_cpu_call_rcu_data[cpu] = crdp;
	return 0;
}

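/*
 * Replacement sketch, assuming the caller wants to swap CPU 0 over to a
 * real-time call_rcu thread (illustrative only):
 *
 *	struct call_rcu_data *oldcrd = get_cpu_call_rcu_data(0);
 *	struct call_rcu_data *newcrd = create_call_rcu_data(URCU_CALL_RCU_RT, 0);
 *
 *	if (set_cpu_call_rcu_data(0, newcrd) == 0)
 *		call_rcu_data_free(oldcrd);	(see call_rcu_data_free() below)
 */
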
/*
 * Return a pointer to the default call_rcu_data structure, creating
 * one if need be.  Because we never free call_rcu_data structures,
 * we don't need to be in an RCU read-side critical section.
 */

struct call_rcu_data *get_default_call_rcu_data(void)
{
	if (default_call_rcu_data != NULL)
		return rcu_dereference(default_call_rcu_data);
	call_rcu_lock(&call_rcu_mutex);
	if (default_call_rcu_data != NULL) {
		call_rcu_unlock(&call_rcu_mutex);
		return default_call_rcu_data;
	}
	call_rcu_data_init(&default_call_rcu_data, 0, -1);
	call_rcu_unlock(&call_rcu_mutex);
	return default_call_rcu_data;
}

/*
 * Return the call_rcu_data structure that applies to the currently
 * running thread.  Any call_rcu_data structure assigned specifically
 * to this thread has first priority, followed by any call_rcu_data
 * structure assigned to the CPU on which the thread is running,
 * followed by the default call_rcu_data structure.  If there is not
 * yet a default call_rcu_data structure, one will be created.
 */
struct call_rcu_data *get_call_rcu_data(void)
{
	int curcpu;
	static int warned = 0;

	if (thread_call_rcu_data != NULL)
		return thread_call_rcu_data;
	if (maxcpus <= 0)
		return get_default_call_rcu_data();
	curcpu = sched_getcpu();
	if (!warned && (curcpu < 0 || maxcpus <= curcpu)) {
		fprintf(stderr, "[error] liburcu: gcrd CPU # out of range\n");
		warned = 1;
	}
	if (curcpu >= 0 && maxcpus > curcpu &&
	    per_cpu_call_rcu_data != NULL &&
	    per_cpu_call_rcu_data[curcpu] != NULL)
		return per_cpu_call_rcu_data[curcpu];
	return get_default_call_rcu_data();
}

/*
 * Return a pointer to this task's call_rcu_data if there is one.
 */

struct call_rcu_data *get_thread_call_rcu_data(void)
{
	return thread_call_rcu_data;
}

/*
 * Set this task's call_rcu_data structure as specified, regardless
 * of whether or not this task already had one.  (This allows switching
 * to and from real-time call_rcu threads, for example.)
 *
 * Use NULL to remove a thread's call_rcu_data structure, but it is
 * the caller's responsibility to dispose of the removed structure.
 * Use get_thread_call_rcu_data() to obtain a pointer to the old structure
 * (prior to NULLing it out, of course).
 */

void set_thread_call_rcu_data(struct call_rcu_data *crdp)
{
	thread_call_rcu_data = crdp;
}

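/*
 * Switching sketch, assuming the current thread wants its own real-time
 * call_rcu thread and later reverts to the shared default (illustrative
 * only):
 *
 *	struct call_rcu_data *crdp;
 *
 *	set_thread_call_rcu_data(create_call_rcu_data(URCU_CALL_RCU_RT, -1));
 *	... issue call_rcu() invocations ...
 *	crdp = get_thread_call_rcu_data();
 *	set_thread_call_rcu_data(NULL);
 *	call_rcu_data_free(crdp);
 */
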
/*
 * Create a separate call_rcu thread for each CPU.  This does not
 * replace a pre-existing call_rcu thread -- use the set_cpu_call_rcu_data()
 * function if you want that behavior.
 */

int create_all_cpu_call_rcu_data(unsigned long flags)
{
	int i;
	struct call_rcu_data *crdp;
	int ret;

	call_rcu_lock(&call_rcu_mutex);
	alloc_cpu_call_rcu_data();
	call_rcu_unlock(&call_rcu_mutex);
	if (maxcpus <= 0) {
		errno = EINVAL;
		return -EINVAL;
	}
	if (per_cpu_call_rcu_data == NULL) {
		errno = ENOMEM;
		return -ENOMEM;
	}
	for (i = 0; i < maxcpus; i++) {
		call_rcu_lock(&call_rcu_mutex);
		if (get_cpu_call_rcu_data(i)) {
			call_rcu_unlock(&call_rcu_mutex);
			continue;
		}
		crdp = __create_call_rcu_data(flags, i);
		if (crdp == NULL) {
			call_rcu_unlock(&call_rcu_mutex);
			errno = ENOMEM;
			return -ENOMEM;
		}
		call_rcu_unlock(&call_rcu_mutex);
		if ((ret = set_cpu_call_rcu_data(i, crdp)) != 0) {
			/* FIXME: Leaks crdp for now. */
			return ret; /* Can happen on race. */
		}
	}
	return 0;
}

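/*
 * Initialization sketch, assuming the application wants per-CPU call_rcu
 * threads in place before its first call_rcu() invocation:
 *
 *	if (create_all_cpu_call_rcu_data(0) != 0)
 *		fprintf(stderr, "per-CPU call_rcu threads unavailable: %s\n",
 *			strerror(errno));
 */
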
/*
 * Wake up the call_rcu thread corresponding to the specified
 * call_rcu_data structure.
 */
static void wake_call_rcu_thread(struct call_rcu_data *crdp)
{
	if (!(_CMM_LOAD_SHARED(crdp->flags) & URCU_CALL_RCU_RT))
		call_rcu_wake_up(crdp);
}

/*
 * Schedule a function to be invoked after a following grace period.
 * This is the only function that must be called -- the others are
 * only present to allow applications to tune their use of RCU for
 * maximum performance.
 *
 * Note that unless a call_rcu thread has already been created, the
 * first invocation of call_rcu() will create one.  So, if you need
 * the first invocation of call_rcu() to be fast, make sure to create
 * a call_rcu thread first.  One way to accomplish this is
 * "get_call_rcu_data();", and another is create_all_cpu_call_rcu_data().
 */

void call_rcu(struct rcu_head *head,
	      void (*func)(struct rcu_head *head))
{
	struct call_rcu_data *crdp;

	cds_wfq_node_init(&head->next);
	head->func = func;
	crdp = get_call_rcu_data();
	cds_wfq_enqueue(&crdp->cbs, &head->next);
	uatomic_inc(&crdp->qlen);
	wake_call_rcu_thread(crdp);
}

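/*
 * Usage sketch for call_rcu(), with a hypothetical caller-defined
 * structure embedding the rcu_head:
 *
 *	struct foo {
 *		struct rcu_head rcu;
 *		int value;
 *	};
 *
 *	static void free_foo(struct rcu_head *head)
 *	{
 *		struct foo *p = caa_container_of(head, struct foo, rcu);
 *
 *		free(p);
 *	}
 *
 * After removing p from all RCU-protected structures so that no new
 * reader can reach it:
 *
 *	call_rcu(&p->rcu, free_foo);
 */
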
/*
 * Free up the specified call_rcu_data structure, terminating the
 * associated call_rcu thread.  The caller must have previously
 * removed the call_rcu_data structure from per-thread or per-CPU
 * usage.  For example, set_cpu_call_rcu_data(cpu, NULL) for per-CPU
 * call_rcu_data structures or set_thread_call_rcu_data(NULL) for
 * per-thread call_rcu_data structures.
 *
 * We silently refuse to free up the default call_rcu_data structure
 * because that is where we put any leftover callbacks.  Note that
 * the possibility of self-spawning callbacks makes it impossible
 * to execute all the callbacks in finite time without putting any
 * newly spawned callbacks somewhere else.  The "somewhere else" of
 * last resort is the default call_rcu_data structure.
 *
 * We also silently refuse to free NULL pointers.  This simplifies
 * the calling code.
 */
void call_rcu_data_free(struct call_rcu_data *crdp)
{
	struct cds_wfq_node *cbs;
	struct cds_wfq_node **cbs_tail;
	struct cds_wfq_node **cbs_endprev;

	if (crdp == NULL || crdp == default_call_rcu_data) {
		return;
	}
	if ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOPPED) == 0) {
		uatomic_or(&crdp->flags, URCU_CALL_RCU_STOP);
		wake_call_rcu_thread(crdp);
		while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOPPED) == 0)
			poll(NULL, 0, 1);
	}
	if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) {
		/* Splice any leftover callbacks onto the default queue. */
		while ((cbs = _CMM_LOAD_SHARED(crdp->cbs.head)) == NULL)
			poll(NULL, 0, 1);
		_CMM_STORE_SHARED(crdp->cbs.head, NULL);
		cbs_tail = (struct cds_wfq_node **)
			uatomic_xchg(&crdp->cbs.tail, &crdp->cbs.head);
		cbs_endprev = (struct cds_wfq_node **)
			uatomic_xchg(&default_call_rcu_data->cbs.tail, cbs_tail);
		*cbs_endprev = cbs;
		uatomic_add(&default_call_rcu_data->qlen,
			    uatomic_read(&crdp->qlen));
	}
	cds_list_del(&crdp->list);
	free(crdp);
}

/*
 * Clean up all the per-CPU call_rcu threads.
 */
void free_all_cpu_call_rcu_data(void)
{
	int cpu;
	struct call_rcu_data *crdp;

	if (maxcpus <= 0)
		return;
	for (cpu = 0; cpu < maxcpus; cpu++) {
		crdp = get_cpu_call_rcu_data(cpu);
		if (crdp == NULL)
			continue;
		set_cpu_call_rcu_data(cpu, NULL);
		call_rcu_data_free(crdp);
	}
}

/*
 * Acquire the call_rcu_mutex in order to ensure that the child sees
 * all of the call_rcu() data structures in a consistent state.
 * Suitable for pthread_atfork() and friends.
 */
void call_rcu_before_fork(void)
{
	call_rcu_lock(&call_rcu_mutex);
}

/*
 * Clean up call_rcu data structures in the parent of a successful fork()
 * that is not followed by exec() in the child.  Suitable for
 * pthread_atfork() and friends.
 */
void call_rcu_after_fork_parent(void)
{
	call_rcu_unlock(&call_rcu_mutex);
}

/*
 * Clean up call_rcu data structures in the child of a successful fork()
 * that is not followed by exec().  Suitable for pthread_atfork() and
 * friends.
 */
void call_rcu_after_fork_child(void)
{
	struct call_rcu_data *crdp;

	/* Release the mutex. */
	call_rcu_unlock(&call_rcu_mutex);

	/*
	 * Allocate a new default call_rcu_data structure in order
	 * to get a working call_rcu thread to go with it.
	 */
	default_call_rcu_data = NULL;
	(void)get_default_call_rcu_data();

	/* Dispose of all of the rest of the call_rcu_data structures. */
	while (call_rcu_data_list.next != call_rcu_data_list.prev) {
		crdp = cds_list_entry(call_rcu_data_list.prev,
				      struct call_rcu_data, list);
		if (crdp == default_call_rcu_data)
			crdp = cds_list_entry(crdp->list.prev,
					      struct call_rcu_data, list);
		uatomic_set(&crdp->flags, URCU_CALL_RCU_STOPPED);
		call_rcu_data_free(crdp);
	}
}
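
/*
 * The three fork handlers above are meant to be registered together,
 * for example (sketch):
 *
 *	(void) pthread_atfork(call_rcu_before_fork,
 *			call_rcu_after_fork_parent,
 *			call_rcu_after_fork_child);
 */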