[urcu.git] / urcu-call-rcu-impl.h

/*
 * urcu-call-rcu.c
 *
 * Userspace RCU library - batch memory reclamation with kernel API
 *
 * Copyright (c) 2010 Paul E. McKenney <paulmck@linux.vnet.ibm.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#define _GNU_SOURCE
#include <stdio.h>
#include <pthread.h>
#include <signal.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <poll.h>
#include <sys/time.h>
#include <syscall.h>
#include <unistd.h>
#include <sched.h>

#include "config.h"
#include "urcu/wfqueue.h"
#include "urcu-call-rcu.h"
#include "urcu-pointer.h"
#include "urcu/list.h"

/*
 * Data structure that identifies a call_rcu thread.  One instance is
 * allocated and initialized by call_rcu_data_init() for every call_rcu
 * worker thread.  The whole structure is aligned on a cache line.
 */

struct call_rcu_data {
	/* Queue of callbacks posted by call_rcu(), drained by the worker. */
	struct cds_wfq_queue cbs;
	/* URCU_CALL_RCU_* bits (RT, RUNNING, STOP, STOPPED). */
	unsigned long flags;
	/* Mutex/condvar pair the non-RT worker sleeps on while idle. */
	pthread_mutex_t mtx;
	pthread_cond_t cond;
	/* Incremented per call_rcu(), decreased by the number invoked. */
	unsigned long qlen;
	/* Thread id filled in by pthread_create(). */
	pthread_t tid;
	/* CPU the worker pins itself to; negative means no pinning. */
	int cpu_affinity;
	/* Linkage into call_rcu_data_list. */
	struct cds_list_head list;
} __attribute__((aligned(CAA_CACHE_LINE_SIZE)));

/*
 * List of all call_rcu_data structures to keep valgrind happy.
 * Every structure is linked in by call_rcu_data_init(); the list is
 * walked by call_rcu_after_fork_child().  Protected by call_rcu_mutex.
 */

CDS_LIST_HEAD(call_rcu_data_list);

/*
 * Link a thread using call_rcu() to its call_rcu thread.  Per-thread
 * value: NULL unless set through set_thread_call_rcu_data() (or by a
 * call_rcu worker for itself); get_call_rcu_data() consults it first.
 */

static __thread struct call_rcu_data *thread_call_rcu_data;

/*
 * Guard call_rcu thread creation.  Also held around
 * alloc_cpu_call_rcu_data() and across fork() by the fork handlers.
 */

static pthread_mutex_t call_rcu_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * If a given thread does not have its own call_rcu thread, this is default.
 * Created on first use by get_default_call_rcu_data().
 */

static struct call_rcu_data *default_call_rcu_data;

/*
 * If the sched_getcpu() and sysconf(_SC_NPROCESSORS_CONF) calls are
 * available, then we can have call_rcu threads assigned to individual
 * CPUs rather than only to specific threads.
 */

#if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF)

/*
 * Pointer to array of pointers to per-CPU call_rcu_data structures
 * and # CPUs.
 *
 * Both are filled in lazily by alloc_cpu_call_rcu_data(): maxcpus is 0
 * until sysconf() has been queried, and the array (indexed by CPU
 * number, maxcpus entries) stays NULL until it has been allocated.
 */

static struct call_rcu_data **per_cpu_call_rcu_data;
static long maxcpus;

/*
 * Size and allocate the per-CPU pointer array on first use.
 *
 * Does nothing once maxcpus is non-zero, so the CPU count is queried
 * (and the allocation attempted) only once.  If sysconf() reports no
 * usable CPU count the array is left NULL.  On allocation failure a
 * message is printed (once) and the array is likewise left NULL; callers
 * detect this by testing per_cpu_call_rcu_data.
 */

static void alloc_cpu_call_rcu_data(void)
{
	static int warned = 0;
	struct call_rcu_data **table;

	if (maxcpus != 0)
		return;

	maxcpus = sysconf(_SC_NPROCESSORS_CONF);
	if (maxcpus <= 0)
		return;

	/* Zero-filled: every CPU starts without a call_rcu_data. */
	table = calloc(maxcpus, sizeof(*per_cpu_call_rcu_data));
	if (table == NULL) {
		if (!warned)
			fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n");
		warned = 1;
		return;
	}
	per_cpu_call_rcu_data = table;
}

#else /* #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF) */

/*
 * Fallback when per-CPU support is unavailable: the array is never
 * allocated and maxcpus is a negative constant, so every per-CPU lookup
 * falls through to the default call_rcu_data.
 *
 * The array keeps the same (non-const) element type as the per-CPU
 * build because the accessors shared by both configurations return
 * "struct call_rcu_data *"; a pointer-to-const element type would make
 * them discard the qualifier.
 */
static struct call_rcu_data **per_cpu_call_rcu_data = NULL;
static const long maxcpus = -1;

/* Fallback stub: without per-CPU support there is nothing to allocate. */
static void alloc_cpu_call_rcu_data(void)
{
}

/*
 * Fallback stub used when per-CPU support is compiled out: always
 * reports "no CPU" (-1).
 */
static int sched_getcpu(void)
{
	return -1;
}

#endif /* #else #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF) */

/*
 * Acquire the specified pthread mutex.
 *
 * Any failure is fatal: a diagnostic is printed and the process exits.
 * pthread_mutex_lock() reports errors through its return value and does
 * not set errno, so the code is copied into errno before perror().
 */

static void call_rcu_lock(pthread_mutex_t *pmp)
{
	int ret = pthread_mutex_lock(pmp);

	if (ret != 0) {
		errno = ret;
		perror("pthread_mutex_lock");
		exit(-1);
	}
}

/*
 * Release the specified pthread mutex.
 *
 * Any failure is fatal: a diagnostic is printed and the process exits.
 * pthread_mutex_unlock() reports errors through its return value and
 * does not set errno, so the code is copied into errno before perror().
 */

static void call_rcu_unlock(pthread_mutex_t *pmp)
{
	int ret = pthread_mutex_unlock(pmp);

	if (ret != 0) {
		errno = ret;
		perror("pthread_mutex_unlock");
		exit(-1);
	}
}

#if HAVE_SCHED_SETAFFINITY
/*
 * Pin the calling thread to the CPU recorded in crdp->cpu_affinity.
 * A negative cpu_affinity means "no pinning" and succeeds immediately.
 * Returns 0 on success, otherwise whatever sched_setaffinity() returned.
 */
static
int set_thread_cpu_affinity(struct call_rcu_data *crdp)
{
	cpu_set_t cpus;
	int ret = 0;

	if (crdp->cpu_affinity >= 0) {
		CPU_ZERO(&cpus);
		CPU_SET(crdp->cpu_affinity, &cpus);
		/* The build system tells us which prototype this libc has. */
#if SCHED_SETAFFINITY_ARGS == 2
		ret = sched_setaffinity(0, &cpus);
#else
		ret = sched_setaffinity(0, sizeof(cpus), &cpus);
#endif
	}
	return ret;
}
#else
/*
 * Stub used when sched_setaffinity() is unavailable: the requested
 * affinity is ignored and success (0) is reported.
 */
static
int set_thread_cpu_affinity(struct call_rcu_data *crdp)
{
	return 0;
}
#endif

/* This is the code run by each call_rcu thread. */

static void *call_rcu_thread(void *arg)
{
	unsigned long cbcount;
	struct cds_wfq_node *cbs;
	struct cds_wfq_node **cbs_tail;
	struct call_rcu_data *crdp = (struct call_rcu_data *)arg;
	struct rcu_head *rhp;

	if (set_thread_cpu_affinity(crdp) != 0) {
		perror("pthread_setaffinity_np");
		exit(-1);
	}

	thread_call_rcu_data = crdp;
	for (;;) {
		if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) {
			while ((cbs = _CMM_LOAD_SHARED(crdp->cbs.head)) == NULL)
				poll(NULL, 0, 1);
			_CMM_STORE_SHARED(crdp->cbs.head, NULL);
			cbs_tail = (struct cds_wfq_node **)
				uatomic_xchg(&crdp->cbs.tail, &crdp->cbs.head);
			synchronize_rcu();
			cbcount = 0;
			do {
				while (cbs->next == NULL &&
				       &cbs->next != cbs_tail)
				       	poll(NULL, 0, 1);
				if (cbs == &crdp->cbs.dummy) {
					cbs = cbs->next;
					continue;
				}
				rhp = (struct rcu_head *)cbs;
				cbs = cbs->next;
				rhp->func(rhp);
				cbcount++;
			} while (cbs != NULL);
			uatomic_sub(&crdp->qlen, cbcount);
		}
		if (crdp->flags & URCU_CALL_RCU_STOP)
			break;
		if (crdp->flags & URCU_CALL_RCU_RT)
			poll(NULL, 0, 10);
		else {
			call_rcu_lock(&crdp->mtx);
			_CMM_STORE_SHARED(crdp->flags,
				     crdp->flags & ~URCU_CALL_RCU_RUNNING);
			if (&crdp->cbs.head ==
			    _CMM_LOAD_SHARED(crdp->cbs.tail) &&
			    pthread_cond_wait(&crdp->cond, &crdp->mtx) != 0) {
				perror("pthread_cond_wait");
				exit(-1);
			}
			_CMM_STORE_SHARED(crdp->flags,
				     crdp->flags | URCU_CALL_RCU_RUNNING);
			poll(NULL, 0, 10);
			call_rcu_unlock(&crdp->mtx);
		}
	}
	call_rcu_lock(&crdp->mtx);
	crdp->flags |= URCU_CALL_RCU_STOPPED;
	call_rcu_unlock(&crdp->mtx);
	return NULL;
}

/*
 * Create both a call_rcu thread and the corresponding call_rcu_data
 * structure, linking the structure in as specified.  Caller must hold
 * call_rcu_mutex.
 *
 * crdpp:        where the pointer to the new structure is stored.
 * flags:        initial URCU_CALL_RCU_* flags; URCU_CALL_RCU_RUNNING is
 *               always added.
 * cpu_affinity: CPU the new thread pins itself to, negative for none.
 *
 * Never fails from the caller's point of view: any allocation or
 * pthread error prints a diagnostic and exits the process.  pthread
 * functions return their error code without setting errno, so the code
 * is copied into errno before perror() is called.
 */

static void call_rcu_data_init(struct call_rcu_data **crdpp,
			       unsigned long flags,
			       int cpu_affinity)
{
	struct call_rcu_data *crdp;
	int ret;

	crdp = malloc(sizeof(*crdp));
	if (crdp == NULL) {
		fprintf(stderr, "Out of memory.\n");
		exit(-1);
	}
	memset(crdp, '\0', sizeof(*crdp));
	cds_wfq_init(&crdp->cbs);
	crdp->qlen = 0;
	ret = pthread_mutex_init(&crdp->mtx, NULL);
	if (ret != 0) {
		errno = ret;
		perror("pthread_mutex_init");
		exit(-1);
	}
	ret = pthread_cond_init(&crdp->cond, NULL);
	if (ret != 0) {
		errno = ret;
		perror("pthread_cond_init");
		exit(-1);
	}
	crdp->flags = flags | URCU_CALL_RCU_RUNNING;
	cds_list_add(&crdp->list, &call_rcu_data_list);
	crdp->cpu_affinity = cpu_affinity;
	cmm_smp_mb();  /* Structure initialized before pointer is planted. */
	*crdpp = crdp;
	ret = pthread_create(&crdp->tid, NULL, call_rcu_thread, crdp);
	if (ret != 0) {
		errno = ret;
		perror("pthread_create");
		exit(-1);
	}
}

/*
 * Look up the call_rcu_data structure assigned to the given CPU.
 *
 * Returns NULL when the per-CPU array does not exist, when cpu is out
 * of range, or when nothing has been assigned to that CPU.  An
 * out-of-range request is reported on stderr once, and only if the CPU
 * count is known.  The structure is never created automatically,
 * because the platform we are running on might not define
 * sched_getcpu().
 */

struct call_rcu_data *get_cpu_call_rcu_data(int cpu)
{
	static int warned = 0;
	int out_of_range;

	if (per_cpu_call_rcu_data == NULL)
		return NULL;

	out_of_range = (cpu < 0 || maxcpus <= cpu);
	if (!out_of_range)
		return per_cpu_call_rcu_data[cpu];

	if (!warned && maxcpus > 0) {
		fprintf(stderr, "[error] liburcu: get CPU # out of range\n");
		warned = 1;
	}
	return NULL;
}

/*
 * Return the tid corresponding to the call_rcu thread whose
 * call_rcu_data structure is specified.
 *
 * crdp is dereferenced unconditionally and must not be NULL.
 */

pthread_t get_call_rcu_thread(struct call_rcu_data *crdp)
{
	return crdp->tid;
}

/*
 * Create a call_rcu_data structure (with thread) and return a pointer.
 *
 * Thin wrapper around call_rcu_data_init(), so the caller must hold
 * call_rcu_mutex.  flags and cpu_affinity are passed through unchanged.
 * call_rcu_data_init() exits the process on failure, so the returned
 * pointer is always the freshly created structure.
 */

static struct call_rcu_data *__create_call_rcu_data(unsigned long flags,
						    int cpu_affinity)
{
	struct call_rcu_data *crdp;

	/* crdp is an output parameter; it is set before the call returns. */
	call_rcu_data_init(&crdp, flags, cpu_affinity);
	return crdp;
}

/*
 * Public entry point for creating a call_rcu_data structure and its
 * thread: takes call_rcu_mutex around __create_call_rcu_data().
 *
 * flags:        URCU_CALL_RCU_* flags for the new thread.
 * cpu_affinity: CPU to pin the thread to, negative for none.
 *
 * Returns the new structure.  It is not attached to any thread or CPU;
 * the caller does that via set_thread_call_rcu_data() or
 * set_cpu_call_rcu_data().
 */
struct call_rcu_data *create_call_rcu_data(unsigned long flags,
					   int cpu_affinity)
{
	struct call_rcu_data *crdp;

	call_rcu_lock(&call_rcu_mutex);
	crdp = __create_call_rcu_data(flags, cpu_affinity);
	call_rcu_unlock(&call_rcu_mutex);
	return crdp;
}

/*
 * Set the specified CPU to use the specified call_rcu_data structure.
 *
 * Use NULL to remove a CPU's call_rcu_data structure, but it is
 * the caller's responsibility to dispose of the removed structure.
 * Use get_cpu_call_rcu_data() to obtain a pointer to the old structure
 * (prior to NULLing it out, of course).
 *
 * The per-CPU array is allocated here on first use, before the range
 * check, so the very first call can succeed.  The slot is written while
 * call_rcu_mutex is held.
 *
 * Returns 0 on success.  On failure returns a negative errno value and
 * also sets errno: -EINVAL if cpu is out of range (reported on stderr
 * once), -ENOMEM if the per-CPU array could not be allocated.
 * The caller must not hold call_rcu_mutex.
 */

int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp)
{
	static int warned = 0;
	int ret = 0;

	call_rcu_lock(&call_rcu_mutex);
	/* Must come first: maxcpus is 0 until the array has been sized. */
	alloc_cpu_call_rcu_data();
	if (cpu < 0 || maxcpus <= cpu) {
		if (!warned) {
			fprintf(stderr, "[error] liburcu: set CPU # out of range\n");
			warned = 1;
		}
		ret = -EINVAL;
	} else if (per_cpu_call_rcu_data == NULL) {
		ret = -ENOMEM;
	} else {
		per_cpu_call_rcu_data[cpu] = crdp;
	}
	call_rcu_unlock(&call_rcu_mutex);

	/* Set errno last so the unlock cannot disturb it. */
	if (ret != 0)
		errno = -ret;
	return ret;
}

/*
 * Return the default call_rcu_data structure, creating it (and its
 * thread) on first use.
 *
 * The common case returns the existing pointer without locking.
 * Otherwise call_rcu_mutex is taken and the pointer is tested again, so
 * that only one caller performs the creation.  call_rcu_data_free()
 * refuses to free the default structure, which is why no RCU read-side
 * critical section is needed here.
 */

struct call_rcu_data *get_default_call_rcu_data(void)
{
	if (default_call_rcu_data != NULL)
		return rcu_dereference(default_call_rcu_data);

	call_rcu_lock(&call_rcu_mutex);
	/* Somebody else may have created it while we waited for the lock. */
	if (default_call_rcu_data == NULL)
		call_rcu_data_init(&default_call_rcu_data, 0, -1);
	call_rcu_unlock(&call_rcu_mutex);

	return default_call_rcu_data;
}

/*
 * Pick the call_rcu_data structure the calling thread should post to.
 *
 * Order of preference:
 *  1. the structure assigned to this thread, if any;
 *  2. the structure assigned to the CPU the thread is running on, when
 *     per-CPU support is active and that CPU has one;
 *  3. the default structure, which is created if it does not exist yet.
 * A CPU number outside [0, maxcpus) is reported on stderr once.
 */
struct call_rcu_data *get_call_rcu_data(void)
{
	static int warned = 0;
	int cpu;
	int cpu_valid;

	if (thread_call_rcu_data != NULL)
		return thread_call_rcu_data;

	if (maxcpus > 0) {
		cpu = sched_getcpu();
		cpu_valid = (cpu >= 0 && maxcpus > cpu);
		if (!cpu_valid && !warned) {
			fprintf(stderr, "[error] liburcu: gcrd CPU # out of range\n");
			warned = 1;
		}
		if (cpu_valid &&
		    per_cpu_call_rcu_data != NULL &&
		    per_cpu_call_rcu_data[cpu] != NULL)
			return per_cpu_call_rcu_data[cpu];
	}
	return get_default_call_rcu_data();
}

/*
 * Return a pointer to this task's call_rcu_data if there is one.
 * Returns NULL when the calling thread has no thread-specific structure;
 * nothing is created and no fallback is applied.
 */

struct call_rcu_data *get_thread_call_rcu_data(void)
{
	return thread_call_rcu_data;
}

/*
 * Set this task's call_rcu_data structure as specified, regardless
 * of whether or not this task already had one.  (This allows switching
 * to and from real-time call_rcu threads, for example.)
 *
 * Use NULL to remove a thread's call_rcu_data structure, but it is
 * the caller's responsibility to dispose of the removed structure.
 * Use get_thread_call_rcu_data() to obtain a pointer to the old structure
 * (prior to NULLing it out, of course).
 *
 * Only the calling thread is affected: the pointer lives in
 * thread-local storage.
 */

void set_thread_call_rcu_data(struct call_rcu_data *crdp)
{
	thread_call_rcu_data = crdp;
}

/*
 * Create a separate call_rcu thread for each CPU.  This does not
 * replace a pre-existing call_rcu thread -- use the set_cpu_call_rcu_data()
 * function if you want that behavior.
 *
 * flags: URCU_CALL_RCU_* flags given to every thread created here.
 *
 * For each CPU, the "slot still empty?" test, the creation and the
 * store into the per-CPU array all happen inside one call_rcu_mutex
 * critical section, so a freshly created structure can neither
 * overwrite a concurrently installed one nor be left unreferenced.
 *
 * Returns 0 on success.  On failure returns a negative errno value and
 * sets errno: -EINVAL when the number of CPUs is unknown, -ENOMEM when
 * the per-CPU array could not be allocated.
 */

int create_all_cpu_call_rcu_data(unsigned long flags)
{
	int i;
	struct call_rcu_data *crdp;

	call_rcu_lock(&call_rcu_mutex);
	alloc_cpu_call_rcu_data();
	call_rcu_unlock(&call_rcu_mutex);
	if (maxcpus <= 0) {
		errno = EINVAL;
		return -EINVAL;
	}
	if (per_cpu_call_rcu_data == NULL) {
		errno = ENOMEM;
		return -ENOMEM;
	}
	for (i = 0; i < maxcpus; i++) {
		call_rcu_lock(&call_rcu_mutex);
		if (get_cpu_call_rcu_data(i) == NULL) {
			crdp = __create_call_rcu_data(flags, i);
			if (crdp == NULL) {
				call_rcu_unlock(&call_rcu_mutex);
				errno = ENOMEM;
				return -ENOMEM;
			}
			/* Install while still holding the lock. */
			per_cpu_call_rcu_data[i] = crdp;
		}
		call_rcu_unlock(&call_rcu_mutex);
	}
	return 0;
}

/*
 * Wake up the call_rcu thread corresponding to the specified
 * call_rcu_data structure.
 *
 * Real-time (URCU_CALL_RCU_RT) threads poll on their own and are never
 * signalled.  For other threads the condition variable is signalled,
 * under crdp->mtx, only when URCU_CALL_RCU_RUNNING is clear.
 * A pthread_cond_signal() failure is fatal; the function returns its
 * error code without setting errno, so the code is copied into errno
 * before perror().
 */
static void wake_call_rcu_thread(struct call_rcu_data *crdp)
{
	int ret;

	if (_CMM_LOAD_SHARED(crdp->flags) & URCU_CALL_RCU_RT)
		return;

	call_rcu_lock(&crdp->mtx);
	if (!(_CMM_LOAD_SHARED(crdp->flags) & URCU_CALL_RCU_RUNNING)) {
		ret = pthread_cond_signal(&crdp->cond);
		if (ret != 0) {
			errno = ret;
			perror("pthread_cond_signal");
			exit(-1);
		}
	}
	call_rcu_unlock(&crdp->mtx);
}

/*
 * Schedule a function to be invoked after a following grace period.
 * This is the only function that must be called -- the others are
 * only present to allow applications to tune their use of RCU for
 * maximum performance.
 *
 * head: caller-provided rcu_head; it is queued as-is and handed back
 *       to func when the callback runs.
 * func: callback to invoke once a grace period has elapsed.
 *
 * Note that if no call_rcu thread has been created yet, the first
 * invocation of call_rcu() will create one.  So, if you
 * need the first invocation of call_rcu() to be fast, make sure
 * to create a call_rcu thread first.  One way to accomplish this is
 * "get_call_rcu_data();", and another is create_all_cpu_call_rcu_data().
 */

void call_rcu(struct rcu_head *head,
	      void (*func)(struct rcu_head *head))
{
	struct call_rcu_data *crdp;

	/* The node must be fully set up before it becomes visible. */
	cds_wfq_node_init(&head->next);
	head->func = func;
	/* Thread-specific, then per-CPU, then default structure. */
	crdp = get_call_rcu_data();
	cds_wfq_enqueue(&crdp->cbs, &head->next);
	uatomic_inc(&crdp->qlen);
	/* Non-RT workers may be asleep on their condition variable. */
	wake_call_rcu_thread(crdp);
}

/*
 * Free up the specified call_rcu_data structure, terminating the
 * associated call_rcu thread.  The caller must have previously
 * removed the call_rcu_data structure from per-thread or per-CPU
 * usage.  For example, set_cpu_call_rcu_data(cpu, NULL) for per-CPU
 * call_rcu_data structures or set_thread_call_rcu_data(NULL) for
 * per-thread call_rcu_data structures.
 *
 * We silently refuse to free up the default call_rcu_data structure
 * because that is where we put any leftover callbacks.  Note that
 * the possibility of self-spawning callbacks makes it impossible
 * to execute all the callbacks in finite time without putting any
 * newly spawned callbacks somewhere else.  The "somewhere else" of
 * last resort is the default call_rcu_data structure.
 *
 * We also silently refuse to free NULL pointers.  This simplifies
 * the calling code.
 */
void call_rcu_data_free(struct call_rcu_data *crdp)
{
	struct cds_wfq_node *cbs;
	struct cds_wfq_node **cbs_tail;
	struct cds_wfq_node **cbs_endprev;

	if (crdp == NULL || crdp == default_call_rcu_data) {
		return;
	}
	if ((crdp->flags & URCU_CALL_RCU_STOPPED) == 0) {
		call_rcu_lock(&crdp->mtx);
		crdp->flags |= URCU_CALL_RCU_STOP;
		call_rcu_unlock(&crdp->mtx);
		wake_call_rcu_thread(crdp);
		while ((crdp->flags & URCU_CALL_RCU_STOPPED) == 0)
			poll(NULL, 0, 1);
	}
	if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) {
		while ((cbs = _CMM_LOAD_SHARED(crdp->cbs.head)) == NULL)
			poll(NULL, 0, 1);
		_CMM_STORE_SHARED(crdp->cbs.head, NULL);
		cbs_tail = (struct cds_wfq_node **)
			uatomic_xchg(&crdp->cbs.tail, &crdp->cbs.head);
		cbs_endprev = (struct cds_wfq_node **)
			uatomic_xchg(&default_call_rcu_data, cbs_tail);
		*cbs_endprev = cbs;
		uatomic_add(&default_call_rcu_data->qlen,
			    uatomic_read(&crdp->qlen));
		cds_list_del(&crdp->list);
		free(crdp);
	}
}

/*
 * Tear down every per-CPU call_rcu thread: for each CPU that has a
 * call_rcu_data structure, detach it from the CPU and hand it to
 * call_rcu_data_free().  Does nothing when the CPU count is unknown.
 */
void free_all_cpu_call_rcu_data(void)
{
	int cpu = 0;

	/* With maxcpus <= 0 the loop body never runs. */
	while (cpu < maxcpus) {
		struct call_rcu_data *victim = get_cpu_call_rcu_data(cpu);

		if (victim != NULL) {
			/* Unpublish first, as call_rcu_data_free() requires. */
			set_cpu_call_rcu_data(cpu, NULL);
			call_rcu_data_free(victim);
		}
		cpu++;
	}
}

/*
 * Acquire the call_rcu_mutex in order to ensure that the child sees
 * all of the call_rcu() data structures in a consistent state.
 * Suitable for pthread_atfork() and friends.
 *
 * The mutex stays held on return; it is released by
 * call_rcu_after_fork_parent() in the parent and by
 * call_rcu_after_fork_child() in the child.
 */
void call_rcu_before_fork(void)
{
	call_rcu_lock(&call_rcu_mutex);
}

/*
 * Parent-side counterpart of call_rcu_before_fork(): release the
 * call_rcu_mutex that was taken before the fork().  No call_rcu data
 * structure is touched in the parent.  Suitable for pthread_atfork()
 * and friends.
 */
void call_rcu_after_fork_parent(void)
{
	call_rcu_unlock(&call_rcu_mutex);
}

/*
 * Clean up call_rcu data structures in the child of a successful fork()
 * that is not followed by exec().  Suitable for pthread_atfork() and
 * friends.
 */
void call_rcu_after_fork_child(void)
{
	struct call_rcu_data *crdp;

	/* Release the mutex. */
	call_rcu_unlock(&call_rcu_mutex);

	/*
	 * Allocate a new default call_rcu_data structure in order
	 * to get a working call_rcu thread to go with it.
	 */
	default_call_rcu_data = NULL;
	(void)get_default_call_rcu_data();

	/* Dispose of all of the rest of the call_rcu_data structures. */
	while (call_rcu_data_list.next != call_rcu_data_list.prev) {
		crdp = cds_list_entry(call_rcu_data_list.prev,
				      struct call_rcu_data, list);
		if (crdp == default_call_rcu_data)
			crdp = cds_list_entry(crdp->list.prev,
					      struct call_rcu_data, list);
		crdp->flags = URCU_CALL_RCU_STOPPED;
		call_rcu_data_free(crdp);
	}
}
Commit	Line	Data
b57aee66 PM	1	/*
	2	* urcu-call-rcu.c
	3	*
	4	* Userspace RCU library - batch memory reclamation with kernel API
	5	*
	6	* Copyright (c) 2010 Paul E. McKenney <paulmck@linux.vnet.ibm.com>
	7	*
	8	* This library is free software; you can redistribute it and/or
	9	* modify it under the terms of the GNU Lesser General Public
	10	* License as published by the Free Software Foundation; either
	11	* version 2.1 of the License, or (at your option) any later version.
	12	*
	13	* This library is distributed in the hope that it will be useful,
	14	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	16	* Lesser General Public License for more details.
	17	*
	18	* You should have received a copy of the GNU Lesser General Public
	19	* License along with this library; if not, write to the Free Software
	20	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
	21	*/
	22
c1d2c60b	23	#define _GNU_SOURCE
b57aee66 PM	24	#include <stdio.h>
	25	#include <pthread.h>
	26	#include <signal.h>
	27	#include <assert.h>
	28	#include <stdlib.h>
	29	#include <string.h>
	30	#include <errno.h>
	31	#include <poll.h>
	32	#include <sys/time.h>
	33	#include <syscall.h>
	34	#include <unistd.h>
c1d2c60b	35	#include <sched.h>
b57aee66 PM	36
	37	#include "config.h"
	38	#include "urcu/wfqueue.h"
	39	#include "urcu-call-rcu.h"
	40	#include "urcu-pointer.h"
3c24913f	41	#include "urcu/list.h"
b57aee66 PM	42
	43	/* Data structure that identifies a call_rcu thread. */
	44
	45	struct call_rcu_data {
	46	struct cds_wfq_queue cbs;
	47	unsigned long flags;
	48	pthread_mutex_t mtx;
	49	pthread_cond_t cond;
	50	unsigned long qlen;
	51	pthread_t tid;
c1d2c60b	52	int cpu_affinity;
3c24913f	53	struct cds_list_head list;
b57aee66 PM	54	} __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
b57aee66 PM	55
3c24913f PM	56	/*
	57	* List of all call_rcu_data structures to keep valgrind happy.
	58	* Protected by call_rcu_mutex.
	59	*/
	60
	61	CDS_LIST_HEAD(call_rcu_data_list);
	62
b57aee66 PM	63	/* Link a thread using call_rcu() to its call_rcu thread. */
	64
	65	static __thread struct call_rcu_data *thread_call_rcu_data;
	66
	67	/* Guard call_rcu thread creation. */
	68
	69	static pthread_mutex_t call_rcu_mutex = PTHREAD_MUTEX_INITIALIZER;
	70
	71	/* If a given thread does not have its own call_rcu thread, this is default. */
	72
	73	static struct call_rcu_data *default_call_rcu_data;
	74
b57aee66 PM	75	/*
	76	* If the sched_getcpu() and sysconf(_SC_NPROCESSORS_CONF) calls are
	77	* available, then we can have call_rcu threads assigned to individual
	78	* CPUs rather than only to specific threads.
	79	*/
	80
	81	#if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF)
	82
	83	/*
	84	* Pointer to array of pointers to per-CPU call_rcu_data structures
	85	* and # CPUs.
	86	*/
	87
	88	static struct call_rcu_data **per_cpu_call_rcu_data;
	89	static long maxcpus;
	90
	91	/* Allocate the array if it has not already been allocated. */
	92
	93	static void alloc_cpu_call_rcu_data(void)
	94	{
	95	struct call_rcu_data **p;
	96	static int warned = 0;
	97
	98	if (maxcpus != 0)
	99	return;
	100	maxcpus = sysconf(_SC_NPROCESSORS_CONF);
	101	if (maxcpus <= 0) {
	102	return;
	103	}
	104	p = malloc(maxcpus * sizeof(*per_cpu_call_rcu_data));
	105	if (p != NULL) {
	106	memset(p, '\0', maxcpus * sizeof(*per_cpu_call_rcu_data));
	107	per_cpu_call_rcu_data = p;
	108	} else {
	109	if (!warned) {
	110	fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n");
	111	}
	112	warned = 1;
	113	}
	114	}
	115
	116	#else /* #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF) */
	117
	118	static const struct call_rcu_data **per_cpu_call_rcu_data = NULL;
	119	static const long maxcpus = -1;
	120
	121	static void alloc_cpu_call_rcu_data(void)
	122	{
	123	}
	124
	125	static int sched_getcpu(void)
	126	{
	127	return -1;
	128	}
	129
	130	#endif /* #else #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF) */
	131
	132	/* Acquire the specified pthread mutex. */
	133
	134	static void call_rcu_lock(pthread_mutex_t *pmp)
	135	{
	136	if (pthread_mutex_lock(pmp) != 0) {
	137	perror("pthread_mutex_lock");
	138	exit(-1);
139	}
140	}
141
142	/* Release the specified pthread mutex. */
143
144	static void call_rcu_unlock(pthread_mutex_t *pmp)
145	{
146	if (pthread_mutex_unlock(pmp) != 0) {
147	perror("pthread_mutex_unlock");
148	exit(-1);
149	}
150	}
151
c1d2c60b MD	152	#if HAVE_SCHED_SETAFFINITY
	153	static
	154	int set_thread_cpu_affinity(struct call_rcu_data *crdp)
	155	{
	156	cpu_set_t mask;
	157
	158	if (crdp->cpu_affinity < 0)
	159	return 0;
	160
	161	CPU_ZERO(&mask);
	162	CPU_SET(crdp->cpu_affinity, &mask);
	163	#if SCHED_SETAFFINITY_ARGS == 2
	164	return sched_setaffinity(0, &mask);
	165	#else
	166	return sched_setaffinity(0, sizeof(mask), &mask);
	167	#endif
	168	}
	169	#else
	170	static
	171	int set_thread_cpu_affinity(struct call_rcu_data *crdp)
	172	{
	173	return 0;
	174	}
	175	#endif
	176
b57aee66 PM	177	/* This is the code run by each call_rcu thread. */
	178
	179	static void call_rcu_thread(void arg)
	180	{
	181	unsigned long cbcount;
	182	struct cds_wfq_node *cbs;
	183	struct cds_wfq_node **cbs_tail;
	184	struct call_rcu_data crdp = (struct call_rcu_data )arg;
	185	struct rcu_head *rhp;
	186
c1d2c60b MD	187	if (set_thread_cpu_affinity(crdp) != 0) {
	188	perror("pthread_setaffinity_np");
	189	exit(-1);
	190	}
	191
b57aee66 PM	192	thread_call_rcu_data = crdp;
	193	for (;;) {
	194	if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) {
	195	while ((cbs = _CMM_LOAD_SHARED(crdp->cbs.head)) == NULL)
	196	poll(NULL, 0, 1);
	197	_CMM_STORE_SHARED(crdp->cbs.head, NULL);
	198	cbs_tail = (struct cds_wfq_node **)
	199	uatomic_xchg(&crdp->cbs.tail, &crdp->cbs.head);
	200	synchronize_rcu();
	201	cbcount = 0;
	202	do {
	203	while (cbs->next == NULL &&
	204	&cbs->next != cbs_tail)
	205	poll(NULL, 0, 1);
	206	if (cbs == &crdp->cbs.dummy) {
	207	cbs = cbs->next;
	208	continue;
	209	}
	210	rhp = (struct rcu_head *)cbs;
	211	cbs = cbs->next;
	212	rhp->func(rhp);
	213	cbcount++;
	214	} while (cbs != NULL);
	215	uatomic_sub(&crdp->qlen, cbcount);
	216	}
7106ddf8 PM	217	if (crdp->flags & URCU_CALL_RCU_STOP)
7106ddf8 PM	218	break;
b57aee66 PM	219	if (crdp->flags & URCU_CALL_RCU_RT)
	220	poll(NULL, 0, 10);
	221	else {
	222	call_rcu_lock(&crdp->mtx);
	223	_CMM_STORE_SHARED(crdp->flags,
	224	crdp->flags & ~URCU_CALL_RCU_RUNNING);
	225	if (&crdp->cbs.head ==
	226	_CMM_LOAD_SHARED(crdp->cbs.tail) &&
	227	pthread_cond_wait(&crdp->cond, &crdp->mtx) != 0) {
	228	perror("pthread_cond_wait");
	229	exit(-1);
	230	}
	231	_CMM_STORE_SHARED(crdp->flags,
	232	crdp->flags \| URCU_CALL_RCU_RUNNING);
	233	poll(NULL, 0, 10);
	234	call_rcu_unlock(&crdp->mtx);
	235	}
	236	}
7106ddf8 PM	237	call_rcu_lock(&crdp->mtx);
	238	crdp->flags \|= URCU_CALL_RCU_STOPPED;
	239	call_rcu_unlock(&crdp->mtx);
	240	return NULL;
b57aee66 PM	241	}
	242
	243	/*
	244	* Create both a call_rcu thread and the corresponding call_rcu_data
3c24913f PM	245	* structure, linking the structure in as specified. Caller must hold
3c24913f PM	246	* call_rcu_mutex.
b57aee66 PM	247	*/
b57aee66 PM	248
3c24913f	249	static void call_rcu_data_init(struct call_rcu_data **crdpp,
c1d2c60b MD	250	unsigned long flags,
c1d2c60b MD	251	int cpu_affinity)
b57aee66 PM	252	{
	253	struct call_rcu_data *crdp;
	254
	255	crdp = malloc(sizeof(*crdp));
	256	if (crdp == NULL) {
	257	fprintf(stderr, "Out of memory.\n");
	258	exit(-1);
	259	}
	260	memset(crdp, '\0', sizeof(*crdp));
	261	cds_wfq_init(&crdp->cbs);
	262	crdp->qlen = 0;
	263	if (pthread_mutex_init(&crdp->mtx, NULL) != 0) {
	264	perror("pthread_mutex_init");
	265	exit(-1);
	266	}
	267	if (pthread_cond_init(&crdp->cond, NULL) != 0) {
	268	perror("pthread_cond_init");
	269	exit(-1);
	270	}
	271	crdp->flags = flags \| URCU_CALL_RCU_RUNNING;
3c24913f	272	cds_list_add(&crdp->list, &call_rcu_data_list);
c1d2c60b	273	crdp->cpu_affinity = cpu_affinity;
b57aee66 PM	274	cmm_smp_mb(); /* Structure initialized before pointer is planted. */
	275	*crdpp = crdp;
	276	if (pthread_create(&crdp->tid, NULL, call_rcu_thread, crdp) != 0) {
	277	perror("pthread_create");
	278	exit(-1);
	279	}
	280	}
	281
	282	/*
	283	* Return a pointer to the call_rcu_data structure for the specified
	284	* CPU, returning NULL if there is none. We cannot automatically
	285	* created it because the platform we are running on might not define
	286	* sched_getcpu().
	287	*/
	288
	289	struct call_rcu_data *get_cpu_call_rcu_data(int cpu)
	290	{
	291	static int warned = 0;
	292
	293	if (per_cpu_call_rcu_data == NULL)
	294	return NULL;
	295	if (!warned && maxcpus > 0 && (cpu < 0 \|\| maxcpus <= cpu)) {
	296	fprintf(stderr, "[error] liburcu: get CPU # out of range\n");
	297	warned = 1;
	298	}
	299	if (cpu < 0 \|\| maxcpus <= cpu)
	300	return NULL;
	301	return per_cpu_call_rcu_data[cpu];
	302	}
	303
	304	/*
	305	* Return the tid corresponding to the call_rcu thread whose
	306	* call_rcu_data structure is specified.
	307	*/
	308
	309	pthread_t get_call_rcu_thread(struct call_rcu_data *crdp)
	310	{
	311	return crdp->tid;
	312	}
	313
	314	/*
	315	* Create a call_rcu_data structure (with thread) and return a pointer.
	316	*/
	317
c1d2c60b MD	318	static struct call_rcu_data *__create_call_rcu_data(unsigned long flags,
c1d2c60b MD	319	int cpu_affinity)
b57aee66 PM	320	{
	321	struct call_rcu_data *crdp;
	322
c1d2c60b	323	call_rcu_data_init(&crdp, flags, cpu_affinity);
b57aee66 PM	324	return crdp;
	325	}
	326
c1d2c60b MD	327	struct call_rcu_data *create_call_rcu_data(unsigned long flags,
c1d2c60b MD	328	int cpu_affinity)
3c24913f PM	329	{
	330	struct call_rcu_data *crdp;
	331
	332	call_rcu_lock(&call_rcu_mutex);
c1d2c60b	333	crdp = __create_call_rcu_data(flags, cpu_affinity);
3c24913f PM	334	call_rcu_unlock(&call_rcu_mutex);
	335	return crdp;
	336	}
	337
b57aee66 PM	338	/*
b57aee66 PM	339	* Set the specified CPU to use the specified call_rcu_data structure.
7106ddf8 PM	340	*
	341	* Use NULL to remove a CPU's call_rcu_data structure, but it is
	342	* the caller's responsibility to dispose of the removed structure.
	343	* Use get_cpu_call_rcu_data() to obtain a pointer to the old structure
	344	* (prior to NULLing it out, of course).
b57aee66 PM	345	*/
	346
	347	int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp)
	348	{
	349	int warned = 0;
	350
	351	call_rcu_lock(&call_rcu_mutex);
	352	if (cpu < 0 \|\| maxcpus <= cpu) {
	353	if (!warned) {
	354	fprintf(stderr, "[error] liburcu: set CPU # out of range\n");
	355	warned = 1;
	356	}
	357	call_rcu_unlock(&call_rcu_mutex);
	358	errno = EINVAL;
	359	return -EINVAL;
	360	}
	361	alloc_cpu_call_rcu_data();
	362	call_rcu_unlock(&call_rcu_mutex);
	363	if (per_cpu_call_rcu_data == NULL) {
	364	errno = ENOMEM;
	365	return -ENOMEM;
	366	}
	367	per_cpu_call_rcu_data[cpu] = crdp;
	368	return 0;
	369	}
	370
	371	/*
	372	* Return a pointer to the default call_rcu_data structure, creating
	373	* one if need be. Because we never free call_rcu_data structures,
	374	* we don't need to be in an RCU read-side critical section.
	375	*/
	376
	377	struct call_rcu_data *get_default_call_rcu_data(void)
	378	{
	379	if (default_call_rcu_data != NULL)
	380	return rcu_dereference(default_call_rcu_data);
	381	call_rcu_lock(&call_rcu_mutex);
	382	if (default_call_rcu_data != NULL) {
	383	call_rcu_unlock(&call_rcu_mutex);
	384	return default_call_rcu_data;
	385	}
c1d2c60b	386	call_rcu_data_init(&default_call_rcu_data, 0, -1);
b57aee66 PM	387	call_rcu_unlock(&call_rcu_mutex);
	388	return default_call_rcu_data;
	389	}
	390
	391	/*
	392	* Return the call_rcu_data structure that applies to the currently
	393	* running thread. Any call_rcu_data structure assigned specifically
	394	* to this thread has first priority, followed by any call_rcu_data
	395	* structure assigned to the CPU on which the thread is running,
	396	* followed by the default call_rcu_data structure. If there is not
	397	* yet a default call_rcu_data structure, one will be created.
	398	*/
	399	struct call_rcu_data *get_call_rcu_data(void)
	400	{
	401	int curcpu;
	402	static int warned = 0;
	403
	404	if (thread_call_rcu_data != NULL)
	405	return thread_call_rcu_data;
	406	if (maxcpus <= 0)
	407	return get_default_call_rcu_data();
	408	curcpu = sched_getcpu();
	409	if (!warned && (curcpu < 0 \|\| maxcpus <= curcpu)) {
	410	fprintf(stderr, "[error] liburcu: gcrd CPU # out of range\n");
	411	warned = 1;
	412	}
	413	if (curcpu >= 0 && maxcpus > curcpu &&
	414	per_cpu_call_rcu_data != NULL &&
	415	per_cpu_call_rcu_data[curcpu] != NULL)
	416	return per_cpu_call_rcu_data[curcpu];
	417	return get_default_call_rcu_data();
	418	}
	419
	420	/*
	421	* Return a pointer to this task's call_rcu_data if there is one.
	422	*/
	423
	424	struct call_rcu_data *get_thread_call_rcu_data(void)
	425	{
	426	return thread_call_rcu_data;
	427	}
	428
	429	/*
	430	* Set this task's call_rcu_data structure as specified, regardless
	431	* of whether or not this task already had one. (This allows switching
	432	* to and from real-time call_rcu threads, for example.)
7106ddf8 PM	433	*
	434	* Use NULL to remove a thread's call_rcu_data structure, but it is
	435	* the caller's responsibility to dispose of the removed structure.
	436	* Use get_thread_call_rcu_data() to obtain a pointer to the old structure
	437	* (prior to NULLing it out, of course).
b57aee66 PM	438	*/
	439
	440	void set_thread_call_rcu_data(struct call_rcu_data *crdp)
	441	{
	442	thread_call_rcu_data = crdp;
	443	}
	444
	445	/*
	446	* Create a separate call_rcu thread for each CPU. This does not
	447	* replace a pre-existing call_rcu thread -- use the set_cpu_call_rcu_data()
	448	* function if you want that behavior.
	449	*/
	450
	451	int create_all_cpu_call_rcu_data(unsigned long flags)
	452	{
	453	int i;
	454	struct call_rcu_data *crdp;
	455	int ret;
	456
	457	call_rcu_lock(&call_rcu_mutex);
	458	alloc_cpu_call_rcu_data();
	459	call_rcu_unlock(&call_rcu_mutex);
	460	if (maxcpus <= 0) {
	461	errno = EINVAL;
	462	return -EINVAL;
	463	}
	464	if (per_cpu_call_rcu_data == NULL) {
	465	errno = ENOMEM;
	466	return -ENOMEM;
	467	}
	468	for (i = 0; i < maxcpus; i++) {
	469	call_rcu_lock(&call_rcu_mutex);
	470	if (get_cpu_call_rcu_data(i)) {
	471	call_rcu_unlock(&call_rcu_mutex);
	472	continue;
	473	}
c1d2c60b	474	crdp = __create_call_rcu_data(flags, i);
b57aee66 PM	475	if (crdp == NULL) {
	476	call_rcu_unlock(&call_rcu_mutex);
	477	errno = ENOMEM;
	478	return -ENOMEM;
	479	}
	480	call_rcu_unlock(&call_rcu_mutex);
	481	if ((ret = set_cpu_call_rcu_data(i, crdp)) != 0) {
	482	/* FIXME: Leaks crdp for now. */
	483	return ret; /* Can happen on race. */
	484	}
	485	}
	486	return 0;
	487	}
	488
7106ddf8 PM	489	/*
	490	* Wake up the call_rcu thread corresponding to the specified
	491	* call_rcu_data structure.
	492	*/
	493	static void wake_call_rcu_thread(struct call_rcu_data *crdp)
	494	{
	495	if (!(_CMM_LOAD_SHARED(crdp->flags) & URCU_CALL_RCU_RT)) {
	496	call_rcu_lock(&crdp->mtx);
	497	if (!(_CMM_LOAD_SHARED(crdp->flags) & URCU_CALL_RCU_RUNNING)) {
	498	if (pthread_cond_signal(&crdp->cond) != 0) {
	499	perror("pthread_cond_signal");
	500	exit(-1);
	501	}
	502	}
	503	call_rcu_unlock(&crdp->mtx);
	504	}
	505	}
	506
b57aee66 PM	507	/*
	508	* Schedule a function to be invoked after a following grace period.
	509	* This is the only function that must be called -- the others are
	510	* only present to allow applications to tune their use of RCU for
	511	* maximum performance.
	512	*
	513	* Note that unless a call_rcu thread has not already been created,
	514	* the first invocation of call_rcu() will create one. So, if you
	515	* need the first invocation of call_rcu() to be fast, make sure
	516	* to create a call_rcu thread first. One way to accomplish this is
	517	* "get_call_rcu_data();", and another is create_all_cpu_call_rcu_data().
	518	*/
	519
	520	void call_rcu(struct rcu_head *head,
	521	void (func)(struct rcu_head head))
	522	{
	523	struct call_rcu_data *crdp;
	524
	525	cds_wfq_node_init(&head->next);
	526	head->func = func;
	527	crdp = get_call_rcu_data();
	528	cds_wfq_enqueue(&crdp->cbs, &head->next);
	529	uatomic_inc(&crdp->qlen);
7106ddf8 PM	530	wake_call_rcu_thread(crdp);
	531	}
	532
	533	/*
	534	* Free up the specified call_rcu_data structure, terminating the
	535	* associated call_rcu thread. The caller must have previously
	536	* removed the call_rcu_data structure from per-thread or per-CPU
	537	* usage. For example, set_cpu_call_rcu_data(cpu, NULL) for per-CPU
	538	* call_rcu_data structures or set_thread_call_rcu_data(NULL) for
	539	* per-thread call_rcu_data structures.
	540	*
	541	* We silently refuse to free up the default call_rcu_data structure
	542	* because that is where we put any leftover callbacks. Note that
	543	* the possibility of self-spawning callbacks makes it impossible
	544	* to execute all the callbacks in finite time without putting any
	545	* newly spawned callbacks somewhere else. The "somewhere else" of
	546	* last resort is the default call_rcu_data structure.
	547	*
	548	* We also silently refuse to free NULL pointers. This simplifies
	549	* the calling code.
	550	*/
	551	void call_rcu_data_free(struct call_rcu_data *crdp)
	552	{
	553	struct cds_wfq_node *cbs;
	554	struct cds_wfq_node **cbs_tail;
	555	struct cds_wfq_node **cbs_endprev;
	556
	557	if (crdp == NULL \|\| crdp == default_call_rcu_data) {
	558	return;
	559	}
	560	if ((crdp->flags & URCU_CALL_RCU_STOPPED) == 0) {
b57aee66	561	call_rcu_lock(&crdp->mtx);
7106ddf8	562	crdp->flags \|= URCU_CALL_RCU_STOP;
b57aee66	563	call_rcu_unlock(&crdp->mtx);
7106ddf8 PM	564	wake_call_rcu_thread(crdp);
	565	while ((crdp->flags & URCU_CALL_RCU_STOPPED) == 0)
	566	poll(NULL, 0, 1);
	567	}
	568	if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) {
	569	while ((cbs = _CMM_LOAD_SHARED(crdp->cbs.head)) == NULL)
	570	poll(NULL, 0, 1);
	571	_CMM_STORE_SHARED(crdp->cbs.head, NULL);
	572	cbs_tail = (struct cds_wfq_node **)
	573	uatomic_xchg(&crdp->cbs.tail, &crdp->cbs.head);
	574	cbs_endprev = (struct cds_wfq_node **)
	575	uatomic_xchg(&default_call_rcu_data, cbs_tail);
	576	*cbs_endprev = cbs;
	577	uatomic_add(&default_call_rcu_data->qlen,
	578	uatomic_read(&crdp->qlen));
	579	cds_list_del(&crdp->list);
	580	free(crdp);
	581	}
	582	}
	583
	584	/*
	585	* Clean up all the per-CPU call_rcu threads.
	586	*/
	587	void free_all_cpu_call_rcu_data(void)
	588	{
	589	int cpu;
	590	struct call_rcu_data *crdp;
	591
	592	if (maxcpus <= 0)
	593	return;
	594	for (cpu = 0; cpu < maxcpus; cpu++) {
	595	crdp = get_cpu_call_rcu_data(cpu);
	596	if (crdp == NULL)
	597	continue;
	598	set_cpu_call_rcu_data(cpu, NULL);
	599	call_rcu_data_free(crdp);
	600	}
	601	}
	602
81ad2e19 PM	603	/*
	604	* Acquire the call_rcu_mutex in order to ensure that the child sees
	605	* all of the call_rcu() data structures in a consistent state.
	606	* Suitable for pthread_atfork() and friends.
	607	*/
	608	void call_rcu_before_fork(void)
	609	{
	610	call_rcu_lock(&call_rcu_mutex);
	611	}
	612
	613	/*
	614	* Clean up call_rcu data structures in the parent of a successful fork()
	615	* that is not followed by exec() in the child. Suitable for
	616	* pthread_atfork() and friends.
	617	*/
	618	void call_rcu_after_fork_parent(void)
	619	{
	620	call_rcu_unlock(&call_rcu_mutex);
	621	}
	622
7106ddf8 PM	623	/*
7106ddf8 PM	624	* Clean up call_rcu data structures in the child of a successful fork()
81ad2e19 PM	625	* that is not followed by exec(). Suitable for pthread_atfork() and
81ad2e19 PM	626	* friends.
7106ddf8 PM	627	*/
	628	void call_rcu_after_fork_child(void)
	629	{
	630	struct call_rcu_data *crdp;
	631
81ad2e19 PM	632	/* Release the mutex. */
	633	call_rcu_unlock(&call_rcu_mutex);
	634
7106ddf8 PM	635	/*
	636	* Allocate a new default call_rcu_data structure in order
	637	* to get a working call_rcu thread to go with it.
	638	*/
	639	default_call_rcu_data = NULL;
	640	(void)get_default_call_rcu_data();
	641
	642	/* Dispose of all of the rest of the call_rcu_data structures. */
	643	while (call_rcu_data_list.next != call_rcu_data_list.prev) {
	644	crdp = cds_list_entry(call_rcu_data_list.prev,
	645	struct call_rcu_data, list);
	646	if (crdp == default_call_rcu_data)
	647	crdp = cds_list_entry(crdp->list.prev,
	648	struct call_rcu_data, list);
	649	crdp->flags = URCU_CALL_RCU_STOPPED;
	650	call_rcu_data_free(crdp);
b57aee66 PM	651	}
b57aee66 PM	652	}